summaryrefslogtreecommitdiff
path: root/thirdparty
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty')
-rw-r--r--thirdparty/README.md13
-rw-r--r--thirdparty/basis_universal/basisu_enc.h1
-rw-r--r--thirdparty/bullet/BulletCollision/BroadphaseCollision/btDbvt.h2
-rw-r--r--thirdparty/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h11
-rw-r--r--thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp4
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h6
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp22
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h1
-rw-r--r--thirdparty/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp7
-rw-r--r--thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h6
-rw-r--r--thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp5
-rw-r--r--thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h1
-rw-r--r--thirdparty/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp12
-rw-r--r--thirdparty/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp8
-rw-r--r--thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.cpp19
-rw-r--r--thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h43
-rw-r--r--thirdparty/bullet/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp2
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp46
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h7
-rw-r--r--thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp31
-rw-r--r--thirdparty/bullet/BulletSoftBody/btConjugateResidual.h188
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp42
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h43
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp78
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h16
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp184
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h132
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp641
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h46
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h2
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h2
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h11
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h46
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h145
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp8
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp187
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h156
-rw-r--r--thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h50
-rw-r--r--thirdparty/bullet/BulletSoftBody/btPreconditioner.h213
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBody.cpp636
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBody.h244
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp80
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h4
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h798
-rw-r--r--thirdparty/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp3
-rw-r--r--thirdparty/bullet/BulletSoftBody/poly34.cpp419
-rw-r--r--thirdparty/bullet/BulletSoftBody/poly34.h38
-rw-r--r--thirdparty/bullet/LinearMath/btImplicitQRSVD.h4
-rw-r--r--thirdparty/bullet/LinearMath/btMatrix3x3.h16
-rw-r--r--thirdparty/bullet/LinearMath/btMatrixX.h3
-rw-r--r--thirdparty/bullet/LinearMath/btModifiedGramSchmidt.h83
-rw-r--r--thirdparty/bullet/LinearMath/btReducedVector.cpp170
-rw-r--r--thirdparty/bullet/LinearMath/btReducedVector.h320
-rw-r--r--thirdparty/bullet/btBulletCollisionAll.cpp1
-rw-r--r--thirdparty/bullet/btLinearMathAll.cpp1
-rw-r--r--thirdparty/jpeg-compressor/jpgd.cpp6254
-rw-r--r--thirdparty/jpeg-compressor/jpgd.h634
-rw-r--r--thirdparty/jpeg-compressor/jpgd_idct.h462
-rw-r--r--thirdparty/jpeg-compressor/patches/fix-msvc2017-build.patch31
-rw-r--r--thirdparty/mbedtls/include/mbedtls/check_config.h21
-rw-r--r--thirdparty/mbedtls/include/mbedtls/version.h8
-rw-r--r--thirdparty/mbedtls/library/ecp.c28
-rw-r--r--thirdparty/mbedtls/library/ssl_cli.c16
-rw-r--r--thirdparty/mbedtls/library/ssl_tls.c34
-rw-r--r--thirdparty/mbedtls/library/x509.c2
65 files changed, 8486 insertions, 4261 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 95a6902089..7518f5d0f7 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -40,9 +40,12 @@ Files extracted from upstream source:
## bullet
- Upstream: https://github.com/bulletphysics/bullet3
-- Version: 2.89
+- Version: 2.90 (master cd8cf7521cbb8b7808126a6adebd47bb83ea166a)
- License: zlib
+Important: Synced with a pre-release version of bullet 2.90 from the master branch.
+Commit hash: cd8cf7521cbb8b7808126a6adebd47bb83ea166a
+
Files extracted from upstream source:
- src/* apart from CMakeLists.txt and premake4.lua files
@@ -182,12 +185,14 @@ Patches in the `patches` directory should be re-applied after updates.
## jpeg-compressor
- Upstream: https://github.com/richgel999/jpeg-compressor
-- Version: 1.04
+- Version: 2.00 (1eb17d558b9d3b7442d256642a5745974e9eeb1e, 2020)
- License: Public domain
Files extracted from upstream source:
-- `jpgd.{c,h}`
+- `jpgd*.{c,h}`
+
+Patches in the `patches` directory should be re-applied after updates.
## libogg
@@ -302,7 +307,7 @@ changes are marked with `// -- GODOT --` comments.
## mbedtls
- Upstream: https://tls.mbed.org/
-- Version: 2.16.5
+- Version: 2.16.6
- License: Apache 2.0
File extracted from upstream release tarball (`-apache.tgz` variant):
diff --git a/thirdparty/basis_universal/basisu_enc.h b/thirdparty/basis_universal/basisu_enc.h
index c2b9133045..0a0c3c6fc0 100644
--- a/thirdparty/basis_universal/basisu_enc.h
+++ b/thirdparty/basis_universal/basisu_enc.h
@@ -22,6 +22,7 @@
#include <functional>
#include <thread>
#include <unordered_map>
+#include <ostream>
#if !defined(_WIN32) || defined(__MINGW32__)
#include <libgen.h>
diff --git a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btDbvt.h b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btDbvt.h
index 980d19a754..55daa7fb57 100644
--- a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btDbvt.h
+++ b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btDbvt.h
@@ -203,8 +203,8 @@ struct btDbvntNode
btDbvntNode(const btDbvtNode* n)
: volume(n->volume)
- , angle(0)
, normal(0,0,0)
+ , angle(0)
, data(n->data)
{
childs[0] = 0;
diff --git a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
index f4a2d5e368..56011899cb 100644
--- a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
+++ b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btOverlappingPairCache.h
@@ -61,7 +61,8 @@ public:
virtual void cleanOverlappingPair(btBroadphasePair& pair, btDispatcher* dispatcher) = 0;
virtual int getNumOverlappingPairs() const = 0;
-
+ virtual bool needsBroadphaseCollision(btBroadphaseProxy * proxy0, btBroadphaseProxy * proxy1) const = 0;
+ virtual btOverlapFilterCallback* getOverlapFilterCallback() = 0;
virtual void cleanProxyFromPairs(btBroadphaseProxy* proxy, btDispatcher* dispatcher) = 0;
virtual void setOverlapFilterCallback(btOverlapFilterCallback* callback) = 0;
@@ -380,6 +381,14 @@ public:
{
}
+ bool needsBroadphaseCollision(btBroadphaseProxy*, btBroadphaseProxy*) const
+ {
+ return true;
+ }
+ btOverlapFilterCallback* getOverlapFilterCallback()
+ {
+ return 0;
+ }
virtual void setOverlapFilterCallback(btOverlapFilterCallback* /*callback*/)
{
}
diff --git a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
index b814fd84d8..4954e773e2 100644
--- a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
+++ b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp
@@ -468,7 +468,7 @@ void btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
#ifdef RAYAABB2
btVector3 rayDir = (rayTarget - raySource);
- rayDir.normalize();
+ rayDir.safeNormalize();// stephengold changed normalize to safeNormalize 2020-02-17
lambda_max = rayDir.dot(rayTarget - raySource);
///what about division by zero? --> just set rayDirection[i] to 1.0
btVector3 rayDirectionInverse;
@@ -554,7 +554,7 @@ void btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
#ifdef RAYAABB2
btVector3 rayDirection = (rayTarget - raySource);
- rayDirection.normalize();
+ rayDirection.safeNormalize();// stephengold changed normalize to safeNormalize 2020-02-17
lambda_max = rayDirection.dot(rayTarget - raySource);
///what about division by zero? --> just set rayDirection[i] to 1.0
rayDirection[0] = rayDirection[0] == btScalar(0.0) ? btScalar(BT_LARGE_FLOAT) : btScalar(1.0) / rayDirection[0];
diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
index 6b9f5e23a5..04309670cf 100644
--- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
+++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcher.h
@@ -46,8 +46,6 @@ protected:
btAlignedObjectArray<btPersistentManifold*> m_manifoldsPtr;
- btManifoldResult m_defaultManifoldResult;
-
btNearCallback m_nearCallback;
btPoolAllocator* m_collisionAlgorithmPoolAllocator;
@@ -95,11 +93,15 @@ public:
btPersistentManifold* getManifoldByIndexInternal(int index)
{
+ btAssert(index>=0);
+ btAssert(index<m_manifoldsPtr.size());
return m_manifoldsPtr[index];
}
const btPersistentManifold* getManifoldByIndexInternal(int index) const
{
+ btAssert(index>=0);
+ btAssert(index<m_manifoldsPtr.size());
return m_manifoldsPtr[index];
}
diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp
index 6fe56538d2..89bc8d920e 100644
--- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp
@@ -28,6 +28,7 @@ subject to the following restrictions:
btCollisionDispatcherMt::btCollisionDispatcherMt(btCollisionConfiguration* config, int grainSize)
: btCollisionDispatcher(config)
{
+ m_batchManifoldsPtr.resize(btGetTaskScheduler()->getNumThreads());
m_batchUpdating = false;
m_grainSize = grainSize; // iterations per task
}
@@ -65,6 +66,10 @@ btPersistentManifold* btCollisionDispatcherMt::getNewManifold(const btCollisionO
manifold->m_index1a = m_manifoldsPtr.size();
m_manifoldsPtr.push_back(manifold);
}
+ else
+ {
+ m_batchManifoldsPtr[btGetCurrentThreadIndex()].push_back(manifold);
+ }
return manifold;
}
@@ -121,7 +126,7 @@ struct CollisionDispatcherUpdater : public btIParallelForBody
void btCollisionDispatcherMt::dispatchAllCollisionPairs(btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher)
{
- int pairCount = pairCache->getNumOverlappingPairs();
+ const int pairCount = pairCache->getNumOverlappingPairs();
if (pairCount == 0)
{
return;
@@ -136,16 +141,17 @@ void btCollisionDispatcherMt::dispatchAllCollisionPairs(btOverlappingPairCache*
btParallelFor(0, pairCount, m_grainSize, updater);
m_batchUpdating = false;
- // reconstruct the manifolds array to ensure determinism
- m_manifoldsPtr.resizeNoInitialize(0);
-
- btBroadphasePair* pairs = pairCache->getOverlappingPairArrayPtr();
- for (int i = 0; i < pairCount; ++i)
+ // merge new manifolds, if any
+ for (int i = 0; i < m_batchManifoldsPtr.size(); ++i)
{
- if (btCollisionAlgorithm* algo = pairs[i].m_algorithm)
+ btAlignedObjectArray<btPersistentManifold*>& batchManifoldsPtr = m_batchManifoldsPtr[i];
+
+ for (int j = 0; j < batchManifoldsPtr.size(); ++j)
{
- algo->getAllContactManifolds(m_manifoldsPtr);
+ m_manifoldsPtr.push_back(batchManifoldsPtr[j]);
}
+
+ batchManifoldsPtr.resizeNoInitialize(0);
}
// update the indices (used when releasing manifolds)
diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h
index 28eba7f32a..1155de2cfe 100644
--- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h
+++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionDispatcherMt.h
@@ -30,6 +30,7 @@ public:
virtual void dispatchAllCollisionPairs(btOverlappingPairCache* pairCache, const btDispatcherInfo& info, btDispatcher* dispatcher) BT_OVERRIDE;
protected:
+ btAlignedObjectArray<btAlignedObjectArray<btPersistentManifold*> > m_batchManifoldsPtr;
bool m_batchUpdating;
int m_grainSize;
};
diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
index 1bb21104cb..b5f4a3c869 100644
--- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
+++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCompoundCollisionAlgorithm.cpp
@@ -139,7 +139,12 @@ public:
if (TestAabbAgainstAabb2(aabbMin0, aabbMax0, aabbMin1, aabbMax1))
{
- btCollisionObjectWrapper compoundWrap(this->m_compoundColObjWrap, childShape, m_compoundColObjWrap->getCollisionObject(), newChildWorldTrans, childTrans, -1, index);
+ btTransform preTransform = childTrans;
+ if (this->m_compoundColObjWrap->m_preTransform)
+ {
+ preTransform = preTransform *(*(this->m_compoundColObjWrap->m_preTransform));
+ }
+ btCollisionObjectWrapper compoundWrap(this->m_compoundColObjWrap, childShape, m_compoundColObjWrap->getCollisionObject(), newChildWorldTrans, preTransform, -1, index);
btCollisionAlgorithm* algo = 0;
bool allocatedAlgorithm = false;
diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
index e82d1b139e..4356c12abf 100644
--- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
+++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h
@@ -46,7 +46,7 @@ struct btContactSolverInfoData
btScalar m_sor; //successive over-relaxation term
btScalar m_erp; //error reduction for non-contact constraints
btScalar m_erp2; //error reduction for contact constraints
- btScalar m_deformable_erp; //error reduction for deformable constraints
+ btScalar m_deformable_erp; //error reduction for deformable constraints
btScalar m_globalCfm; //constraint force mixing for contacts and non-contacts
btScalar m_frictionERP; //error reduction for friction constraints
btScalar m_frictionCFM; //constraint force mixing for friction constraints
@@ -67,6 +67,7 @@ struct btContactSolverInfoData
bool m_jointFeedbackInWorldSpace;
bool m_jointFeedbackInJointFrame;
int m_reportSolverAnalytics;
+ int m_numNonContactInnerIterations;
};
struct btContactSolverInfo : public btContactSolverInfoData
@@ -82,7 +83,7 @@ struct btContactSolverInfo : public btContactSolverInfoData
m_numIterations = 10;
m_erp = btScalar(0.2);
m_erp2 = btScalar(0.2);
- m_deformable_erp = btScalar(0.);
+ m_deformable_erp = btScalar(0.1);
m_globalCfm = btScalar(0.);
m_frictionERP = btScalar(0.2); //positional friction 'anchors' are disabled by default
m_frictionCFM = btScalar(0.);
@@ -104,6 +105,7 @@ struct btContactSolverInfo : public btContactSolverInfoData
m_jointFeedbackInWorldSpace = false;
m_jointFeedbackInJointFrame = false;
m_reportSolverAnalytics = 0;
+ m_numNonContactInnerIterations = 1; // the number of inner iterations for solving motor constraint in a single iteration of the constraint solve
}
};
diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp
index 93626f18ff..74a13c6249 100644
--- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp
+++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.cpp
@@ -876,7 +876,10 @@ int btGeneric6DofSpring2Constraint::get_limit_motor_info2(
// will we not request a velocity with the wrong direction ?
// and the answer is not, because in practice during the solving the current velocity is subtracted from the m_constraintError
// so the sign of the force that is really matters
- info->m_constraintError[srow] = (rotational ? -1 : 1) * (f < 0 ? -SIMD_INFINITY : SIMD_INFINITY);
+ if (m_flags & BT_6DOF_FLAGS_USE_INFINITE_ERROR)
+ info->m_constraintError[srow] = (rotational ? -1 : 1) * (f < 0 ? -SIMD_INFINITY : SIMD_INFINITY);
+ else
+ info->m_constraintError[srow] = vel + f / m * (rotational ? -1 : 1);
btScalar minf = f < fd ? f : fd;
btScalar maxf = f < fd ? fd : f;
diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h
index 00e24364e0..c86dc373da 100644
--- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h
+++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h
@@ -265,6 +265,7 @@ enum bt6DofFlags2
BT_6DOF_FLAGS_ERP_STOP2 = 2,
BT_6DOF_FLAGS_CFM_MOTO2 = 4,
BT_6DOF_FLAGS_ERP_MOTO2 = 8,
+ BT_6DOF_FLAGS_USE_INFINITE_ERROR = (1<<16)
};
#define BT_6DOF_FLAGS_AXIS_SHIFT2 4 // bits per axis
diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
index e4da468299..d2641c582f 100644
--- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
+++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btSequentialImpulseConstraintSolver.cpp
@@ -14,7 +14,9 @@ subject to the following restrictions:
*/
//#define COMPUTE_IMPULSE_DENOM 1
-//#define BT_ADDITIONAL_DEBUG
+#ifdef BT_DEBUG
+# define BT_ADDITIONAL_DEBUG
+#endif
//It is not necessary (redundant) to refresh contact manifolds, this refresh has been moved to the collision algorithms.
@@ -690,8 +692,10 @@ int btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject&
{
#if BT_THREADSAFE
int solverBodyId = -1;
- bool isRigidBodyType = btRigidBody::upcast(&body) != NULL;
- if (isRigidBodyType && !body.isStaticOrKinematicObject())
+ const bool isRigidBodyType = btRigidBody::upcast(&body) != NULL;
+ const bool isStaticOrKinematic = body.isStaticOrKinematicObject();
+ const bool isKinematic = body.isKinematicObject();
+ if (isRigidBodyType && !isStaticOrKinematic)
{
// dynamic body
// Dynamic bodies can only be in one island, so it's safe to write to the companionId
@@ -704,7 +708,7 @@ int btSequentialImpulseConstraintSolver::getOrInitSolverBody(btCollisionObject&
body.setCompanionId(solverBodyId);
}
}
- else if (isRigidBodyType && body.isKinematicObject())
+ else if (isRigidBodyType && isKinematic)
{
//
// NOTE: must test for kinematic before static because some kinematic objects also
diff --git a/thirdparty/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp b/thirdparty/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
index a3c9f42eb9..fb15ae31eb 100644
--- a/thirdparty/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
+++ b/thirdparty/bullet/BulletDynamics/Dynamics/btDiscreteDynamicsWorld.cpp
@@ -800,6 +800,14 @@ public:
///don't do CCD when the collision filters are not matching
if (!ClosestConvexResultCallback::needsCollision(proxy0))
return false;
+ if (m_pairCache->getOverlapFilterCallback()) {
+ btBroadphaseProxy* proxy1 = m_me->getBroadphaseHandle();
+ bool collides = m_pairCache->needsBroadphaseCollision(proxy0, proxy1);
+ if (!collides)
+ {
+ return false;
+ }
+ }
btCollisionObject* otherObj = (btCollisionObject*)proxy0->m_clientObject;
diff --git a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.cpp b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.cpp
index 9e8705b001..27fdead761 100644
--- a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.cpp
+++ b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.cpp
@@ -136,8 +136,13 @@ void btRigidBody::setGravity(const btVector3& acceleration)
void btRigidBody::setDamping(btScalar lin_damping, btScalar ang_damping)
{
- m_linearDamping = btClamped(lin_damping, (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
- m_angularDamping = btClamped(ang_damping, (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
+#ifdef BT_USE_OLD_DAMPING_METHOD
+ m_linearDamping = btMax(lin_damping, btScalar(0.0));
+ m_angularDamping = btMax(ang_damping, btScalar(0.0));
+#else
+ m_linearDamping = btClamped(lin_damping, btScalar(0.0), btScalar(1.0));
+ m_angularDamping = btClamped(ang_damping, btScalar(0.0), btScalar(1.0));
+#endif
}
///applyDamping damps the velocity, using the given m_linearDamping and m_angularDamping
@@ -146,10 +151,9 @@ void btRigidBody::applyDamping(btScalar timeStep)
//On new damping: see discussion/issue report here: http://code.google.com/p/bullet/issues/detail?id=74
//todo: do some performance comparisons (but other parts of the engine are probably bottleneck anyway
-//#define USE_OLD_DAMPING_METHOD 1
-#ifdef USE_OLD_DAMPING_METHOD
- m_linearVelocity *= GEN_clamped((btScalar(1.) - timeStep * m_linearDamping), (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
- m_angularVelocity *= GEN_clamped((btScalar(1.) - timeStep * m_angularDamping), (btScalar)btScalar(0.0), (btScalar)btScalar(1.0));
+#ifdef BT_USE_OLD_DAMPING_METHOD
+ m_linearVelocity *= btMax((btScalar(1.0) - timeStep * m_linearDamping), btScalar(0.0));
+ m_angularVelocity *= btMax((btScalar(1.0) - timeStep * m_angularDamping), btScalar(0.0));
#else
m_linearVelocity *= btPow(btScalar(1) - m_linearDamping, timeStep);
m_angularVelocity *= btPow(btScalar(1) - m_angularDamping, timeStep);
@@ -380,6 +384,9 @@ void btRigidBody::integrateVelocities(btScalar step)
{
m_angularVelocity *= (MAX_ANGVEL / step) / angvel;
}
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_angularVelocity);
+ #endif
}
btQuaternion btRigidBody::getOrientation() const
diff --git a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h
index 39d47cbbda..943d724cce 100644
--- a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h
+++ b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h
@@ -305,6 +305,9 @@ public:
void applyTorque(const btVector3& torque)
{
m_totalTorque += torque * m_angularFactor;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_totalTorque);
+ #endif
}
void applyForce(const btVector3& force, const btVector3& rel_pos)
@@ -316,11 +319,17 @@ public:
void applyCentralImpulse(const btVector3& impulse)
{
m_linearVelocity += impulse * m_linearFactor * m_inverseMass;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_linearVelocity);
+ #endif
}
void applyTorqueImpulse(const btVector3& torque)
{
m_angularVelocity += m_invInertiaTensorWorld * torque * m_angularFactor;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_angularVelocity);
+ #endif
}
void applyImpulse(const btVector3& impulse, const btVector3& rel_pos)
@@ -361,20 +370,46 @@ public:
{
m_pushVelocity = v;
}
-
+
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ void clampVelocity(btVector3& v) const {
+ v.setX(
+ fmax(-BT_CLAMP_VELOCITY_TO,
+ fmin(BT_CLAMP_VELOCITY_TO, v.getX()))
+ );
+ v.setY(
+ fmax(-BT_CLAMP_VELOCITY_TO,
+ fmin(BT_CLAMP_VELOCITY_TO, v.getY()))
+ );
+ v.setZ(
+ fmax(-BT_CLAMP_VELOCITY_TO,
+ fmin(BT_CLAMP_VELOCITY_TO, v.getZ()))
+ );
+ }
+ #endif
+
void setTurnVelocity(const btVector3& v)
{
m_turnVelocity = v;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_turnVelocity);
+ #endif
}
void applyCentralPushImpulse(const btVector3& impulse)
{
m_pushVelocity += impulse * m_linearFactor * m_inverseMass;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_pushVelocity);
+ #endif
}
void applyTorqueTurnImpulse(const btVector3& torque)
{
m_turnVelocity += m_invInertiaTensorWorld * torque * m_angularFactor;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_turnVelocity);
+ #endif
}
void clearForces()
@@ -408,12 +443,18 @@ public:
{
m_updateRevision++;
m_linearVelocity = lin_vel;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_linearVelocity);
+ #endif
}
inline void setAngularVelocity(const btVector3& ang_vel)
{
m_updateRevision++;
m_angularVelocity = ang_vel;
+ #if defined(BT_CLAMP_VELOCITY_TO) && BT_CLAMP_VELOCITY_TO > 0
+ clampVelocity(m_angularVelocity);
+ #endif
}
btVector3 getVelocityInLocalPoint(const btVector3& rel_pos) const
diff --git a/thirdparty/bullet/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp b/thirdparty/bullet/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp
index 5353fe009e..772b774202 100644
--- a/thirdparty/bullet/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp
+++ b/thirdparty/bullet/BulletDynamics/Dynamics/btSimulationIslandManagerMt.cpp
@@ -171,6 +171,8 @@ void btSimulationIslandManagerMt::initIslandPools()
btSimulationIslandManagerMt::Island* btSimulationIslandManagerMt::getIsland(int id)
{
+ btAssert(id >= 0);
+ btAssert(id < m_lookupIslandFromId.size());
Island* island = m_lookupIslandFromId[id];
if (island == NULL)
{
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp
index bdaa473476..a1d5bb9ca8 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp
@@ -583,52 +583,6 @@ void btMultiBody::compTreeLinkVelocities(btVector3 *omega, btVector3 *vel) const
}
}
-btScalar btMultiBody::getKineticEnergy() const
-{
- int num_links = getNumLinks();
- // TODO: would be better not to allocate memory here
- btAlignedObjectArray<btVector3> omega;
- omega.resize(num_links + 1);
- btAlignedObjectArray<btVector3> vel;
- vel.resize(num_links + 1);
- compTreeLinkVelocities(&omega[0], &vel[0]);
-
- // we will do the factor of 0.5 at the end
- btScalar result = m_baseMass * vel[0].dot(vel[0]);
- result += omega[0].dot(m_baseInertia * omega[0]);
-
- for (int i = 0; i < num_links; ++i)
- {
- result += m_links[i].m_mass * vel[i + 1].dot(vel[i + 1]);
- result += omega[i + 1].dot(m_links[i].m_inertiaLocal * omega[i + 1]);
- }
-
- return 0.5f * result;
-}
-
-btVector3 btMultiBody::getAngularMomentum() const
-{
- int num_links = getNumLinks();
- // TODO: would be better not to allocate memory here
- btAlignedObjectArray<btVector3> omega;
- omega.resize(num_links + 1);
- btAlignedObjectArray<btVector3> vel;
- vel.resize(num_links + 1);
- btAlignedObjectArray<btQuaternion> rot_from_world;
- rot_from_world.resize(num_links + 1);
- compTreeLinkVelocities(&omega[0], &vel[0]);
-
- rot_from_world[0] = m_baseQuat;
- btVector3 result = quatRotate(rot_from_world[0].inverse(), (m_baseInertia * omega[0]));
-
- for (int i = 0; i < num_links; ++i)
- {
- rot_from_world[i + 1] = m_links[i].m_cachedRotParentToThis * rot_from_world[m_links[i].m_parent + 1];
- result += (quatRotate(rot_from_world[i + 1].inverse(), (m_links[i].m_inertiaLocal * omega[i + 1])));
- }
-
- return result;
-}
void btMultiBody::clearConstraintForces()
{
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h
index afed669a7b..be795633fd 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h
@@ -307,13 +307,6 @@ public:
//
btMatrix3x3 localFrameToWorld(int i, const btMatrix3x3 &local_frame) const;
- //
- // calculate kinetic energy and angular momentum
- // useful for debugging.
- //
-
- btScalar getKineticEnergy() const;
- btVector3 getAngularMomentum() const;
//
// set external forces and torques. Note all external forces/torques are given in the WORLD frame.
diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp
index ffae5300f0..2788367431 100644
--- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraintSolver.cpp
@@ -30,23 +30,28 @@ btScalar btMultiBodyConstraintSolver::solveSingleIteration(int iteration, btColl
btScalar leastSquaredResidual = btSequentialImpulseConstraintSolver::solveSingleIteration(iteration, bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
//solve featherstone non-contact constraints
-
+ btScalar nonContactResidual = 0;
//printf("m_multiBodyNonContactConstraints = %d\n",m_multiBodyNonContactConstraints.size());
-
- for (int j = 0; j < m_multiBodyNonContactConstraints.size(); j++)
+ for (int i = 0; i < infoGlobal.m_numNonContactInnerIterations; ++i)
{
- int index = iteration & 1 ? j : m_multiBodyNonContactConstraints.size() - 1 - j;
+ // reset the nonContactResdual to 0 at start of each inner iteration
+ nonContactResidual = 0;
+ for (int j = 0; j < m_multiBodyNonContactConstraints.size(); j++)
+ {
+ int index = iteration & 1 ? j : m_multiBodyNonContactConstraints.size() - 1 - j;
- btMultiBodySolverConstraint& constraint = m_multiBodyNonContactConstraints[index];
+ btMultiBodySolverConstraint& constraint = m_multiBodyNonContactConstraints[index];
- btScalar residual = resolveSingleConstraintRowGeneric(constraint);
- leastSquaredResidual = btMax(leastSquaredResidual, residual * residual);
+ btScalar residual = resolveSingleConstraintRowGeneric(constraint);
+ nonContactResidual = btMax(nonContactResidual, residual * residual);
- if (constraint.m_multiBodyA)
- constraint.m_multiBodyA->setPosUpdated(false);
- if (constraint.m_multiBodyB)
- constraint.m_multiBodyB->setPosUpdated(false);
+ if (constraint.m_multiBodyA)
+ constraint.m_multiBodyA->setPosUpdated(false);
+ if (constraint.m_multiBodyB)
+ constraint.m_multiBodyB->setPosUpdated(false);
+ }
}
+ leastSquaredResidual = btMax(leastSquaredResidual, nonContactResidual);
//solve featherstone normal contact
for (int j0 = 0; j0 < m_multiBodyNormalContactConstraints.size(); j0++)
@@ -1250,7 +1255,7 @@ void btMultiBodyConstraintSolver::convertMultiBodyContact(btPersistentManifold*
{
const btMultiBodyLinkCollider* fcA = btMultiBodyLinkCollider::upcast(manifold->getBody0());
const btMultiBodyLinkCollider* fcB = btMultiBodyLinkCollider::upcast(manifold->getBody1());
-
+
btMultiBody* mbA = fcA ? fcA->m_multiBody : 0;
btMultiBody* mbB = fcB ? fcB->m_multiBody : 0;
@@ -1270,7 +1275,7 @@ void btMultiBodyConstraintSolver::convertMultiBodyContact(btPersistentManifold*
// return;
//only a single rollingFriction per manifold
- int rollingFriction = 1;
+ int rollingFriction = 4;
for (int j = 0; j < manifold->getNumContacts(); j++)
{
diff --git a/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h b/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h
new file mode 100644
index 0000000000..7b211c4172
--- /dev/null
+++ b/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h
@@ -0,0 +1,188 @@
+/*
+ Written by Xuchen Han <xuchenhan2015@u.northwestern.edu>
+
+ Bullet Continuous Collision Detection and Physics Library
+ Copyright (c) 2019 Google Inc. http://bulletphysics.org
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from the use of this software.
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it freely,
+ subject to the following restrictions:
+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#ifndef BT_CONJUGATE_RESIDUAL_H
+#define BT_CONJUGATE_RESIDUAL_H
+#include <iostream>
+#include <cmath>
+#include <limits>
+#include <LinearMath/btAlignedObjectArray.h>
+#include <LinearMath/btVector3.h>
+#include <LinearMath/btScalar.h>
+#include "LinearMath/btQuickprof.h"
+template <class MatrixX>
+class btConjugateResidual
+{
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ TVStack r,p,z,temp_p, temp_r, best_x;
+ // temp_r = A*r
+ // temp_p = A*p
+ // z = M^(-1) * temp_p = M^(-1) * A * p
+ int max_iterations;
+ btScalar tolerance_squared, best_r;
+public:
+ btConjugateResidual(const int max_it_in)
+ : max_iterations(max_it_in)
+ {
+ tolerance_squared = 1e-2;
+ }
+
+ virtual ~btConjugateResidual(){}
+
+ // return the number of iterations taken
+ int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false)
+ {
+ BT_PROFILE("CRSolve");
+ btAssert(x.size() == b.size());
+ reinitialize(b);
+ // r = b - A * x --with assigned dof zeroed out
+ A.multiply(x, temp_r); // borrow temp_r here to store A*x
+ r = sub(b, temp_r);
+ // z = M^(-1) * r
+ A.precondition(r, z); // borrow z to store preconditioned r
+ r = z;
+ btScalar residual_norm = norm(r);
+ if (residual_norm <= tolerance_squared) {
+ if (verbose)
+ {
+ std::cout << "Iteration = 0" << std::endl;
+ std::cout << "Two norm of the residual = " << residual_norm << std::endl;
+ }
+ return 0;
+ }
+ p = r;
+ btScalar r_dot_Ar, r_dot_Ar_new;
+ // temp_p = A*p
+ A.multiply(p, temp_p);
+ // temp_r = A*r
+ temp_r = temp_p;
+ r_dot_Ar = dot(r, temp_r);
+ for (int k = 1; k <= max_iterations; k++) {
+ // z = M^(-1) * Ap
+ A.precondition(temp_p, z);
+ // alpha = r^T * A * r / (Ap)^T * M^-1 * Ap)
+ btScalar alpha = r_dot_Ar / dot(temp_p, z);
+ // x += alpha * p;
+ multAndAddTo(alpha, p, x);
+ // r -= alpha * z;
+ multAndAddTo(-alpha, z, r);
+ btScalar norm_r = norm(r);
+ if (norm_r < best_r)
+ {
+ best_x = x;
+ best_r = norm_r;
+ if (norm_r < tolerance_squared) {
+ if (verbose)
+ {
+ std::cout << "ConjugateResidual iterations " << k << std::endl;
+ }
+ return k;
+ }
+ else
+ {
+ if (verbose)
+ {
+ std::cout << "ConjugateResidual iterations " << k << " has residual "<< norm_r << std::endl;
+ }
+ }
+ }
+ // temp_r = A * r;
+ A.multiply(r, temp_r);
+ r_dot_Ar_new = dot(r, temp_r);
+ btScalar beta = r_dot_Ar_new/r_dot_Ar;
+ r_dot_Ar = r_dot_Ar_new;
+ // p = beta*p + r;
+ p = multAndAdd(beta, p, r);
+ // temp_p = beta*temp_p + temp_r;
+ temp_p = multAndAdd(beta, temp_p, temp_r);
+ }
+ if (verbose)
+ {
+ std::cout << "ConjugateResidual max iterations reached " << max_iterations << std::endl;
+ }
+ x = best_x;
+ return max_iterations;
+ }
+
+ void reinitialize(const TVStack& b)
+ {
+ r.resize(b.size());
+ p.resize(b.size());
+ z.resize(b.size());
+ temp_p.resize(b.size());
+ temp_r.resize(b.size());
+ best_x.resize(b.size());
+ best_r = SIMD_INFINITY;
+ }
+
+ TVStack sub(const TVStack& a, const TVStack& b)
+ {
+ // c = a-b
+ btAssert(a.size() == b.size());
+ TVStack c;
+ c.resize(a.size());
+ for (int i = 0; i < a.size(); ++i)
+ {
+ c[i] = a[i] - b[i];
+ }
+ return c;
+ }
+
+ btScalar squaredNorm(const TVStack& a)
+ {
+ return dot(a,a);
+ }
+
+ btScalar norm(const TVStack& a)
+ {
+ btScalar ret = 0;
+ for (int i = 0; i < a.size(); ++i)
+ {
+ for (int d = 0; d < 3; ++d)
+ {
+ ret = btMax(ret, btFabs(a[i][d]));
+ }
+ }
+ return ret;
+ }
+
+ btScalar dot(const TVStack& a, const TVStack& b)
+ {
+ btScalar ans(0);
+ for (int i = 0; i < a.size(); ++i)
+ ans += a[i].dot(b[i]);
+ return ans;
+ }
+
+ void multAndAddTo(btScalar s, const TVStack& a, TVStack& result)
+ {
+ // result += s*a
+ btAssert(a.size() == result.size());
+ for (int i = 0; i < a.size(); ++i)
+ result[i] += s * a[i];
+ }
+
+ TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b)
+ {
+ // result = a*s + b
+ TVStack result;
+ result.resize(a.size());
+ for (int i = 0; i < a.size(); ++i)
+ result[i] = s * a[i] + b[i];
+ return result;
+ }
+};
+#endif /* btConjugateResidual_h */
+
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp
index 1b247641aa..5381ee6265 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp
@@ -23,12 +23,15 @@ btDeformableBackwardEulerObjective::btDeformableBackwardEulerObjective(btAligned
, m_backupVelocity(backup_v)
, m_implicit(false)
{
- m_preconditioner = new MassPreconditioner(m_softBodies);
+ m_massPreconditioner = new MassPreconditioner(m_softBodies);
+ m_KKTPreconditioner = new KKTPreconditioner(m_softBodies, m_projection, m_lf, m_dt, m_implicit);
+ m_preconditioner = m_KKTPreconditioner;
}
btDeformableBackwardEulerObjective::~btDeformableBackwardEulerObjective()
{
- delete m_preconditioner;
+ delete m_KKTPreconditioner;
+ delete m_massPreconditioner;
}
void btDeformableBackwardEulerObjective::reinitialize(bool nodeUpdated, btScalar dt)
@@ -47,7 +50,7 @@ void btDeformableBackwardEulerObjective::reinitialize(bool nodeUpdated, btScalar
m_lf[i]->reinitialize(nodeUpdated);
}
m_projection.reinitialize(nodeUpdated);
- m_preconditioner->reinitialize(nodeUpdated);
+// m_preconditioner->reinitialize(nodeUpdated);
}
void btDeformableBackwardEulerObjective::setDt(btScalar dt)
@@ -80,6 +83,33 @@ void btDeformableBackwardEulerObjective::multiply(const TVStack& x, TVStack& b)
m_lf[i]->addScaledElasticForceDifferential(-m_dt*m_dt, x, b);
}
}
+ int offset = m_nodes.size();
+ for (int i = offset; i < b.size(); ++i)
+ {
+ b[i].setZero();
+ }
+ // add in the lagrange multiplier terms
+
+ for (int c = 0; c < m_projection.m_lagrangeMultipliers.size(); ++c)
+ {
+ // C^T * lambda
+ const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[c];
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ b[lm.m_indices[i]] += x[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j];
+ }
+ }
+ // C * x
+ for (int d = 0; d < lm.m_num_constraints; ++d)
+ {
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ b[offset+c][d] += lm.m_weights[i] * x[lm.m_indices[i]].dot(lm.m_dirs[d]);
+ }
+ }
+ }
}
void btDeformableBackwardEulerObjective::updateVelocity(const TVStack& dv)
@@ -134,7 +164,7 @@ void btDeformableBackwardEulerObjective::computeResidual(btScalar dt, TVStack &r
m_lf[i]->addScaledDampingForce(dt, residual);
}
}
- m_projection.project(residual);
+// m_projection.project(residual);
}
btScalar btDeformableBackwardEulerObjective::computeNorm(const TVStack& residual) const
@@ -186,9 +216,9 @@ void btDeformableBackwardEulerObjective::initialGuess(TVStack& dv, const TVStack
}
//set constraints as projections
-void btDeformableBackwardEulerObjective::setConstraints()
+void btDeformableBackwardEulerObjective::setConstraints(const btContactSolverInfo& infoGlobal)
{
- m_projection.setConstraints();
+ m_projection.setConstraints(infoGlobal);
}
void btDeformableBackwardEulerObjective::applyDynamicFriction(TVStack& r)
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h
index 05ab42ff0a..86579e71ac 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h
@@ -15,11 +15,12 @@
#ifndef BT_BACKWARD_EULER_OBJECTIVE_H
#define BT_BACKWARD_EULER_OBJECTIVE_H
-#include "btConjugateGradient.h"
+//#include "btConjugateGradient.h"
#include "btDeformableLagrangianForce.h"
#include "btDeformableMassSpringForce.h"
#include "btDeformableGravityForce.h"
#include "btDeformableCorotatedForce.h"
+#include "btDeformableMousePickingForce.h"
#include "btDeformableLinearElasticityForce.h"
#include "btDeformableNeoHookeanForce.h"
#include "btDeformableContactProjection.h"
@@ -39,6 +40,8 @@ public:
const TVStack& m_backupVelocity;
btAlignedObjectArray<btSoftBody::Node* > m_nodes;
bool m_implicit;
+ MassPreconditioner* m_massPreconditioner;
+ KKTPreconditioner* m_KKTPreconditioner;
btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody *>& softBodies, const TVStack& backup_v);
@@ -79,7 +82,7 @@ public:
void updateVelocity(const TVStack& dv);
//set constraints as projections
- void setConstraints();
+ void setConstraints(const btContactSolverInfo& infoGlobal);
// update the projections and project the residual
void project(TVStack& r)
@@ -129,6 +132,42 @@ public:
// Calculate the total potential energy in the system
btScalar totalEnergy(btScalar dt);
+
+ void addLagrangeMultiplier(const TVStack& vec, TVStack& extended_vec)
+ {
+ extended_vec.resize(vec.size() + m_projection.m_lagrangeMultipliers.size());
+ for (int i = 0; i < vec.size(); ++i)
+ {
+ extended_vec[i] = vec[i];
+ }
+ int offset = vec.size();
+ for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i)
+ {
+ extended_vec[offset + i].setZero();
+ }
+ }
+
+ void addLagrangeMultiplierRHS(const TVStack& residual, const TVStack& m_dv, TVStack& extended_residual)
+ {
+ extended_residual.resize(residual.size() + m_projection.m_lagrangeMultipliers.size());
+ for (int i = 0; i < residual.size(); ++i)
+ {
+ extended_residual[i] = residual[i];
+ }
+ int offset = residual.size();
+ for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i)
+ {
+ const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[i];
+ extended_residual[offset + i].setZero();
+ for (int d = 0; d < lm.m_num_constraints; ++d)
+ {
+ for (int n = 0; n < lm.m_num_nodes; ++n)
+ {
+ extended_residual[offset + i][d] += lm.m_weights[n] * m_dv[lm.m_indices[n]].dot(lm.m_dirs[d]);
+ }
+ }
+ }
+ }
};
#endif /* btBackwardEulerObjective_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp
index 7724a8ec69..132699c54f 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp
@@ -18,13 +18,15 @@
#include "btDeformableBodySolver.h"
#include "btSoftBodyInternals.h"
#include "LinearMath/btQuickprof.h"
-static const int kMaxConjugateGradientIterations = 50;
+static const int kMaxConjugateGradientIterations = 50;
btDeformableBodySolver::btDeformableBodySolver()
: m_numNodes(0)
, m_cg(kMaxConjugateGradientIterations)
+, m_cr(kMaxConjugateGradientIterations)
, m_maxNewtonIterations(5)
, m_newtonTolerance(1e-4)
, m_lineSearch(false)
+, m_useProjection(false)
{
m_objective = new btDeformableBackwardEulerObjective(m_softBodies, m_backupVelocity);
}
@@ -41,7 +43,22 @@ void btDeformableBodySolver::solveDeformableConstraints(btScalar solverdt)
{
m_objective->computeResidual(solverdt, m_residual);
m_objective->applyDynamicFriction(m_residual);
- computeStep(m_dv, m_residual);
+ if (m_useProjection)
+ {
+ computeStep(m_dv, m_residual);
+ }
+ else
+ {
+ TVStack rhs, x;
+ m_objective->addLagrangeMultiplierRHS(m_residual, m_dv, rhs);
+ m_objective->addLagrangeMultiplier(m_dv, x);
+ m_objective->m_preconditioner->reinitialize(true);
+ computeStep(x, rhs);
+ for (int i = 0; i<m_dv.size(); ++i)
+ {
+ m_dv[i] = x[i];
+ }
+ }
updateVelocity();
}
else
@@ -63,7 +80,7 @@ void btDeformableBodySolver::solveDeformableConstraints(btScalar solverdt)
++counter;
}
}
-
+
m_objective->computeResidual(solverdt, m_residual);
if (m_objective->computeNorm(m_residual) < m_newtonTolerance && i > 0)
{
@@ -200,7 +217,10 @@ void btDeformableBodySolver::updateDv(btScalar scale)
void btDeformableBodySolver::computeStep(TVStack& ddv, const TVStack& residual)
{
- m_cg.solve(*m_objective, ddv, residual);
+ if (m_useProjection)
+ m_cg.solve(*m_objective, ddv, residual, false);
+ else
+ m_cr.solve(*m_objective, ddv, residual, false);
}
void btDeformableBodySolver::reinitialize(const btAlignedObjectArray<btSoftBody *>& softBodies, btScalar dt)
@@ -226,27 +246,22 @@ void btDeformableBodySolver::reinitialize(const btAlignedObjectArray<btSoftBody
m_dt = dt;
m_objective->reinitialize(nodeUpdated, dt);
+ updateSoftBodies();
}
-void btDeformableBodySolver::setConstraints()
+void btDeformableBodySolver::setConstraints(const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("setConstraint");
- m_objective->setConstraints();
+ m_objective->setConstraints(infoGlobal);
}
-btScalar btDeformableBodySolver::solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies)
+btScalar btDeformableBodySolver::solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal)
{
BT_PROFILE("solveContactConstraints");
- btScalar maxSquaredResidual = m_objective->m_projection.update(deformableBodies,numDeformableBodies);
+ btScalar maxSquaredResidual = m_objective->m_projection.update(deformableBodies,numDeformableBodies, infoGlobal);
return maxSquaredResidual;
}
-btScalar btDeformableBodySolver::solveSplitImpulse(const btContactSolverInfo& infoGlobal)
-{
- BT_PROFILE("solveSplitImpulse");
- return m_objective->m_projection.solveSplitImpulse(infoGlobal);
-}
-
void btDeformableBodySolver::splitImpulseSetup(const btContactSolverInfo& infoGlobal)
{
m_objective->m_projection.splitImpulseSetup(infoGlobal);
@@ -333,8 +348,10 @@ void btDeformableBodySolver::setupDeformableSolve(bool implicit)
m_backupVelocity[counter] = psb->m_nodes[j].m_vn;
}
else
+ {
m_dv[counter] = psb->m_nodes[j].m_v - m_backupVelocity[counter];
- psb->m_nodes[j].m_v = m_backupVelocity[counter] + psb->m_nodes[j].m_vsplit;
+ }
+ psb->m_nodes[j].m_v = m_backupVelocity[counter];
++counter;
}
}
@@ -385,6 +402,7 @@ void btDeformableBodySolver::predictMotion(btScalar solverdt)
void btDeformableBodySolver::predictDeformableMotion(btSoftBody* psb, btScalar dt)
{
+ BT_PROFILE("btDeformableBodySolver::predictDeformableMotion");
int i, ni;
/* Update */
@@ -423,40 +441,22 @@ void btDeformableBodySolver::predictDeformableMotion(btSoftBody* psb, btScalar d
n.m_v *= max_v;
}
n.m_q = n.m_x + n.m_v * dt;
+ n.m_penetration = 0;
}
/* Nodes */
- ATTRIBUTE_ALIGNED16(btDbvtVolume)
- vol;
- for (i = 0, ni = psb->m_nodes.size(); i < ni; ++i)
- {
- btSoftBody::Node& n = psb->m_nodes[i];
- btVector3 points[2] = {n.m_x, n.m_q};
- vol = btDbvtVolume::FromPoints(points, 2);
- vol.Expand(btVector3(psb->m_sst.radmrg, psb->m_sst.radmrg, psb->m_sst.radmrg));
- psb->m_ndbvt.update(n.m_leaf, vol);
- }
-
+ psb->updateNodeTree(true, true);
if (!psb->m_fdbvt.empty())
{
- for (int i = 0; i < psb->m_faces.size(); ++i)
- {
- btSoftBody::Face& f = psb->m_faces[i];
- btVector3 points[6] = {f.m_n[0]->m_x, f.m_n[0]->m_q,
- f.m_n[1]->m_x, f.m_n[1]->m_q,
- f.m_n[2]->m_x, f.m_n[2]->m_q};
- vol = btDbvtVolume::FromPoints(points, 6);
- vol.Expand(btVector3(psb->m_sst.radmrg, psb->m_sst.radmrg, psb->m_sst.radmrg));
- psb->m_fdbvt.update(f.m_leaf, vol);
- }
+ psb->updateFaceTree(true, true);
}
- /* Clear contacts */
+ /* Clear contacts */
psb->m_nodeRigidContacts.resize(0);
psb->m_faceRigidContacts.resize(0);
psb->m_faceNodeContacts.resize(0);
/* Optimize dbvt's */
- psb->m_ndbvt.optimizeIncremental(1);
- psb->m_fdbvt.optimizeIncremental(1);
+// psb->m_ndbvt.optimizeIncremental(1);
+// psb->m_fdbvt.optimizeIncremental(1);
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h
index f78a8f696b..d4e5f4c603 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h
@@ -22,7 +22,8 @@
#include "btDeformableMultiBodyDynamicsWorld.h"
#include "BulletDynamics/Featherstone/btMultiBodyLinkCollider.h"
#include "BulletDynamics/Featherstone/btMultiBodyConstraint.h"
-
+#include "btConjugateResidual.h"
+#include "btConjugateGradient.h"
struct btCollisionObjectWrapper;
class btDeformableBackwardEulerObjective;
class btDeformableMultiBodyDynamicsWorld;
@@ -40,14 +41,15 @@ protected:
TVStack m_backupVelocity; // backed up v, equals v_n for implicit, equals v_{n+1}^* for explicit
btScalar m_dt; // dt
btConjugateGradient<btDeformableBackwardEulerObjective> m_cg; // CG solver
+ btConjugateResidual<btDeformableBackwardEulerObjective> m_cr; // CR solver
bool m_implicit; // use implicit scheme if true, explicit scheme if false
int m_maxNewtonIterations; // max number of newton iterations
btScalar m_newtonTolerance; // stop newton iterations if f(x) < m_newtonTolerance
bool m_lineSearch; // If true, use newton's method with line search under implicit scheme
-
public:
// handles data related to objective function
btDeformableBackwardEulerObjective* m_objective;
+ bool m_useProjection;
btDeformableBodySolver();
@@ -61,15 +63,11 @@ public:
// update soft body normals
virtual void updateSoftBodies();
+ virtual btScalar solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal);
+
// solve the momentum equation
virtual void solveDeformableConstraints(btScalar solverdt);
- // solve the contact between deformable and rigid as well as among deformables
- btScalar solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies);
-
- // solve the position error between deformable and rigid as well as among deformables;
- btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal);
-
// set up the position error in split impulse
void splitImpulseSetup(const btContactSolverInfo& infoGlobal);
@@ -77,7 +75,7 @@ public:
void reinitialize(const btAlignedObjectArray<btSoftBody *>& softBodies, btScalar dt);
// set up contact constraints
- void setConstraints();
+ void setConstraints(const btContactSolverInfo& infoGlobal);
// add in elastic forces and gravity to obtain v_{n+1}^* and calls predictDeformableMotion
virtual void predictMotion(btScalar solverdt);
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp
index e8219dc50e..2864446de6 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp
@@ -15,9 +15,9 @@
#include "btDeformableContactConstraint.h"
/* ================ Deformable Node Anchor =================== */
-btDeformableNodeAnchorConstraint::btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& a)
+btDeformableNodeAnchorConstraint::btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& a, const btContactSolverInfo& infoGlobal)
: m_anchor(&a)
-, btDeformableContactConstraint(a.m_cti.m_normal)
+, btDeformableContactConstraint(a.m_cti.m_normal, infoGlobal)
{
}
@@ -79,14 +79,14 @@ btVector3 btDeformableNodeAnchorConstraint::getVa() const
return va;
}
-btScalar btDeformableNodeAnchorConstraint::solveConstraint()
+btScalar btDeformableNodeAnchorConstraint::solveConstraint(const btContactSolverInfo& infoGlobal)
{
const btSoftBody::sCti& cti = m_anchor->m_cti;
btVector3 va = getVa();
btVector3 vb = getVb();
btVector3 vr = (vb - va);
// + (m_anchor->m_node->m_x - cti.m_colObj->getWorldTransform() * m_anchor->m_local) * 10.0
- const btScalar dn = btDot(vr, cti.m_normal);
+ const btScalar dn = btDot(vr, vr);
// dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
btScalar residualSquare = dn*dn;
btVector3 impulse = m_anchor->m_c0 * vr;
@@ -134,14 +134,15 @@ void btDeformableNodeAnchorConstraint::applyImpulse(const btVector3& impulse)
}
/* ================ Deformable vs. Rigid =================== */
-btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c)
+btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal)
: m_contact(&c)
-, btDeformableContactConstraint(c.m_cti.m_normal)
+, btDeformableContactConstraint(c.m_cti.m_normal, infoGlobal)
{
m_total_normal_dv.setZero();
m_total_tangent_dv.setZero();
- // penetration is non-positive. The magnitude of penetration is the depth of penetration.
- m_penetration = btMin(btScalar(0), c.m_cti.m_offset);
+ // The magnitude of penetration is the depth of penetration.
+ m_penetration = c.m_cti.m_offset;
+// m_penetration = btMin(btScalar(0),c.m_cti.m_offset);
}
btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other)
@@ -206,16 +207,16 @@ btVector3 btDeformableRigidContactConstraint::getVa() const
return va;
}
-btScalar btDeformableRigidContactConstraint::solveConstraint()
+btScalar btDeformableRigidContactConstraint::solveConstraint(const btContactSolverInfo& infoGlobal)
{
const btSoftBody::sCti& cti = m_contact->m_cti;
btVector3 va = getVa();
btVector3 vb = getVb();
btVector3 vr = vb - va;
- const btScalar dn = btDot(vr, cti.m_normal);
+ btScalar dn = btDot(vr, cti.m_normal) + m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep;
// dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt
btScalar residualSquare = dn*dn;
- btVector3 impulse = m_contact->m_c0 * vr;
+ btVector3 impulse = m_contact->m_c0 * (vr + m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep * cti.m_normal) ;
const btVector3 impulse_normal = m_contact->m_c0 * (cti.m_normal * dn);
btVector3 impulse_tangent = impulse - impulse_normal;
btVector3 old_total_tangent_dv = m_total_tangent_dv;
@@ -256,6 +257,8 @@ btScalar btDeformableRigidContactConstraint::solveConstraint()
impulse = impulse_normal + impulse_tangent;
// apply impulse to deformable nodes involved and change their velocities
applyImpulse(impulse);
+ if (residualSquare < 1e-7)
+ return residualSquare;
// apply impulse to the rigid/multibodies involved and change their velocities
if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
{
@@ -285,43 +288,17 @@ btScalar btDeformableRigidContactConstraint::solveConstraint()
}
}
}
+// va = getVa();
+// vb = getVb();
+// vr = vb - va;
+// btScalar dn1 = btDot(vr, cti.m_normal) / 150;
+// m_penetration += dn1;
return residualSquare;
}
-
-btScalar btDeformableRigidContactConstraint::solveSplitImpulse(const btContactSolverInfo& infoGlobal)
-{
- const btSoftBody::sCti& cti = m_contact->m_cti;
- const btScalar dn = m_penetration;
- if (dn != 0)
- {
- const btVector3 impulse = (m_contact->m_c0 * (cti.m_normal * dn / infoGlobal.m_timeStep));
- // one iteration of the position impulse corrects all the position error at this timestep
- m_penetration -= dn;
- // apply impulse to deformable nodes involved and change their position
- applySplitImpulse(impulse);
- // apply impulse to the rigid/multibodies involved and change their position
- if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY)
- {
- btRigidBody* rigidCol = 0;
- rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj);
- if (rigidCol)
- {
- rigidCol->applyPushImpulse(impulse, m_contact->m_c1);
- }
- }
- else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK)
- {
- // todo xuchenhan@
- }
- return (m_penetration/infoGlobal.m_timeStep) * (m_penetration/infoGlobal.m_timeStep);
- }
- return 0;
-}
-
/* ================ Node vs. Rigid =================== */
-btDeformableNodeRigidContactConstraint::btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact)
+btDeformableNodeRigidContactConstraint::btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal)
: m_node(contact.m_node)
- , btDeformableRigidContactConstraint(contact)
+ , btDeformableRigidContactConstraint(contact, infoGlobal)
{
}
@@ -349,22 +326,17 @@ void btDeformableNodeRigidContactConstraint::applyImpulse(const btVector3& impul
contact->m_node->m_v -= dv;
}
-void btDeformableNodeRigidContactConstraint::applySplitImpulse(const btVector3& impulse)
-{
- const btSoftBody::DeformableNodeRigidContact* contact = getContact();
- btVector3 dv = impulse * contact->m_c2;
- contact->m_node->m_vsplit -= dv;
-};
-
/* ================ Face vs. Rigid =================== */
-btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact)
+btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting)
: m_face(contact.m_face)
-, btDeformableRigidContactConstraint(contact)
+, m_useStrainLimiting(useStrainLimiting)
+, btDeformableRigidContactConstraint(contact, infoGlobal)
{
}
btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other)
: m_face(other.m_face)
+, m_useStrainLimiting(other.m_useStrainLimiting)
, btDeformableRigidContactConstraint(other)
{
}
@@ -411,47 +383,70 @@ void btDeformableFaceRigidContactConstraint::applyImpulse(const btVector3& impul
v1 -= dv * contact->m_weights[1];
if (im2 > 0)
v2 -= dv * contact->m_weights[2];
-
- // apply strain limiting to prevent undamped modes
- btScalar m01 = (btScalar(1)/(im0 + im1));
- btScalar m02 = (btScalar(1)/(im0 + im2));
- btScalar m12 = (btScalar(1)/(im1 + im2));
-
- btVector3 dv0 = im0 * (m01 * (v1-v0) + m02 * (v2-v0));
- btVector3 dv1 = im1 * (m01 * (v0-v1) + m12 * (v2-v1));
- btVector3 dv2 = im2 * (m12 * (v1-v2) + m02 * (v0-v2));
-
- v0 += dv0;
- v1 += dv1;
- v2 += dv2;
-}
-
-void btDeformableFaceRigidContactConstraint::applySplitImpulse(const btVector3& impulse)
-{
- const btSoftBody::DeformableFaceRigidContact* contact = getContact();
- btVector3 dv = impulse * contact->m_c2;
- btSoftBody::Face* face = contact->m_face;
-
- btVector3& v0 = face->m_n[0]->m_vsplit;
- btVector3& v1 = face->m_n[1]->m_vsplit;
- btVector3& v2 = face->m_n[2]->m_vsplit;
- const btScalar& im0 = face->m_n[0]->m_im;
- const btScalar& im1 = face->m_n[1]->m_im;
- const btScalar& im2 = face->m_n[2]->m_im;
- if (im0 > 0)
- v0 -= dv * contact->m_weights[0];
- if (im1 > 0)
- v1 -= dv * contact->m_weights[1];
- if (im2 > 0)
- v2 -= dv * contact->m_weights[2];
+ if (m_useStrainLimiting)
+ {
+ btScalar relaxation = 1./btScalar(m_infoGlobal->m_numIterations);
+ btScalar m01 = (relaxation/(im0 + im1));
+ btScalar m02 = (relaxation/(im0 + im2));
+ btScalar m12 = (relaxation/(im1 + im2));
+ #ifdef USE_STRAIN_RATE_LIMITING
+ // apply strain limiting to prevent the new velocity to change the current length of the edge by more than 1%.
+ btScalar p = 0.01;
+ btVector3& x0 = face->m_n[0]->m_x;
+ btVector3& x1 = face->m_n[1]->m_x;
+ btVector3& x2 = face->m_n[2]->m_x;
+ const btVector3 x_diff[3] = {x1-x0, x2-x0, x2-x1};
+ const btVector3 v_diff[3] = {v1-v0, v2-v0, v2-v1};
+ btVector3 u[3];
+ btScalar x_diff_dot_u, dn[3];
+ btScalar dt = m_infoGlobal->m_timeStep;
+ for (int i = 0; i < 3; ++i)
+ {
+ btScalar x_diff_norm = x_diff[i].safeNorm();
+ btScalar x_diff_norm_new = (x_diff[i] + v_diff[i] * dt).safeNorm();
+ btScalar strainRate = x_diff_norm_new/x_diff_norm;
+ u[i] = v_diff[i];
+ u[i].safeNormalize();
+ if (x_diff_norm == 0 || (1-p <= strainRate && strainRate <= 1+p))
+ {
+ dn[i] = 0;
+ continue;
+ }
+ x_diff_dot_u = btDot(x_diff[i], u[i]);
+ btScalar s;
+ if (1-p > strainRate)
+ {
+ s = 1/dt * (-x_diff_dot_u - btSqrt(x_diff_dot_u*x_diff_dot_u + (p*p-2*p) * x_diff_norm * x_diff_norm));
+ }
+ else
+ {
+ s = 1/dt * (-x_diff_dot_u + btSqrt(x_diff_dot_u*x_diff_dot_u + (p*p+2*p) * x_diff_norm * x_diff_norm));
+ }
+ // x_diff_norm_new = (x_diff[i] + s * u[i] * dt).safeNorm();
+ // strainRate = x_diff_norm_new/x_diff_norm;
+ dn[i] = s - v_diff[i].safeNorm();
+ }
+ btVector3 dv0 = im0 * (m01 * u[0]*(-dn[0]) + m02 * u[1]*-(dn[1]));
+ btVector3 dv1 = im1 * (m01 * u[0]*(dn[0]) + m12 * u[2]*(-dn[2]));
+ btVector3 dv2 = im2 * (m12 * u[2]*(dn[2]) + m02 * u[1]*(dn[1]));
+ #else
+ // apply strain limiting to prevent undamped modes
+ btVector3 dv0 = im0 * (m01 * (v1-v0) + m02 * (v2-v0));
+ btVector3 dv1 = im1 * (m01 * (v0-v1) + m12 * (v2-v1));
+ btVector3 dv2 = im2 * (m12 * (v1-v2) + m02 * (v0-v2));
+ #endif
+ v0 += dv0;
+ v1 += dv1;
+ v2 += dv2;
+ }
}
/* ================ Face vs. Node =================== */
-btDeformableFaceNodeContactConstraint::btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact)
+btDeformableFaceNodeContactConstraint::btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal)
: m_node(contact.m_node)
, m_face(contact.m_face)
, m_contact(&contact)
-, btDeformableContactConstraint(contact.m_normal)
+, btDeformableContactConstraint(contact.m_normal, infoGlobal)
{
m_total_normal_dv.setZero();
m_total_tangent_dv.setZero();
@@ -487,7 +482,7 @@ btVector3 btDeformableFaceNodeContactConstraint::getDv(const btSoftBody::Node* n
return dv * contact->m_weights[2];
}
-btScalar btDeformableFaceNodeContactConstraint::solveConstraint()
+btScalar btDeformableFaceNodeContactConstraint::solveConstraint(const btContactSolverInfo& infoGlobal)
{
btVector3 va = getVa();
btVector3 vb = getVb();
@@ -577,15 +572,4 @@ void btDeformableFaceNodeContactConstraint::applyImpulse(const btVector3& impuls
{
v2 -= dvb * contact->m_weights[2];
}
- // todo: Face node constraints needs more work
-// btScalar m01 = (btScalar(1)/(im0 + im1));
-// btScalar m02 = (btScalar(1)/(im0 + im2));
-// btScalar m12 = (btScalar(1)/(im1 + im2));
-//
-// btVector3 dv0 = im0 * (m01 * (v1-v0) + m02 * (v2-v0));
-// btVector3 dv1 = im1 * (m01 * (v0-v1) + m12 * (v2-v1));
-// btVector3 dv2 = im2 * (m12 * (v1-v2) + m02 * (v0-v2));
-// v0 += dv0;
-// v1 += dv1;
-// v2 += dv2;
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h
index 912119e7c3..9f9d5bf0a3 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h
@@ -24,34 +24,33 @@ public:
// True if the friction is static
// False if the friction is dynamic
bool m_static;
-
- // normal of the contact
- btVector3 m_normal;
-
- btDeformableContactConstraint(const btVector3& normal): m_static(false), m_normal(normal)
- {
- }
-
- btDeformableContactConstraint(bool isStatic, const btVector3& normal): m_static(isStatic), m_normal(normal)
- {
- }
-
- btDeformableContactConstraint(const btDeformableContactConstraint& other)
- : m_static(other.m_static)
- , m_normal(other.m_normal)
- {
-
- }
- btDeformableContactConstraint(){}
-
+ const btContactSolverInfo* m_infoGlobal;
+
+ // normal of the contact
+ btVector3 m_normal;
+
+ btDeformableContactConstraint(const btVector3& normal, const btContactSolverInfo& infoGlobal): m_static(false), m_normal(normal), m_infoGlobal(&infoGlobal)
+ {
+ }
+
+ btDeformableContactConstraint(bool isStatic, const btVector3& normal, const btContactSolverInfo& infoGlobal): m_static(isStatic), m_normal(normal), m_infoGlobal(&infoGlobal)
+ {
+ }
+
+ btDeformableContactConstraint(){}
+
+ btDeformableContactConstraint(const btDeformableContactConstraint& other)
+ : m_static(other.m_static)
+ , m_normal(other.m_normal)
+ , m_infoGlobal(other.m_infoGlobal)
+ {
+ }
+
virtual ~btDeformableContactConstraint(){}
// solve the constraint with inelastic impulse and return the error, which is the square of normal component of velocity diffrerence
// the constraint is solved by calculating the impulse between object A and B in the contact and apply the impulse to both objects involved in the contact
- virtual btScalar solveConstraint() = 0;
-
- // solve the position error by applying an inelastic impulse that changes only the position (not velocity)
- virtual btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal) = 0;
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) = 0;
// get the velocity of the object A in the contact
virtual btVector3 getVa() const = 0;
@@ -65,9 +64,6 @@ public:
// apply impulse to the soft body node and/or face involved
virtual void applyImpulse(const btVector3& impulse) = 0;
- // apply position based impulse to the soft body node and/or face involved
- virtual void applySplitImpulse(const btVector3& impulse) = 0;
-
// scale the penetration depth by erp
virtual void setPenetrationScale(btScalar scale) = 0;
};
@@ -77,29 +73,21 @@ public:
class btDeformableStaticConstraint : public btDeformableContactConstraint
{
public:
- const btSoftBody::Node* m_node;
-
- btDeformableStaticConstraint(){}
+ btSoftBody::Node* m_node;
- btDeformableStaticConstraint(const btSoftBody::Node* node): m_node(node), btDeformableContactConstraint(false, btVector3(0,0,0))
+ btDeformableStaticConstraint(btSoftBody::Node* node, const btContactSolverInfo& infoGlobal): m_node(node), btDeformableContactConstraint(false, btVector3(0,0,0), infoGlobal)
{
}
-
+ btDeformableStaticConstraint(){}
btDeformableStaticConstraint(const btDeformableStaticConstraint& other)
: m_node(other.m_node)
, btDeformableContactConstraint(other)
{
-
}
virtual ~btDeformableStaticConstraint(){}
- virtual btScalar solveConstraint()
- {
- return 0;
- }
-
- virtual btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal)
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal)
{
return 0;
}
@@ -120,7 +108,6 @@ public:
}
virtual void applyImpulse(const btVector3& impulse){}
- virtual void applySplitImpulse(const btVector3& impulse){}
virtual void setPenetrationScale(btScalar scale){}
};
@@ -130,19 +117,15 @@ class btDeformableNodeAnchorConstraint : public btDeformableContactConstraint
{
public:
const btSoftBody::DeformableNodeRigidAnchor* m_anchor;
-
- btDeformableNodeAnchorConstraint(){}
- btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& c);
+
+ btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& c, const btContactSolverInfo& infoGlobal);
btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other);
+ btDeformableNodeAnchorConstraint(){}
virtual ~btDeformableNodeAnchorConstraint()
{
}
- virtual btScalar solveConstraint();
- virtual btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal)
- {
- // todo xuchenhan@
- return 0;
- }
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
+
// object A is the rigid/multi body, and object B is the deformable node/face
virtual btVector3 getVa() const;
// get the velocity of the deformable node in contact
@@ -152,10 +135,7 @@ public:
return btVector3(0,0,0);
}
virtual void applyImpulse(const btVector3& impulse);
- virtual void applySplitImpulse(const btVector3& impulse)
- {
- // todo xuchenhan@
- };
+
virtual void setPenetrationScale(btScalar scale){}
};
@@ -169,10 +149,10 @@ public:
btVector3 m_total_tangent_dv;
btScalar m_penetration;
const btSoftBody::DeformableRigidContact* m_contact;
-
- btDeformableRigidContactConstraint(){}
- btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c);
+
+ btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal);
btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other);
+ btDeformableRigidContactConstraint(){}
virtual ~btDeformableRigidContactConstraint()
{
}
@@ -180,9 +160,7 @@ public:
// object A is the rigid/multi body, and object B is the deformable node/face
virtual btVector3 getVa() const;
- virtual btScalar solveConstraint();
-
- virtual btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal);
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
virtual void setPenetrationScale(btScalar scale)
{
@@ -196,12 +174,11 @@ class btDeformableNodeRigidContactConstraint : public btDeformableRigidContactCo
{
public:
// the deformable node in contact
- const btSoftBody::Node* m_node;
-
- btDeformableNodeRigidContactConstraint(){}
- btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact);
+ btSoftBody::Node* m_node;
+
+ btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal);
btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other);
-
+ btDeformableNodeRigidContactConstraint(){}
virtual ~btDeformableNodeRigidContactConstraint()
{
}
@@ -219,7 +196,6 @@ public:
}
virtual void applyImpulse(const btVector3& impulse);
- virtual void applySplitImpulse(const btVector3& impulse);
};
//
@@ -228,10 +204,10 @@ class btDeformableFaceRigidContactConstraint : public btDeformableRigidContactCo
{
public:
const btSoftBody::Face* m_face;
- btDeformableFaceRigidContactConstraint(){}
- btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact);
+ bool m_useStrainLimiting;
+ btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting);
btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other);
-
+ btDeformableFaceRigidContactConstraint(): m_useStrainLimiting(false) {}
virtual ~btDeformableFaceRigidContactConstraint()
{
}
@@ -249,7 +225,6 @@ public:
}
virtual void applyImpulse(const btVector3& impulse);
- virtual void applySplitImpulse(const btVector3& impulse);
};
//
@@ -263,19 +238,11 @@ public:
btVector3 m_total_normal_dv;
btVector3 m_total_tangent_dv;
- btDeformableFaceNodeContactConstraint(){}
-
- btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact);
-
+ btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal);
+ btDeformableFaceNodeContactConstraint(){}
virtual ~btDeformableFaceNodeContactConstraint(){}
- virtual btScalar solveConstraint();
-
- virtual btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal)
- {
- // todo: xuchenhan@
- return 0;
- }
+ virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal);
// get the velocity of the object A in the contact
virtual btVector3 getVa() const;
@@ -293,10 +260,7 @@ public:
}
virtual void applyImpulse(const btVector3& impulse);
- virtual void applySplitImpulse(const btVector3& impulse)
- {
- // todo xuchenhan@
- }
+
virtual void setPenetrationScale(btScalar scale){}
};
#endif /* BT_DEFORMABLE_CONTACT_CONSTRAINT_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp
index 5a4f3241b4..22ca8bf582 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp
@@ -17,7 +17,7 @@
#include "btDeformableMultiBodyDynamicsWorld.h"
#include <algorithm>
#include <cmath>
-btScalar btDeformableContactProjection::update(btCollisionObject** deformableBodies,int numDeformableBodies)
+btScalar btDeformableContactProjection::update(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal)
{
btScalar residualSquare = 0;
for (int i = 0; i < numDeformableBodies; ++i)
@@ -32,25 +32,25 @@ btScalar btDeformableContactProjection::update(btCollisionObject** deformableBod
for (int k = 0; k < m_nodeRigidConstraints[j].size(); ++k)
{
btDeformableNodeRigidContactConstraint& constraint = m_nodeRigidConstraints[j][k];
- btScalar localResidualSquare = constraint.solveConstraint();
+ btScalar localResidualSquare = constraint.solveConstraint(infoGlobal);
residualSquare = btMax(residualSquare, localResidualSquare);
}
for (int k = 0; k < m_nodeAnchorConstraints[j].size(); ++k)
{
btDeformableNodeAnchorConstraint& constraint = m_nodeAnchorConstraints[j][k];
- btScalar localResidualSquare = constraint.solveConstraint();
+ btScalar localResidualSquare = constraint.solveConstraint(infoGlobal);
residualSquare = btMax(residualSquare, localResidualSquare);
}
for (int k = 0; k < m_faceRigidConstraints[j].size(); ++k)
{
btDeformableFaceRigidContactConstraint& constraint = m_faceRigidConstraints[j][k];
- btScalar localResidualSquare = constraint.solveConstraint();
+ btScalar localResidualSquare = constraint.solveConstraint(infoGlobal);
residualSquare = btMax(residualSquare, localResidualSquare);
}
for (int k = 0; k < m_deformableConstraints[j].size(); ++k)
{
btDeformableFaceNodeContactConstraint& constraint = m_deformableConstraints[j][k];
- btScalar localResidualSquare = constraint.solveConstraint();
+ btScalar localResidualSquare = constraint.solveConstraint(infoGlobal);
residualSquare = btMax(residualSquare, localResidualSquare);
}
}
@@ -77,39 +77,8 @@ void btDeformableContactProjection::splitImpulseSetup(const btContactSolverInfo&
}
}
-btScalar btDeformableContactProjection::solveSplitImpulse(const btContactSolverInfo& infoGlobal)
-{
- btScalar residualSquare = 0;
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- // node constraints
- for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
- {
- btDeformableNodeRigidContactConstraint& constraint = m_nodeRigidConstraints[i][j];
- btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal);
- residualSquare = btMax(residualSquare, localResidualSquare);
- }
- // anchor constraints
- for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
- {
- btDeformableNodeAnchorConstraint& constraint = m_nodeAnchorConstraints[i][j];
- btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal);
- residualSquare = btMax(residualSquare, localResidualSquare);
- }
- // face constraints
- for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
- {
- btDeformableFaceRigidContactConstraint& constraint = m_faceRigidConstraints[i][j];
- btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal);
- residualSquare = btMax(residualSquare, localResidualSquare);
- }
-
- }
- return residualSquare;
-}
-
-void btDeformableContactProjection::setConstraints()
-{
+void btDeformableContactProjection::setConstraints(const btContactSolverInfo& infoGlobal)
+{
BT_PROFILE("setConstraints");
for (int i = 0; i < m_softBodies.size(); ++i)
{
@@ -124,7 +93,7 @@ void btDeformableContactProjection::setConstraints()
{
if (psb->m_nodes[j].m_im == 0)
{
- btDeformableStaticConstraint static_constraint(&psb->m_nodes[j]);
+ btDeformableStaticConstraint static_constraint(&psb->m_nodes[j], infoGlobal);
m_staticConstraints[i].push_back(static_constraint);
}
}
@@ -139,7 +108,7 @@ void btDeformableContactProjection::setConstraints()
continue;
}
anchor.m_c1 = anchor.m_cti.m_colObj->getWorldTransform().getBasis() * anchor.m_local;
- btDeformableNodeAnchorConstraint constraint(anchor);
+ btDeformableNodeAnchorConstraint constraint(anchor, infoGlobal);
m_nodeAnchorConstraints[i].push_back(constraint);
}
@@ -152,7 +121,7 @@ void btDeformableContactProjection::setConstraints()
{
continue;
}
- btDeformableNodeRigidContactConstraint constraint(contact);
+ btDeformableNodeRigidContactConstraint constraint(contact, infoGlobal);
btVector3 va = constraint.getVa();
btVector3 vb = constraint.getVb();
const btVector3 vr = vb - va;
@@ -173,7 +142,7 @@ void btDeformableContactProjection::setConstraints()
{
continue;
}
- btDeformableFaceRigidContactConstraint constraint(contact);
+ btDeformableFaceRigidContactConstraint constraint(contact, infoGlobal, m_useStrainLimiting);
btVector3 va = constraint.getVa();
btVector3 vb = constraint.getVb();
const btVector3 vr = vb - va;
@@ -184,253 +153,404 @@ void btDeformableContactProjection::setConstraints()
m_faceRigidConstraints[i].push_back(constraint);
}
}
-
- // set Deformable Face vs. Deformable Node constraint
- for (int j = 0; j < psb->m_faceNodeContacts.size(); ++j)
- {
- const btSoftBody::DeformableFaceNodeContact& contact = psb->m_faceNodeContacts[j];
-
- btDeformableFaceNodeContactConstraint constraint(contact);
- btVector3 va = constraint.getVa();
- btVector3 vb = constraint.getVb();
- const btVector3 vr = vb - va;
- const btScalar dn = btDot(vr, contact.m_normal);
- if (dn > -SIMD_EPSILON)
- {
- m_deformableConstraints[i].push_back(constraint);
- }
- }
}
}
void btDeformableContactProjection::project(TVStack& x)
{
- const int dim = 3;
- for (int index = 0; index < m_projectionsDict.size(); ++index)
- {
- btAlignedObjectArray<btVector3>& projectionDirs = *m_projectionsDict.getAtIndex(index);
- size_t i = m_projectionsDict.getKeyAtIndex(index).getUid1();
- if (projectionDirs.size() >= dim)
- {
- // static node
- x[i].setZero();
- continue;
- }
- else if (projectionDirs.size() == 2)
- {
- btVector3 dir0 = projectionDirs[0];
- btVector3 dir1 = projectionDirs[1];
- btVector3 free_dir = btCross(dir0, dir1);
- if (free_dir.safeNorm() < SIMD_EPSILON)
- {
- x[i] -= x[i].dot(dir0) * dir0;
- x[i] -= x[i].dot(dir1) * dir1;
- }
- else
- {
- free_dir.normalize();
- x[i] = x[i].dot(free_dir) * free_dir;
- }
- }
- else
- {
- btAssert(projectionDirs.size() == 1);
- btVector3 dir0 = projectionDirs[0];
- x[i] -= x[i].dot(dir0) * dir0;
- }
- }
+#ifndef USE_MGS
+ const int dim = 3;
+ for (int index = 0; index < m_projectionsDict.size(); ++index)
+ {
+ btAlignedObjectArray<btVector3>& projectionDirs = *m_projectionsDict.getAtIndex(index);
+ size_t i = m_projectionsDict.getKeyAtIndex(index).getUid1();
+ if (projectionDirs.size() >= dim)
+ {
+ // static node
+ x[i].setZero();
+ continue;
+ }
+ else if (projectionDirs.size() == 2)
+ {
+ btVector3 dir0 = projectionDirs[0];
+ btVector3 dir1 = projectionDirs[1];
+ btVector3 free_dir = btCross(dir0, dir1);
+ if (free_dir.safeNorm() < SIMD_EPSILON)
+ {
+ x[i] -= x[i].dot(dir0) * dir0;
+ x[i] -= x[i].dot(dir1) * dir1;
+ }
+ else
+ {
+ free_dir.normalize();
+ x[i] = x[i].dot(free_dir) * free_dir;
+ }
+ }
+ else
+ {
+ btAssert(projectionDirs.size() == 1);
+ btVector3 dir0 = projectionDirs[0];
+ x[i] -= x[i].dot(dir0) * dir0;
+ }
+ }
+#else
+ btReducedVector p(x.size());
+ for (int i = 0; i < m_projections.size(); ++i)
+ {
+ p += (m_projections[i].dot(x) * m_projections[i]);
+ }
+ for (int i = 0; i < p.m_indices.size(); ++i)
+ {
+ x[p.m_indices[i]] -= p.m_vecs[i];
+ }
+#endif
}
void btDeformableContactProjection::setProjection()
{
- btAlignedObjectArray<btVector3> units;
- units.push_back(btVector3(1,0,0));
- units.push_back(btVector3(0,1,0));
- units.push_back(btVector3(0,0,1));
- for (int i = 0; i < m_softBodies.size(); ++i)
- {
- btSoftBody* psb = m_softBodies[i];
- if (!psb->isActive())
- {
- continue;
- }
- for (int j = 0; j < m_staticConstraints[i].size(); ++j)
- {
- int index = m_staticConstraints[i][j].m_node->index;
- if (m_projectionsDict.find(index) == NULL)
+#ifndef USE_MGS
+ BT_PROFILE("btDeformableContactProjection::setProjection");
+ btAlignedObjectArray<btVector3> units;
+ units.push_back(btVector3(1,0,0));
+ units.push_back(btVector3(0,1,0));
+ units.push_back(btVector3(0,0,1));
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < m_staticConstraints[i].size(); ++j)
+ {
+ int index = m_staticConstraints[i][j].m_node->index;
+ m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
+ {
+ int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
+ m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
+ {
+ int index = m_nodeRigidConstraints[i][j].m_node->index;
+ m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ if (m_nodeRigidConstraints[i][j].m_static)
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ else
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ btAlignedObjectArray<btVector3> projections;
+ projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
+ m_projectionsDict.insert(index, projections);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
+ }
+ }
+ }
+ for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ {
+ const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
+ btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ for (int k = 0; k < 3; ++k)
+ {
+ face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
+ }
+ for (int k = 0; k < 3; ++k)
+ {
+ btSoftBody::Node* node = face->m_n[k];
+ node->m_penetration = true;
+ int index = node->index;
+ if (m_faceRigidConstraints[i][j].m_static)
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ m_projectionsDict.insert(index, units);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ for (int k = 0; k < 3; ++k)
+ {
+ projections.push_back(units[k]);
+ }
+ }
+ }
+ else
+ {
+ if (m_projectionsDict.find(index) == NULL)
+ {
+ btAlignedObjectArray<btVector3> projections;
+ projections.push_back(m_faceRigidConstraints[i][j].m_normal);
+ m_projectionsDict.insert(index, projections);
+ }
+ else
+ {
+ btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+ projections.push_back(m_faceRigidConstraints[i][j].m_normal);
+ }
+ }
+ }
+ }
+ }
+#else
+ int dof = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ dof += m_softBodies[i]->m_nodes.size();
+ }
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < m_staticConstraints[i].size(); ++j)
+ {
+ int index = m_staticConstraints[i][j].m_node->index;
+ m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
+ btAlignedObjectArray<int> indices;
+ btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3;
+ indices.push_back(index);
+ vecs1.push_back(btVector3(1,0,0));
+ vecs2.push_back(btVector3(0,1,0));
+ vecs3.push_back(btVector3(0,0,1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs2));
+ m_projections.push_back(btReducedVector(dof, indices, vecs3));
+ }
+
+ for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
+ {
+ int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
+ m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
+ btAlignedObjectArray<int> indices;
+ btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3;
+ indices.push_back(index);
+ vecs1.push_back(btVector3(1,0,0));
+ vecs2.push_back(btVector3(0,1,0));
+ vecs3.push_back(btVector3(0,0,1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs2));
+ m_projections.push_back(btReducedVector(dof, indices, vecs3));
+ }
+ for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
+ {
+ int index = m_nodeRigidConstraints[i][j].m_node->index;
+ m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ btAlignedObjectArray<int> indices;
+ indices.push_back(index);
+ btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3;
+ if (m_nodeRigidConstraints[i][j].m_static)
+ {
+ vecs1.push_back(btVector3(1,0,0));
+ vecs2.push_back(btVector3(0,1,0));
+ vecs3.push_back(btVector3(0,0,1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ m_projections.push_back(btReducedVector(dof, indices, vecs2));
+ m_projections.push_back(btReducedVector(dof, indices, vecs3));
+ }
+ else
+ {
+ vecs1.push_back(m_nodeRigidConstraints[i][j].m_normal);
+ m_projections.push_back(btReducedVector(dof, indices, vecs1));
+ }
+ }
+ for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ {
+ const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
+ btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary;
+ btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ for (int k = 0; k < 3; ++k)
+ {
+ face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
+ }
+ if (m_faceRigidConstraints[i][j].m_static)
{
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
+ for (int l = 0; l < 3; ++l)
{
- projections.push_back(units[k]);
- }
- }
- }
- for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
- {
- int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
- {
- int index = m_nodeRigidConstraints[i][j].m_node->index;
- if (m_nodeRigidConstraints[i][j].m_static)
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
+
+ btReducedVector rv(dof);
for (int k = 0; k < 3; ++k)
{
- projections.push_back(units[k]);
+ rv.m_indices.push_back(face->m_n[k]->index);
+ btVector3 v(0,0,0);
+ v[l] = bary[k];
+ rv.m_vecs.push_back(v);
+ rv.sort();
}
+ m_projections.push_back(rv);
}
}
else
{
- if (m_projectionsDict.find(index) == NULL)
- {
- btAlignedObjectArray<btVector3> projections;
- projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
- m_projectionsDict.insert(index, projections);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- projections.push_back(m_nodeRigidConstraints[i][j].m_normal);
- }
- }
- }
- for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
- {
- const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
- for (int k = 0; k < 3; ++k)
- {
- const btSoftBody::Node* node = face->m_n[k];
- int index = node->index;
- if (m_faceRigidConstraints[i][j].m_static)
+ btReducedVector rv(dof);
+ for (int k = 0; k < 3; ++k)
{
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- else
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- btAlignedObjectArray<btVector3> projections;
- projections.push_back(m_faceRigidConstraints[i][j].m_normal);
- m_projectionsDict.insert(index, projections);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- projections.push_back(m_faceRigidConstraints[i][j].m_normal);
- }
+ rv.m_indices.push_back(face->m_n[k]->index);
+ rv.m_vecs.push_back(bary[k] * m_faceRigidConstraints[i][j].m_normal);
+ rv.sort();
}
+ m_projections.push_back(rv);
}
}
- for (int j = 0; j < m_deformableConstraints[i].size(); ++j)
- {
- const btSoftBody::Face* face = m_deformableConstraints[i][j].m_face;
- for (int k = 0; k < 3; ++k)
- {
- const btSoftBody::Node* node = face->m_n[k];
- int index = node->index;
- if (m_deformableConstraints[i][j].m_static)
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
- }
- else
- {
- if (m_projectionsDict.find(index) == NULL)
- {
- btAlignedObjectArray<btVector3> projections;
- projections.push_back(m_deformableConstraints[i][j].m_normal);
- m_projectionsDict.insert(index, projections);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- projections.push_back(m_deformableConstraints[i][j].m_normal);
- }
- }
- }
+ }
+ btModifiedGramSchmidt<btReducedVector> mgs(m_projections);
+ mgs.solve();
+ m_projections = mgs.m_out;
+#endif
+}
+
+void btDeformableContactProjection::checkConstraints(const TVStack& x)
+{
+ for (int i = 0; i < m_lagrangeMultipliers.size(); ++i)
+ {
+ btVector3 d(0,0,0);
+ const LagrangeMultiplier& lm = m_lagrangeMultipliers[i];
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ for (int k = 0; k < lm.m_num_nodes; ++k)
+ {
+ d[j] += lm.m_weights[k] * x[lm.m_indices[k]].dot(lm.m_dirs[j]);
+ }
+ }
+ printf("d = %f, %f, %f\n",d[0],d[1],d[2]);
+ }
+}
+
+void btDeformableContactProjection::setLagrangeMultiplier()
+{
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ for (int j = 0; j < m_staticConstraints[i].size(); ++j)
+ {
+ int index = m_staticConstraints[i][j].m_node->index;
+ m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 1;
+ lm.m_indices[0] = index;
+ lm.m_weights[0] = 1.0;
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1,0,0);
+ lm.m_dirs[1] = btVector3(0,1,0);
+ lm.m_dirs[2] = btVector3(0,0,1);
+ m_lagrangeMultipliers.push_back(lm);
+ }
+ for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j)
+ {
+ int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index;
+ m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 1;
+ lm.m_indices[0] = index;
+ lm.m_weights[0] = 1.0;
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1,0,0);
+ lm.m_dirs[1] = btVector3(0,1,0);
+ lm.m_dirs[2] = btVector3(0,0,1);
+ m_lagrangeMultipliers.push_back(lm);
+ }
+ for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j)
+ {
+ int index = m_nodeRigidConstraints[i][j].m_node->index;
+ m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 1;
+ lm.m_indices[0] = index;
+ lm.m_weights[0] = 1.0;
+ if (m_nodeRigidConstraints[i][j].m_static)
+ {
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1,0,0);
+ lm.m_dirs[1] = btVector3(0,1,0);
+ lm.m_dirs[2] = btVector3(0,0,1);
+ }
+ else
+ {
+ lm.m_num_constraints = 1;
+ lm.m_dirs[0] = m_nodeRigidConstraints[i][j].m_normal;
+ }
+ m_lagrangeMultipliers.push_back(lm);
+ }
+ for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j)
+ {
+ const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face;
- const btSoftBody::Node* node = m_deformableConstraints[i][j].m_node;
- int index = node->index;
- if (m_deformableConstraints[i][j].m_static)
+ btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary;
+ btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset;
+ LagrangeMultiplier lm;
+ lm.m_num_nodes = 3;
+ for (int k = 0; k<3; ++k)
{
- if (m_projectionsDict.find(index) == NULL)
- {
- m_projectionsDict.insert(index, units);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- for (int k = 0; k < 3; ++k)
- {
- projections.push_back(units[k]);
- }
- }
+ face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration);
+ lm.m_indices[k] = face->m_n[k]->index;
+ lm.m_weights[k] = bary[k];
+ }
+ if (m_faceRigidConstraints[i][j].m_static)
+ {
+ lm.m_num_constraints = 3;
+ lm.m_dirs[0] = btVector3(1,0,0);
+ lm.m_dirs[1] = btVector3(0,1,0);
+ lm.m_dirs[2] = btVector3(0,0,1);
}
else
{
- if (m_projectionsDict.find(index) == NULL)
- {
- btAlignedObjectArray<btVector3> projections;
- projections.push_back(m_deformableConstraints[i][j].m_normal);
- m_projectionsDict.insert(index, projections);
- }
- else
- {
- btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index];
- projections.push_back(m_deformableConstraints[i][j].m_normal);
- }
+ lm.m_num_constraints = 1;
+ lm.m_dirs[0] = m_faceRigidConstraints[i][j].m_normal;
}
+ m_lagrangeMultipliers.push_back(lm);
}
}
}
-
+//
void btDeformableContactProjection::applyDynamicFriction(TVStack& f)
{
for (int i = 0; i < m_softBodies.size(); ++i)
@@ -502,7 +622,12 @@ void btDeformableContactProjection::reinitialize(bool nodeUpdated)
m_faceRigidConstraints[i].clear();
m_deformableConstraints[i].clear();
}
- m_projectionsDict.clear();
+#ifndef USE_MGS
+ m_projectionsDict.clear();
+#else
+ m_projections.clear();
+#endif
+ m_lagrangeMultipliers.clear();
}
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h
index 3c4490765e..8d7e94d4fb 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h
@@ -21,30 +21,37 @@
#include "BulletDynamics/Featherstone/btMultiBodyConstraint.h"
#include "btDeformableContactConstraint.h"
#include "LinearMath/btHashMap.h"
+#include "LinearMath/btReducedVector.h"
+#include "LinearMath/btModifiedGramSchmidt.h"
#include <vector>
+
+struct LagrangeMultiplier
+{
+ int m_num_constraints; // Number of constraints
+ int m_num_nodes; // Number of nodes in these constraints
+ btScalar m_weights[3]; // weights of the nodes involved, same size as m_num_nodes
+ btVector3 m_dirs[3]; // Constraint directions, same size of m_num_constraints;
+ int m_indices[3]; // indices of the nodes involved, same size as m_num_nodes;
+};
+
+
class btDeformableContactProjection
{
public:
typedef btAlignedObjectArray<btVector3> TVStack;
btAlignedObjectArray<btSoftBody *>& m_softBodies;
-
-// // map from node index to static constraint
-// btHashMap<btHashInt, btDeformableStaticConstraint> m_staticConstraints;
-// // map from node index to node rigid constraint
-// btHashMap<btHashInt, btAlignedObjectArray<btDeformableNodeRigidContactConstraint> > m_nodeRigidConstraints;
-// // map from node index to face rigid constraint
-// btHashMap<btHashInt, btAlignedObjectArray<btDeformableFaceRigidContactConstraint*> > m_faceRigidConstraints;
-// // map from node index to deformable constraint
-// btHashMap<btHashInt, btAlignedObjectArray<btDeformableFaceNodeContactConstraint*> > m_deformableConstraints;
-// // map from node index to node anchor constraint
-// btHashMap<btHashInt, btDeformableNodeAnchorConstraint> m_nodeAnchorConstraints;
// all constraints involving face
btAlignedObjectArray<btDeformableContactConstraint*> m_allFaceConstraints;
-
+#ifndef USE_MGS
// map from node index to projection directions
btHashMap<btHashInt, btAlignedObjectArray<btVector3> > m_projectionsDict;
-
+#else
+ btAlignedObjectArray<btReducedVector> m_projections;
+#endif
+
+ btAlignedObjectArray<LagrangeMultiplier> m_lagrangeMultipliers;
+
// map from node index to static constraint
btAlignedObjectArray<btAlignedObjectArray<btDeformableStaticConstraint> > m_staticConstraints;
// map from node index to node rigid constraint
@@ -56,6 +63,8 @@ public:
// map from node index to node anchor constraint
btAlignedObjectArray<btAlignedObjectArray<btDeformableNodeAnchorConstraint> > m_nodeAnchorConstraints;
+ bool m_useStrainLimiting;
+
btDeformableContactProjection(btAlignedObjectArray<btSoftBody *>& softBodies)
: m_softBodies(softBodies)
{
@@ -72,13 +81,10 @@ public:
virtual void applyDynamicFriction(TVStack& f);
// update and solve the constraints
- virtual btScalar update(btCollisionObject** deformableBodies,int numDeformableBodies);
-
- // solve the position error using split impulse
- virtual btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal);
+ virtual btScalar update(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal);
// Add constraints to m_constraints. In addition, the constraints that each vertex own are recorded in m_constraintsDict.
- virtual void setConstraints();
+ virtual void setConstraints(const btContactSolverInfo& infoGlobal);
// Set up projections for each vertex by adding the projection direction to
virtual void setProjection();
@@ -86,5 +92,9 @@ public:
virtual void reinitialize(bool nodeUpdated);
virtual void splitImpulseSetup(const btContactSolverInfo& infoGlobal);
+
+ virtual void setLagrangeMultiplier();
+
+ void checkConstraints(const TVStack& x);
};
#endif /* btDeformableContactProjection_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h
index c2a26338e7..2d042df729 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h
@@ -114,6 +114,8 @@ public:
{
}
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
+
virtual btDeformableLagrangianForceType getForceType()
{
return BT_COROTATED_FORCE;
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h
index 33e5a8564a..13ee3eacb6 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h
@@ -50,6 +50,8 @@ public:
{
}
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
+
virtual void addScaledGravityForce(btScalar scale, TVStack& force)
{
int numNodes = getNumNodes();
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h
index 64e80e23b3..0b6447442d 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h
@@ -26,7 +26,8 @@ enum btDeformableLagrangianForceType
BT_MASSSPRING_FORCE = 2,
BT_COROTATED_FORCE = 3,
BT_NEOHOOKEAN_FORCE = 4,
- BT_LINEAR_ELASTICITY_FORCE = 5
+ BT_LINEAR_ELASTICITY_FORCE = 5,
+ BT_MOUSE_PICKING_FORCE = 6
};
static inline double randomDouble(double low, double high)
@@ -53,6 +54,9 @@ public:
// add damping df
virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) = 0;
+ // build diagonal of A matrix
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) = 0;
+
// add elastic df
virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) = 0;
@@ -85,6 +89,11 @@ public:
m_softBodies.push_back(psb);
}
+ virtual void removeSoftBody(btSoftBody* psb)
+ {
+ m_softBodies.remove(psb);
+ }
+
virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes)
{
m_nodes = nodes;
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h
index 54b4e4481d..b128df92cc 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h
@@ -149,6 +149,52 @@ public:
}
}
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA)
+ {
+ // implicit damping force differential
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (!psb->isActive())
+ {
+ continue;
+ }
+ btScalar scaled_k_damp = m_dampingStiffness * scale;
+ for (int j = 0; j < psb->m_links.size(); ++j)
+ {
+ const btSoftBody::Link& link = psb->m_links[j];
+ btSoftBody::Node* node1 = link.m_n[0];
+ btSoftBody::Node* node2 = link.m_n[1];
+ size_t id1 = node1->index;
+ size_t id2 = node2->index;
+ if (m_momentum_conserving)
+ {
+ if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (node2->m_x - node1->m_x).normalized();
+ for (int d = 0; d < 3; ++d)
+ {
+ if (node1->m_im > 0)
+ diagA[id1][d] -= scaled_k_damp * dir[d] * dir[d];
+ if (node2->m_im > 0)
+ diagA[id2][d] -= scaled_k_damp * dir[d] * dir[d];
+ }
+ }
+ }
+ else
+ {
+ for (int d = 0; d < 3; ++d)
+ {
+ if (node1->m_im > 0)
+ diagA[id1][d] -= scaled_k_damp;
+ if (node2->m_im > 0)
+ diagA[id2][d] -= scaled_k_damp;
+ }
+ }
+ }
+ }
+ }
+
virtual double totalElasticEnergy(btScalar dt)
{
double energy = 0;
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h
new file mode 100644
index 0000000000..07c10935f4
--- /dev/null
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h
@@ -0,0 +1,145 @@
+/*
+ Written by Xuchen Han <xuchenhan2015@u.northwestern.edu>
+
+ Bullet Continuous Collision Detection and Physics Library
+ Copyright (c) 2019 Google Inc. http://bulletphysics.org
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from the use of this software.
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it freely,
+ subject to the following restrictions:
+ 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#ifndef BT_MOUSE_PICKING_FORCE_H
+#define BT_MOUSE_PICKING_FORCE_H
+
+#include "btDeformableLagrangianForce.h"
+
+class btDeformableMousePickingForce : public btDeformableLagrangianForce
+{
+ // If true, the damping force will be in the direction of the spring
+ // If false, the damping force will be in the direction of the velocity
+ btScalar m_elasticStiffness, m_dampingStiffness;
+ const btSoftBody::Face& m_face;
+ btVector3 m_mouse_pos;
+ btScalar m_maxForce;
+public:
+ typedef btAlignedObjectArray<btVector3> TVStack;
+ btDeformableMousePickingForce(btScalar k, btScalar d, const btSoftBody::Face& face, btVector3 mouse_pos, btScalar maxForce = 0.3) : m_elasticStiffness(k), m_dampingStiffness(d), m_face(face), m_mouse_pos(mouse_pos), m_maxForce(maxForce)
+ {
+ }
+
+ virtual void addScaledForces(btScalar scale, TVStack& force)
+ {
+ addScaledDampingForce(scale, force);
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledExplicitForce(btScalar scale, TVStack& force)
+ {
+ addScaledElasticForce(scale, force);
+ }
+
+ virtual void addScaledDampingForce(btScalar scale, TVStack& force)
+ {
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 v_diff = m_face.m_n[i]->m_v;
+ btVector3 scaled_force = scale * m_dampingStiffness * v_diff;
+ if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
+ scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir;
+ }
+ force[m_face.m_n[i]->index] -= scaled_force;
+ }
+ }
+
+ virtual void addScaledElasticForce(btScalar scale, TVStack& force)
+ {
+ btScalar scaled_stiffness = scale * m_elasticStiffness;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
+ btVector3 scaled_force = scaled_stiffness * dir;
+ if (scaled_force.safeNorm() > m_maxForce)
+ {
+ scaled_force.safeNormalize();
+ scaled_force *= m_maxForce;
+ }
+ force[m_face.m_n[i]->index] -= scaled_force;
+ }
+ }
+
+ virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df)
+ {
+ btScalar scaled_k_damp = m_dampingStiffness * scale;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index];
+ if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
+ local_scaled_df= scaled_k_damp * dv[m_face.m_n[i]->index].dot(dir) * dir;
+ }
+ df[m_face.m_n[i]->index] -= local_scaled_df;
+ }
+ }
+
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
+
+ virtual double totalElasticEnergy(btScalar dt)
+ {
+ double energy = 0;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos);
+ btVector3 scaled_force = m_elasticStiffness * dir;
+ if (scaled_force.safeNorm() > m_maxForce)
+ {
+ scaled_force.safeNormalize();
+ scaled_force *= m_maxForce;
+ }
+ energy += 0.5 * scaled_force.dot(dir);
+ }
+ return energy;
+ }
+
+ virtual double totalDampingEnergy(btScalar dt)
+ {
+ double energy = 0;
+ for (int i = 0; i < 3; ++i)
+ {
+ btVector3 v_diff = m_face.m_n[i]->m_v;
+ btVector3 scaled_force = m_dampingStiffness * v_diff;
+ if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON)
+ {
+ btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized();
+ scaled_force = m_dampingStiffness * v_diff.dot(dir) * dir;
+ }
+ energy -= scaled_force.dot(m_face.m_n[i]->m_v) / dt;
+ }
+ return energy;
+ }
+
+ virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
+ {
+ //TODO
+ }
+
+ void setMousePos(const btVector3& p)
+ {
+ m_mouse_pos = p;
+ }
+
+ virtual btDeformableLagrangianForceType getForceType()
+ {
+ return BT_MOUSE_PICKING_FORCE;
+ }
+
+};
+
+#endif /* btMassSpring_h */
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp
index 06f95d69f6..c8cc47923e 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp
@@ -32,7 +32,7 @@ btScalar btDeformableMultiBodyConstraintSolver::solveDeformableGroupIterations(b
m_leastSquaresResidual = solveSingleIteration(iteration, bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer);
// solver body velocity -> rigid body velocity
solverBodyWriteBack(infoGlobal);
- btScalar deformableResidual = m_deformableSolver->solveContactConstraints(deformableBodies,numDeformableBodies);
+ btScalar deformableResidual = m_deformableSolver->solveContactConstraints(deformableBodies,numDeformableBodies, infoGlobal);
// update rigid body velocity in rigid/deformable contact
m_leastSquaresResidual = btMax(m_leastSquaresResidual, deformableResidual);
// solver body velocity <- rigid body velocity
@@ -112,7 +112,7 @@ void btDeformableMultiBodyConstraintSolver::solveGroupCacheFriendlySplitImpulseI
if (infoGlobal.m_splitImpulse)
{
{
- m_deformableSolver->splitImpulseSetup(infoGlobal);
+// m_deformableSolver->splitImpulseSetup(infoGlobal);
for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++)
{
btScalar leastSquaresResidual = 0.f;
@@ -127,8 +127,8 @@ void btDeformableMultiBodyConstraintSolver::solveGroupCacheFriendlySplitImpulseI
leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
}
// solve the position correction between deformable and rigid/multibody
- btScalar residual = m_deformableSolver->solveSplitImpulse(infoGlobal);
- leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
+// btScalar residual = m_deformableSolver->solveSplitImpulse(infoGlobal);
+// leastSquaresResidual = btMax(leastSquaresResidual, residual * residual);
}
if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.m_numIterations - 1))
{
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp
index 618e5c0d7b..6b742978ef 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp
@@ -22,7 +22,6 @@ Call internalStepSimulation multiple times, to achieve 240Hz (4 steps of 60Hz).
2. Detect discrete collisions between rigid and deformable bodies at position x_{n+1}^* = x_n + dt * v_{n+1}^*.
3a. Solve all constraints, including LCP. Contact, position correction due to numerical drift, friction, and anchors for deformable.
- TODO: add option for positional drift correction (using vel_target += erp * pos_error/dt
3b. 5 Newton steps (multiple step). Conjugent Gradient solves linear system. Deformable Damping: Then velocities of deformable bodies v_{n+1} are solved in
M(v_{n+1} - v_{n+1}^*) = damping_force * dt / mass,
@@ -58,14 +57,20 @@ m_deformableBodySolver(deformableBodySolver), m_solverCallback(0)
m_sbi.water_density = 0;
m_sbi.water_offset = 0;
m_sbi.water_normal = btVector3(0, 0, 0);
- m_sbi.m_gravity.setValue(0, -10, 0);
+ m_sbi.m_gravity.setValue(0, -9.8, 0);
m_internalTime = 0.0;
m_implicit = false;
m_lineSearch = false;
- m_selfCollision = true;
+ m_useProjection = true;
+ m_ccdIterations = 5;
m_solverDeformableBodyIslandCallback = new DeformableBodyInplaceSolverIslandCallback(constraintSolver, dispatcher);
}
+btDeformableMultiBodyDynamicsWorld::~btDeformableMultiBodyDynamicsWorld()
+{
+ delete m_solverDeformableBodyIslandCallback;
+}
+
void btDeformableMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar timeStep)
{
BT_PROFILE("internalSingleStepSimulation");
@@ -74,20 +79,16 @@ void btDeformableMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar t
(*m_internalPreTickCallback)(this, timeStep);
}
reinitialize(timeStep);
+
// add gravity to velocity of rigid and multi bodys
applyRigidBodyGravity(timeStep);
///apply gravity and explicit force to velocity, predict motion
predictUnconstraintMotion(timeStep);
- ///perform collision detection
+ ///perform collision detection that involves rigid/multi bodies
btMultiBodyDynamicsWorld::performDiscreteCollisionDetection();
- if (m_selfCollision)
- {
- softBodySelfCollision();
- }
-
btMultiBodyDynamicsWorld::calculateSimulationIslands();
beforeSolverCallbacks(timeStep);
@@ -96,7 +97,13 @@ void btDeformableMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar t
solveConstraints(timeStep);
afterSolverCallbacks(timeStep);
-
+
+ performDeformableCollisionDetection();
+
+ applyRepulsionForce(timeStep);
+
+ performGeometricCollisions(timeStep);
+
integrateTransforms(timeStep);
///update vehicle simulation
@@ -107,6 +114,27 @@ void btDeformableMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar t
// ///////////////////////////////
}
+void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection()
+{
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ m_softBodies[i]->m_softSoftCollision = true;
+ }
+
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ for (int j = i; j < m_softBodies.size(); ++j)
+ {
+ m_softBodies[i]->defaultCollisionHandler(m_softBodies[j]);
+ }
+ }
+
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ m_softBodies[i]->m_softSoftCollision = false;
+ }
+}
+
void btDeformableMultiBodyDynamicsWorld::updateActivationState(btScalar timeStep)
{
for (int i = 0; i < m_softBodies.size(); i++)
@@ -131,10 +159,106 @@ void btDeformableMultiBodyDynamicsWorld::updateActivationState(btScalar timeStep
btMultiBodyDynamicsWorld::updateActivationState(timeStep);
}
+void btDeformableMultiBodyDynamicsWorld::applyRepulsionForce(btScalar timeStep)
+{
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::applyRepulsionForce");
+ for (int i = 0; i < m_softBodies.size(); i++)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ psb->applyRepulsionForce(timeStep, true);
+ }
+ }
+}
+
+void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar timeStep)
+{
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::performGeometricCollisions");
+ // refit the BVH tree for CCD
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ m_softBodies[i]->updateFaceTree(true, false);
+ m_softBodies[i]->updateNodeTree(true, false);
+ for (int j = 0; j < m_softBodies[i]->m_faces.size(); ++j)
+ {
+ btSoftBody::Face& f = m_softBodies[i]->m_faces[j];
+ f.m_n0 = (f.m_n[1]->m_x - f.m_n[0]->m_x).cross(f.m_n[2]->m_x - f.m_n[0]->m_x);
+ }
+ }
+ }
+
+ // clear contact points & update DBVT
+ for (int r = 0; r < m_ccdIterations; ++r)
+ {
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ // clear contact points in the previous iteration
+ psb->m_faceNodeContacts.clear();
+
+ // update m_q and normals for CCD calculation
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + timeStep * psb->m_nodes[j].m_v;
+ }
+ for (int j = 0; j < psb->m_faces.size(); ++j)
+ {
+ btSoftBody::Face& f = psb->m_faces[j];
+ f.m_n1 = (f.m_n[1]->m_q - f.m_n[0]->m_q).cross(f.m_n[2]->m_q - f.m_n[0]->m_q);
+ f.m_vn = (f.m_n[1]->m_v - f.m_n[0]->m_v).cross(f.m_n[2]->m_v - f.m_n[0]->m_v) * timeStep * timeStep;
+ }
+ }
+ }
+
+ // apply CCD to register new contact points
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ for (int j = i; j < m_softBodies.size(); ++j)
+ {
+ btSoftBody* psb1 = m_softBodies[i];
+ btSoftBody* psb2 = m_softBodies[j];
+ if (psb1->isActive() && psb2->isActive())
+ {
+ m_softBodies[i]->geometricCollisionHandler(m_softBodies[j]);
+ }
+ }
+ }
+
+ int penetration_count = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ penetration_count += psb->m_faceNodeContacts.size();
+ }
+ }
+ if (penetration_count == 0)
+ {
+ break;
+ }
+
+ // apply inelastic impulse
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ if (psb->isActive())
+ {
+ psb->applyRepulsionForce(timeStep, false);
+ }
+ }
+ }
+}
void btDeformableMultiBodyDynamicsWorld::softBodySelfCollision()
{
- m_deformableBodySolver->updateSoftBodies();
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::softBodySelfCollision");
for (int i = 0; i < m_softBodies.size(); i++)
{
btSoftBody* psb = m_softBodies[i];
@@ -192,8 +316,6 @@ void btDeformableMultiBodyDynamicsWorld::integrateTransforms(btScalar timeStep)
}
}
node.m_x = node.m_x + timeStep * node.m_v;
- node.m_v -= node.m_vsplit;
- node.m_vsplit.setZero();
node.m_q = node.m_x;
node.m_vn = node.m_v;
}
@@ -255,6 +377,7 @@ void btDeformableMultiBodyDynamicsWorld::integrateTransforms(btScalar timeStep)
void btDeformableMultiBodyDynamicsWorld::solveConstraints(btScalar timeStep)
{
+ BT_PROFILE("btDeformableMultiBodyDynamicsWorld::solveConstraints");
// save v_{n+1}^* velocity after explicit forces
m_deformableBodySolver->backupVelocity();
@@ -265,8 +388,11 @@ void btDeformableMultiBodyDynamicsWorld::solveConstraints(btScalar timeStep)
solveContactConstraints();
// set up the directions in which the velocity does not change in the momentum solve
- m_deformableBodySolver->m_objective->m_projection.setProjection();
-
+ if (m_useProjection)
+ m_deformableBodySolver->m_objective->m_projection.setProjection();
+ else
+ m_deformableBodySolver->m_objective->m_projection.setLagrangeMultiplier();
+
// for explicit scheme, m_backupVelocity = v_{n+1}^*
// for implicit scheme, m_backupVelocity = v_n
// Here, set dv = v_{n+1} - v_n for nodes in contact
@@ -280,7 +406,7 @@ void btDeformableMultiBodyDynamicsWorld::solveConstraints(btScalar timeStep)
void btDeformableMultiBodyDynamicsWorld::setupConstraints()
{
// set up constraints between multibody and deformable bodies
- m_deformableBodySolver->setConstraints();
+ m_deformableBodySolver->setConstraints(m_solverInfo);
// set up constraints among multibodies
{
@@ -403,6 +529,17 @@ void btDeformableMultiBodyDynamicsWorld::reinitialize(btScalar timeStep)
dispatchInfo.m_stepCount = 0;
dispatchInfo.m_debugDraw = btMultiBodyDynamicsWorld::getDebugDrawer();
btMultiBodyDynamicsWorld::getSolverInfo().m_timeStep = timeStep;
+ if (m_useProjection)
+ {
+ m_deformableBodySolver->m_useProjection = true;
+// m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = true;
+ m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_massPreconditioner;
+ }
+ else
+ {
+ m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_KKTPreconditioner;
+ }
+
}
@@ -566,6 +703,24 @@ void btDeformableMultiBodyDynamicsWorld::addForce(btSoftBody* psb, btDeformableL
}
}
+void btDeformableMultiBodyDynamicsWorld::removeForce(btSoftBody* psb, btDeformableLagrangianForce* force)
+{
+ btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf;
+ int removed_index = -1;
+ for (int i = 0; i < forces.size(); ++i)
+ {
+ if (forces[i]->getForceType() == force->getForceType())
+ {
+ forces[i]->removeSoftBody(psb);
+ if (forces[i]->m_softBodies.size() == 0)
+ removed_index = i;
+ break;
+ }
+ }
+ if (removed_index >= 0)
+ forces.removeAtIndex(removed_index);
+}
+
void btDeformableMultiBodyDynamicsWorld::removeSoftBody(btSoftBody* body)
{
m_softBodies.remove(body);
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h
index 7630385767..76b58a0378 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h
@@ -46,10 +46,10 @@ class btDeformableMultiBodyDynamicsWorld : public btMultiBodyDynamicsWorld
bool m_drawClusterTree;
btSoftBodyWorldInfo m_sbi;
btScalar m_internalTime;
- int m_contact_iterations;
+ int m_ccdIterations;
bool m_implicit;
bool m_lineSearch;
- bool m_selfCollision;
+ bool m_useProjection;
DeformableBodyInplaceSolverIslandCallback* m_solverDeformableBodyIslandCallback;
typedef void (*btSolverCallback)(btScalar time, btDeformableMultiBodyDynamicsWorld* world);
@@ -80,9 +80,7 @@ public:
m_solverCallback = cb;
}
- virtual ~btDeformableMultiBodyDynamicsWorld()
- {
- }
+ virtual ~btDeformableMultiBodyDynamicsWorld();
virtual btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld()
{
@@ -133,6 +131,8 @@ public:
void addForce(btSoftBody* psb, btDeformableLagrangianForce* force);
+ void removeForce(btSoftBody* psb, btDeformableLagrangianForce* force);
+
void removeSoftBody(btSoftBody* body);
void removeCollisionObject(btCollisionObject* collisionObject);
@@ -142,6 +142,8 @@ public:
void setupConstraints();
+ void performDeformableCollisionDetection();
+
void solveMultiBodyConstraints();
void solveContactConstraints();
@@ -159,7 +161,151 @@ public:
{
m_lineSearch = lineSearch;
}
+
+ void applyRepulsionForce(btScalar timeStep);
+
+ void performGeometricCollisions(btScalar timeStep);
+
+ struct btDeformableSingleRayCallback : public btBroadphaseRayCallback
+ {
+ btVector3 m_rayFromWorld;
+ btVector3 m_rayToWorld;
+ btTransform m_rayFromTrans;
+ btTransform m_rayToTrans;
+ btVector3 m_hitNormal;
+
+ const btDeformableMultiBodyDynamicsWorld* m_world;
+ btCollisionWorld::RayResultCallback& m_resultCallback;
+
+ btDeformableSingleRayCallback(const btVector3& rayFromWorld, const btVector3& rayToWorld, const btDeformableMultiBodyDynamicsWorld* world, btCollisionWorld::RayResultCallback& resultCallback)
+ : m_rayFromWorld(rayFromWorld),
+ m_rayToWorld(rayToWorld),
+ m_world(world),
+ m_resultCallback(resultCallback)
+ {
+ m_rayFromTrans.setIdentity();
+ m_rayFromTrans.setOrigin(m_rayFromWorld);
+ m_rayToTrans.setIdentity();
+ m_rayToTrans.setOrigin(m_rayToWorld);
+
+ btVector3 rayDir = (rayToWorld - rayFromWorld);
+
+ rayDir.normalize();
+ ///what about division by zero? --> just set rayDirection[i] to INF/1e30
+ m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0];
+ m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1];
+ m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2];
+ m_signs[0] = m_rayDirectionInverse[0] < 0.0;
+ m_signs[1] = m_rayDirectionInverse[1] < 0.0;
+ m_signs[2] = m_rayDirectionInverse[2] < 0.0;
+
+ m_lambda_max = rayDir.dot(m_rayToWorld - m_rayFromWorld);
+ }
+
+ virtual bool process(const btBroadphaseProxy* proxy)
+ {
+ ///terminate further ray tests, once the closestHitFraction reached zero
+ if (m_resultCallback.m_closestHitFraction == btScalar(0.f))
+ return false;
+
+ btCollisionObject* collisionObject = (btCollisionObject*)proxy->m_clientObject;
+
+ //only perform raycast if filterMask matches
+ if (m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle()))
+ {
+ //RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject();
+ //btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+#if 0
+#ifdef RECALCULATE_AABB
+ btVector3 collisionObjectAabbMin,collisionObjectAabbMax;
+ collisionObject->getCollisionShape()->getAabb(collisionObject->getWorldTransform(),collisionObjectAabbMin,collisionObjectAabbMax);
+#else
+ //getBroadphase()->getAabb(collisionObject->getBroadphaseHandle(),collisionObjectAabbMin,collisionObjectAabbMax);
+ const btVector3& collisionObjectAabbMin = collisionObject->getBroadphaseHandle()->m_aabbMin;
+ const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax;
+#endif
+#endif
+ //btScalar hitLambda = m_resultCallback.m_closestHitFraction;
+ //culling already done by broadphase
+ //if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal))
+ {
+ m_world->rayTestSingle(m_rayFromTrans, m_rayToTrans,
+ collisionObject,
+ collisionObject->getCollisionShape(),
+ collisionObject->getWorldTransform(),
+ m_resultCallback);
+ }
+ }
+ return true;
+ }
+ };
+
+
+ void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const
+ {
+ BT_PROFILE("rayTest");
+ /// use the broadphase to accelerate the search for objects, based on their aabb
+ /// and for each object with ray-aabb overlap, perform an exact ray test
+ btDeformableSingleRayCallback rayCB(rayFromWorld, rayToWorld, this, resultCallback);
+
+#ifndef USE_BRUTEFORCE_RAYBROADPHASE
+ m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, rayCB);
+#else
+ for (int i = 0; i < this->getNumCollisionObjects(); i++)
+ {
+ rayCB.process(m_collisionObjects[i]->getBroadphaseHandle());
+ }
+#endif //USE_BRUTEFORCE_RAYBROADPHASE
+ }
+
+ void rayTestSingle(const btTransform& rayFromTrans, const btTransform& rayToTrans,
+ btCollisionObject* collisionObject,
+ const btCollisionShape* collisionShape,
+ const btTransform& colObjWorldTransform,
+ RayResultCallback& resultCallback) const
+ {
+ if (collisionShape->isSoftBody())
+ {
+ btSoftBody* softBody = btSoftBody::upcast(collisionObject);
+ if (softBody)
+ {
+ btSoftBody::sRayCast softResult;
+ if (softBody->rayFaceTest(rayFromTrans.getOrigin(), rayToTrans.getOrigin(), softResult))
+ {
+ if (softResult.fraction <= resultCallback.m_closestHitFraction)
+ {
+ btCollisionWorld::LocalShapeInfo shapeInfo;
+ shapeInfo.m_shapePart = 0;
+ shapeInfo.m_triangleIndex = softResult.index;
+ // get the normal
+ btVector3 rayDir = rayToTrans.getOrigin() - rayFromTrans.getOrigin();
+ btVector3 normal = -rayDir;
+ normal.normalize();
+ {
+ normal = softBody->m_faces[softResult.index].m_normal;
+ if (normal.dot(rayDir) > 0)
+ {
+ // normal always point toward origin of the ray
+ normal = -normal;
+ }
+ }
+
+ btCollisionWorld::LocalRayResult rayResult(collisionObject,
+ &shapeInfo,
+ normal,
+ softResult.fraction);
+ bool normalInWorldSpace = true;
+ resultCallback.addSingleResult(rayResult, normalInWorldSpace);
+ }
+ }
+ }
+ }
+ else
+ {
+ btCollisionWorld::rayTestSingle(rayFromTrans, rayToTrans, collisionObject, collisionShape, colObjWorldTransform, resultCallback);
+ }
+ }
};
#endif //BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD_H
diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h
index 3d06e304d2..d89bc4aca4 100644
--- a/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h
+++ b/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h
@@ -24,21 +24,65 @@ class btDeformableNeoHookeanForce : public btDeformableLagrangianForce
{
public:
typedef btAlignedObjectArray<btVector3> TVStack;
- btScalar m_mu, m_lambda;
+ btScalar m_mu, m_lambda; // Lame Parameters
+ btScalar m_E, m_nu; // Young's modulus and Poisson ratio
btScalar m_mu_damp, m_lambda_damp;
btDeformableNeoHookeanForce(): m_mu(1), m_lambda(1)
{
btScalar damping = 0.05;
m_mu_damp = damping * m_mu;
m_lambda_damp = damping * m_lambda;
+ updateYoungsModulusAndPoissonRatio();
}
btDeformableNeoHookeanForce(btScalar mu, btScalar lambda, btScalar damping = 0.05): m_mu(mu), m_lambda(lambda)
{
m_mu_damp = damping * m_mu;
m_lambda_damp = damping * m_lambda;
+ updateYoungsModulusAndPoissonRatio();
}
-
+
+ void updateYoungsModulusAndPoissonRatio()
+ {
+ // conversion from Lame Parameters to Young's modulus and Poisson ratio
+ // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters
+ m_E = m_mu * (3*m_lambda + 2*m_mu)/(m_lambda + m_mu);
+ m_nu = m_lambda * 0.5 / (m_mu + m_lambda);
+ }
+
+ void updateLameParameters()
+ {
+ // conversion from Young's modulus and Poisson ratio to Lame Parameters
+ // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters
+ m_mu = m_E * 0.5 / (1 + m_nu);
+ m_lambda = m_E * m_nu / ((1 + m_nu) * (1- 2*m_nu));
+ }
+
+ void setYoungsModulus(btScalar E)
+ {
+ m_E = E;
+ updateLameParameters();
+ }
+
+ void setPoissonRatio(btScalar nu)
+ {
+ m_nu = nu;
+ updateLameParameters();
+ }
+
+ void setDamping(btScalar damping)
+ {
+ m_mu_damp = damping * m_mu;
+ m_lambda_damp = damping * m_lambda;
+ }
+
+ void setLameParameters(btScalar mu, btScalar lambda)
+ {
+ m_mu = mu;
+ m_lambda = lambda;
+ updateYoungsModulusAndPoissonRatio();
+ }
+
virtual void addScaledForces(btScalar scale, TVStack& force)
{
addScaledDampingForce(scale, force);
@@ -269,6 +313,8 @@ public:
}
}
+ virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){}
+
virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df)
{
int numNodes = getNumNodes();
diff --git a/thirdparty/bullet/BulletSoftBody/btPreconditioner.h b/thirdparty/bullet/BulletSoftBody/btPreconditioner.h
index d712420381..c2db448ef8 100644
--- a/thirdparty/bullet/BulletSoftBody/btPreconditioner.h
+++ b/thirdparty/bullet/BulletSoftBody/btPreconditioner.h
@@ -68,12 +68,221 @@ public:
virtual void operator()(const TVStack& x, TVStack& b)
{
btAssert(b.size() == x.size());
- btAssert(m_inv_mass.size() == x.size());
- for (int i = 0; i < b.size(); ++i)
+ btAssert(m_inv_mass.size() <= x.size());
+ for (int i = 0; i < m_inv_mass.size(); ++i)
{
b[i] = x[i] * m_inv_mass[i];
}
+ for (int i = m_inv_mass.size(); i < b.size(); ++i)
+ {
+ b[i] = x[i];
+ }
+ }
+};
+
+
+class KKTPreconditioner : public Preconditioner
+{
+ const btAlignedObjectArray<btSoftBody *>& m_softBodies;
+ const btDeformableContactProjection& m_projections;
+ const btAlignedObjectArray<btDeformableLagrangianForce*>& m_lf;
+ TVStack m_inv_A, m_inv_S;
+ const btScalar& m_dt;
+ const bool& m_implicit;
+public:
+ KKTPreconditioner(const btAlignedObjectArray<btSoftBody *>& softBodies, const btDeformableContactProjection& projections, const btAlignedObjectArray<btDeformableLagrangianForce*>& lf, const btScalar& dt, const bool& implicit)
+ : m_softBodies(softBodies)
+ , m_projections(projections)
+ , m_lf(lf)
+ , m_dt(dt)
+ , m_implicit(implicit)
+ {
+ }
+
+ virtual void reinitialize(bool nodeUpdated)
+ {
+ if (nodeUpdated)
+ {
+ int num_nodes = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ num_nodes += psb->m_nodes.size();
+ }
+ m_inv_A.resize(num_nodes);
+ }
+ buildDiagonalA(m_inv_A);
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+// printf("A[%d] = %f, %f, %f \n", i, m_inv_A[i][0], m_inv_A[i][1], m_inv_A[i][2]);
+ for (int d = 0; d < 3; ++d)
+ {
+ m_inv_A[i][d] = (m_inv_A[i][d] == 0) ? 0.0 : 1.0/ m_inv_A[i][d];
+ }
+ }
+ m_inv_S.resize(m_projections.m_lagrangeMultipliers.size());
+// printf("S.size() = %d \n", m_inv_S.size());
+ buildDiagonalS(m_inv_A, m_inv_S);
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+// printf("S[%d] = %f, %f, %f \n", i, m_inv_S[i][0], m_inv_S[i][1], m_inv_S[i][2]);
+ for (int d = 0; d < 3; ++d)
+ {
+ m_inv_S[i][d] = (m_inv_S[i][d] == 0) ? 0.0 : 1.0/ m_inv_S[i][d];
+ }
+ }
+ }
+
+ void buildDiagonalA(TVStack& diagA) const
+ {
+ size_t counter = 0;
+ for (int i = 0; i < m_softBodies.size(); ++i)
+ {
+ btSoftBody* psb = m_softBodies[i];
+ for (int j = 0; j < psb->m_nodes.size(); ++j)
+ {
+ const btSoftBody::Node& node = psb->m_nodes[j];
+ diagA[counter] = (node.m_im == 0) ? btVector3(0,0,0) : btVector3(1.0/node.m_im, 1.0 / node.m_im, 1.0 / node.m_im);
+ ++counter;
+ }
+ }
+ if (m_implicit)
+ {
+ printf("implicit not implemented\n");
+ btAssert(false);
+ }
+ for (int i = 0; i < m_lf.size(); ++i)
+ {
+ // add damping matrix
+ m_lf[i]->buildDampingForceDifferentialDiagonal(-m_dt, diagA);
+ }
+ }
+
+ void buildDiagonalS(const TVStack& inv_A, TVStack& diagS)
+ {
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ // S[k,k] = e_k^T * C A_d^-1 C^T * e_k
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ btVector3& t = diagS[c];
+ t.setZero();
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int d = 0; d < 3; ++d)
+ {
+ t[j] += inv_A[lm.m_indices[i]][d] * lm.m_dirs[j][d] * lm.m_dirs[j][d] * lm.m_weights[i] * lm.m_weights[i];
+ }
+ }
+ }
+ }
+ }
+#define USE_FULL_PRECONDITIONER
+#ifndef USE_FULL_PRECONDITIONER
+ virtual void operator()(const TVStack& x, TVStack& b)
+ {
+ btAssert(b.size() == x.size());
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] = x[i] * m_inv_A[i];
+ }
+ int offset = m_inv_A.size();
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i+offset] = x[i+offset] * m_inv_S[i];
+ }
+ }
+#else
+ virtual void operator()(const TVStack& x, TVStack& b)
+ {
+ btAssert(b.size() == x.size());
+ int offset = m_inv_A.size();
+
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] = x[i] * m_inv_A[i];
+ }
+
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i+offset].setZero();
+ }
+
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ // C * x
+ for (int d = 0; d < lm.m_num_constraints; ++d)
+ {
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ b[offset+c][d] += lm.m_weights[i] * b[lm.m_indices[i]].dot(lm.m_dirs[d]);
+ }
+ }
+ }
+
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i+offset] = b[i+offset] * m_inv_S[i];
+ }
+
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i].setZero();
+ }
+
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ // C^T * lambda
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ b[lm.m_indices[i]] += b[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j];
+ }
+ }
+ }
+
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] = (x[i] - b[i]) * m_inv_A[i];
+ }
+
+ TVStack t;
+ t.resize(b.size());
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ t[i+offset] = x[i+offset] * m_inv_S[i];
+ }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ t[i].setZero();
+ }
+ for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c)
+ {
+ // C^T * lambda
+ const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c];
+ for (int i = 0; i < lm.m_num_nodes; ++i)
+ {
+ for (int j = 0; j < lm.m_num_constraints; ++j)
+ {
+ t[lm.m_indices[i]] += t[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j];
+ }
+ }
+ }
+ for (int i = 0; i < m_inv_A.size(); ++i)
+ {
+ b[i] += t[i] * m_inv_A[i];
+ }
+
+ for (int i = 0; i < m_inv_S.size(); ++i)
+ {
+ b[i+offset] -= x[i+offset] * m_inv_S[i];
+ }
}
+#endif
};
#endif /* BT_PRECONDITIONER_H */
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp b/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp
index 2a458b1d80..81b846d7f8 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp
@@ -18,6 +18,7 @@ subject to the following restrictions:
#include "BulletSoftBody/btSoftBodySolvers.h"
#include "btSoftBodyData.h"
#include "LinearMath/btSerializer.h"
+#include "LinearMath/btImplicitQRSVD.h"
#include "LinearMath/btAlignedAllocator.h"
#include "BulletDynamics/Featherstone/btMultiBodyLinkCollider.h"
#include "BulletDynamics/Featherstone/btMultiBodyConstraint.h"
@@ -25,6 +26,107 @@ subject to the following restrictions:
#include "BulletCollision/CollisionShapes/btTriangleShape.h"
#include <iostream>
//
+static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& leafNodes, btAlignedObjectArray<btAlignedObjectArray<int> >& adj)
+{
+ int N = leafNodes.size();
+ if (N == 0)
+ {
+ return NULL;
+ }
+ while (N > 1)
+ {
+ btAlignedObjectArray<bool> marked;
+ btAlignedObjectArray<btDbvtNode*> newLeafNodes;
+ btAlignedObjectArray<std::pair<int,int> > childIds;
+ btAlignedObjectArray<btAlignedObjectArray<int> > newAdj;
+ marked.resize(N);
+ for (int i = 0; i < N; ++i)
+ marked[i] = false;
+
+ // pair adjacent nodes into new(parent) node
+ for (int i = 0; i < N; ++i)
+ {
+ if (marked[i])
+ continue;
+ bool merged = false;
+ for (int j = 0; j < adj[i].size(); ++j)
+ {
+ int n = adj[i][j];
+ if (!marked[adj[i][j]])
+ {
+ btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode();
+ node->parent = NULL;
+ node->childs[0] = leafNodes[i];
+ node->childs[1] = leafNodes[n];
+ leafNodes[i]->parent = node;
+ leafNodes[n]->parent = node;
+ newLeafNodes.push_back(node);
+ childIds.push_back(std::make_pair(i,n));
+ merged = true;
+ marked[n] = true;
+ break;
+ }
+ }
+ if (!merged)
+ {
+ newLeafNodes.push_back(leafNodes[i]);
+ childIds.push_back(std::make_pair(i,-1));
+ }
+ marked[i] = true;
+ }
+ // update adjacency matrix
+ newAdj.resize(newLeafNodes.size());
+ for (int i = 0; i < newLeafNodes.size(); ++i)
+ {
+ for (int j = i+1; j < newLeafNodes.size(); ++j)
+ {
+ bool neighbor = false;
+ const btAlignedObjectArray<int>& leftChildNeighbors = adj[childIds[i].first];
+ for (int k = 0; k < leftChildNeighbors.size(); ++k)
+ {
+ if (leftChildNeighbors[k] == childIds[j].first || leftChildNeighbors[k] == childIds[j].second)
+ {
+ neighbor = true;
+ break;
+ }
+ }
+ if (!neighbor && childIds[i].second != -1)
+ {
+ const btAlignedObjectArray<int>& rightChildNeighbors = adj[childIds[i].second];
+ for (int k = 0; k < rightChildNeighbors.size(); ++k)
+ {
+ if (rightChildNeighbors[k] == childIds[j].first || rightChildNeighbors[k] == childIds[j].second)
+ {
+ neighbor = true;
+ break;
+ }
+ }
+ }
+ if (neighbor)
+ {
+ newAdj[i].push_back(j);
+ newAdj[j].push_back(i);
+ }
+ }
+ }
+ leafNodes = newLeafNodes;
+ //this assignment leaks memory, the assignment doesn't do a deep copy, for now a manual copy
+ //adj = newAdj;
+ adj.clear();
+ adj.resize(newAdj.size());
+ for (int i = 0; i < newAdj.size(); i++)
+ {
+ for (int j = 0; j < newAdj[i].size(); j++)
+ {
+ adj[i].push_back(newAdj[i][j]);
+ }
+ }
+ N = leafNodes.size();
+ }
+ return leafNodes[0];
+}
+
+//
btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btVector3* x, const btScalar* m)
: m_softBodySolver(0), m_worldInfo(worldInfo)
{
@@ -41,6 +143,7 @@ btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btV
/* Nodes */
const btScalar margin = getCollisionShape()->getMargin();
m_nodes.resize(node_count);
+ m_X.resize(node_count);
for (int i = 0, ni = node_count; i < ni; ++i)
{
Node& n = m_nodes[i];
@@ -51,8 +154,11 @@ btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btV
n.m_im = n.m_im > 0 ? 1 / n.m_im : 0;
n.m_leaf = m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x, margin), &n);
n.m_material = pm;
+ m_X[i] = n.m_x;
}
updateBounds();
+ setCollisionQuadrature(3);
+ m_fdbvnt = 0;
}
btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo)
@@ -111,15 +217,18 @@ void btSoftBody::initDefaults()
m_collisionShape = new btSoftBodyCollisionShape(this);
m_collisionShape->setMargin(0.25f);
- m_initialWorldTransform.setIdentity();
+ m_worldTransform.setIdentity();
m_windVelocity = btVector3(0, 0, 0);
m_restLengthScale = btScalar(1.0);
- m_dampingCoefficient = 1;
- m_sleepingThreshold = 0.1;
- m_useFaceContact = true;
+ m_dampingCoefficient = 1.0;
+ m_sleepingThreshold = .4;
m_useSelfCollision = false;
- m_collisionFlags = 0;
+ m_collisionFlags = 0;
+ m_softSoftCollision = false;
+ m_maxSpeedSquared = 0;
+ m_repulsionStiffness = 0.5;
+ m_fdbvnt = 0;
}
//
@@ -134,6 +243,8 @@ btSoftBody::~btSoftBody()
btAlignedFree(m_materials[i]);
for (i = 0; i < m_joints.size(); ++i)
btAlignedFree(m_joints[i]);
+ if (m_fdbvnt)
+ delete m_fdbvnt;
}
//
@@ -897,6 +1008,71 @@ void btSoftBody::setVolumeDensity(btScalar density)
}
//
+btVector3 btSoftBody::getLinearVelocity()
+{
+ btVector3 total_momentum = btVector3(0,0,0);
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ btScalar mass = m_nodes[i].m_im == 0 ? 0 : 1.0/m_nodes[i].m_im;
+ total_momentum += mass * m_nodes[i].m_v;
+ }
+ btScalar total_mass = getTotalMass();
+ return total_mass == 0 ? total_momentum : total_momentum / total_mass;
+}
+
+//
+void btSoftBody::setLinearVelocity(const btVector3& linVel)
+{
+ btVector3 old_vel = getLinearVelocity();
+ btVector3 diff = linVel - old_vel;
+ for (int i = 0; i < m_nodes.size(); ++i)
+ m_nodes[i].m_v += diff;
+}
+
+//
+void btSoftBody::setAngularVelocity(const btVector3& angVel)
+{
+ btVector3 old_vel = getLinearVelocity();
+ btVector3 com = getCenterOfMass();
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ m_nodes[i].m_v = angVel.cross(m_nodes[i].m_x - com) + old_vel;
+ }
+}
+
+//
+btTransform btSoftBody::getRigidTransform()
+{
+ btVector3 t = getCenterOfMass();
+ btMatrix3x3 S;
+ S.setZero();
+ // get rotation that minimizes L2 difference: \sum_i || RX_i + t - x_i ||
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ S += OuterProduct(m_X[i], t-m_nodes[i].m_x);
+ }
+ btVector3 sigma;
+ btMatrix3x3 U,V;
+ singularValueDecomposition(S,U,sigma,V);
+ btMatrix3x3 R = V * U.transpose();
+ btTransform trs;
+ trs.setIdentity();
+ trs.setOrigin(t);
+ trs.setBasis(R);
+ return trs;
+}
+
+//
+void btSoftBody::transformTo(const btTransform& trs)
+{
+ // get the current best rigid fit
+ btTransform current_transform = getRigidTransform();
+ // apply transform in material space
+ btTransform new_transform = trs * current_transform.inverse();
+ transform(new_transform);
+}
+
+//
void btSoftBody::transform(const btTransform& trs)
{
const btScalar margin = getCollisionShape()->getMargin();
@@ -916,7 +1092,6 @@ void btSoftBody::transform(const btTransform& trs)
updateNormals();
updateBounds();
updateConstants();
- m_initialWorldTransform = trs;
}
//
@@ -1834,6 +2009,25 @@ bool btSoftBody::rayTest(const btVector3& rayFrom,
return (rayTest(rayFrom, rayTo, results.fraction, results.feature, results.index, false) != 0);
}
+bool btSoftBody::rayFaceTest(const btVector3& rayFrom,
+ const btVector3& rayTo,
+ sRayCast& results)
+{
+ if (m_faces.size() == 0)
+ return false;
+ else
+ {
+ if (m_fdbvt.empty())
+ initializeFaceTree();
+ }
+
+ results.body = this;
+ results.fraction = 1.f;
+ results.index = -1;
+
+ return (rayFaceTest(rayFrom, rayTo, results.fraction, results.index) != 0);
+}
+
//
void btSoftBody::setSolver(eSolverPresets::_ preset)
{
@@ -2339,15 +2533,160 @@ int btSoftBody::rayTest(const btVector3& rayFrom, const btVector3& rayTo,
return (cnt);
}
+int btSoftBody::rayFaceTest(const btVector3& rayFrom, const btVector3& rayTo,
+ btScalar& mint, int& index) const
+{
+ int cnt = 0;
+ { /* Use dbvt */
+ RayFromToCaster collider(rayFrom, rayTo, mint);
+
+ btDbvt::rayTest(m_fdbvt.m_root, rayFrom, rayTo, collider);
+ if (collider.m_face)
+ {
+ mint = collider.m_mint;
+ index = (int)(collider.m_face - &m_faces[0]);
+ cnt = 1;
+ }
+ }
+ return (cnt);
+}
+
+
//
+static inline btDbvntNode* copyToDbvnt(const btDbvtNode* n)
+{
+ if (n == 0)
+ return 0;
+ btDbvntNode* root = new btDbvntNode(n);
+ if (n->isinternal())
+ {
+ btDbvntNode* c0 = copyToDbvnt(n->childs[0]);
+ root->childs[0] = c0;
+ btDbvntNode* c1 = copyToDbvnt(n->childs[1]);
+ root->childs[1] = c1;
+ }
+ return root;
+}
+
+static inline void calculateNormalCone(btDbvntNode* root)
+{
+ if (!root)
+ return;
+ if (root->isleaf())
+ {
+ const btSoftBody::Face* face = (btSoftBody::Face*)root->data;
+ root->normal = face->m_normal;
+ root->angle = 0;
+ }
+ else
+ {
+ btVector3 n0(0,0,0), n1(0,0,0);
+ btScalar a0 = 0, a1 = 0;
+ if (root->childs[0])
+ {
+ calculateNormalCone(root->childs[0]);
+ n0 = root->childs[0]->normal;
+ a0 = root->childs[0]->angle;
+ }
+ if (root->childs[1])
+ {
+ calculateNormalCone(root->childs[1]);
+ n1 = root->childs[1]->normal;
+ a1 = root->childs[1]->angle;
+ }
+ root->normal = (n0+n1).safeNormalize();
+ root->angle = btMax(a0,a1) + btAngle(n0, n1)*0.5;
+ }
+}
+
void btSoftBody::initializeFaceTree()
{
+ BT_PROFILE("btSoftBody::initializeFaceTree");
m_fdbvt.clear();
+ // create leaf nodes;
+ btAlignedObjectArray<btDbvtNode*> leafNodes;
+ leafNodes.resize(m_faces.size());
for (int i = 0; i < m_faces.size(); ++i)
{
Face& f = m_faces[i];
- f.m_leaf = m_fdbvt.insert(VolumeOf(f, 0), &f);
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) vol = VolumeOf(f, 0);
+ btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode();
+ node->parent = NULL;
+ node->data = &f;
+ node->childs[1] = 0;
+ node->volume = vol;
+ leafNodes[i] = node;
+ f.m_leaf = node;
}
+ btAlignedObjectArray<btAlignedObjectArray<int> > adj;
+ adj.resize(m_faces.size());
+ // construct the adjacency list for triangles
+ for (int i = 0; i < adj.size(); ++i)
+ {
+ for (int j = i+1; j < adj.size(); ++j)
+ {
+ int dup = 0;
+ for (int k = 0; k < 3; ++k)
+ {
+ for (int l = 0; l < 3; ++l)
+ {
+ if (m_faces[i].m_n[k] == m_faces[j].m_n[l])
+ {
+ ++dup;
+ break;
+ }
+ }
+ if (dup == 2)
+ {
+ adj[i].push_back(j);
+ adj[j].push_back(i);
+ }
+ }
+ }
+ }
+ m_fdbvt.m_root = buildTreeBottomUp(leafNodes, adj);
+ if (m_fdbvnt)
+ delete m_fdbvnt;
+ m_fdbvnt = copyToDbvnt(m_fdbvt.m_root);
+ updateFaceTree(false, false);
+ rebuildNodeTree();
+}
+
+//
+void btSoftBody::rebuildNodeTree()
+{
+ m_ndbvt.clear();
+ btAlignedObjectArray<btDbvtNode*> leafNodes;
+ leafNodes.resize(m_nodes.size());
+ for (int i = 0; i < m_nodes.size(); ++i)
+ {
+ Node& n = m_nodes[i];
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) vol = btDbvtVolume::FromCR(n.m_x, 0);
+ btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode();
+ node->parent = NULL;
+ node->data = &n;
+ node->childs[1] = 0;
+ node->volume = vol;
+ leafNodes[i] = node;
+ n.m_leaf = node;
+ }
+ btAlignedObjectArray<btAlignedObjectArray<int> > adj;
+ adj.resize(m_nodes.size());
+ btAlignedObjectArray<int> old_id;
+ old_id.resize(m_nodes.size());
+ for (int i = 0; i < m_nodes.size(); ++i)
+ old_id[i] = m_nodes[i].index;
+ for (int i = 0; i < m_nodes.size(); ++i)
+ m_nodes[i].index = i;
+ for (int i = 0; i < m_links.size(); ++i)
+ {
+ Link& l = m_links[i];
+ adj[l.m_n[0]->index].push_back(l.m_n[1]->index);
+ adj[l.m_n[1]->index].push_back(l.m_n[0]->index);
+ }
+ m_ndbvt.m_root = buildTreeBottomUp(leafNodes, adj);
+ for (int i = 0; i < m_nodes.size(); ++i)
+ m_nodes[i].index = old_id[i];
}
//
@@ -2403,10 +2742,9 @@ bool btSoftBody::checkDeformableContact(const btCollisionObjectWrapper* colObjWr
const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject();
// use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect
// but resolve contact at x_n
-// btTransform wtr = (predict) ?
-// (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
-// : colObjWrap->getWorldTransform();
- const btTransform& wtr = colObjWrap->getWorldTransform();
+ btTransform wtr = (predict) ?
+ (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
+ : colObjWrap->getWorldTransform();
btScalar dst =
m_worldInfo->m_sparsesdf.Evaluate(
wtr.invXform(x),
@@ -2457,7 +2795,6 @@ bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colO
btTransform wtr = (predict) ?
(colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform())
: colObjWrap->getWorldTransform();
-// const btTransform& wtr = colObjWrap->getWorldTransform();
btScalar dst;
//#define USE_QUADRATURE 1
@@ -2476,6 +2813,7 @@ bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colO
nrm,
margin);
nrm = wtr.getBasis() * nrm;
+ cti.m_colObj = colObjWrap->getCollisionObject();
// use cached contact point
}
else
@@ -2492,10 +2830,11 @@ bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colO
contact_point = results.witnesses[0];
getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
nrm = results.normal;
+ cti.m_colObj = colObjWrap->getCollisionObject();
for (int i = 0; i < 3; ++i)
f.m_pcontact[i] = bary[i];
}
-
+ return (dst < 0);
#endif
// use collision quadrature point
@@ -2505,7 +2844,11 @@ bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colO
btVector3 local_nrm;
for (int q = 0; q < m_quads.size(); ++q)
{
- btVector3 p = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, m_quads[q]);
+ btVector3 p;
+ if (predict)
+ p = BaryEval(f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, m_quads[q]);
+ else
+ p = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, m_quads[q]);
btScalar local_dst = m_worldInfo->m_sparsesdf.Evaluate(
wtr.invXform(p),
shp,
@@ -2513,43 +2856,83 @@ bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colO
margin);
if (local_dst < dst)
{
+ if (local_dst < 0 && predict)
+ return true;
dst = local_dst;
contact_point = p;
bary = m_quads[q];
- nrm = wtr.getBasis() * local_nrm;
+ nrm = local_nrm;
+ }
+ if (!predict)
+ {
+ cti.m_colObj = colObjWrap->getCollisionObject();
+ cti.m_normal = wtr.getBasis() * nrm;
+ cti.m_offset = dst;
}
}
+ return (dst < 0);
}
#endif
+// // regular face contact
+// {
+// btGjkEpaSolver2::sResults results;
+// btTransform triangle_transform;
+// triangle_transform.setIdentity();
+// triangle_transform.setOrigin(f.m_n[0]->m_x);
+// btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_x-f.m_n[0]->m_x, f.m_n[2]->m_x-f.m_n[0]->m_x);
+// btVector3 guess(0,0,0);
+// if (predict)
+// {
+// triangle_transform.setOrigin(f.m_n[0]->m_q);
+// triangle = btTriangleShape(btVector3(0,0,0), f.m_n[1]->m_q-f.m_n[0]->m_q, f.m_n[2]->m_q-f.m_n[0]->m_q);
+// }
+// const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
+//// btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results);
+//// dst = results.distance - margin;
+//// contact_point = results.witnesses[0];
+// btGjkEpaSolver2::Penetration(&triangle, triangle_transform, csh, wtr, guess, results);
+// if (results.status == btGjkEpaSolver2::sResults::Separated)
+// return false;
+// dst = results.distance - margin;
+// contact_point = results.witnesses[1];
+// getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
+// nrm = results.normal;
+// for (int i = 0; i < 3; ++i)
+// f.m_pcontact[i] = bary[i];
+// }
+//
+// if (!predict)
+// {
+// cti.m_colObj = colObjWrap->getCollisionObject();
+// cti.m_normal = nrm;
+// cti.m_offset = dst;
+// }
+//
+
// regular face contact
{
btGjkEpaSolver2::sResults results;
btTransform triangle_transform;
triangle_transform.setIdentity();
- triangle_transform.setOrigin(f.m_n[0]->m_x);
- btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_x-f.m_n[0]->m_x, f.m_n[2]->m_x-f.m_n[0]->m_x);
+ triangle_transform.setOrigin(f.m_n[0]->m_q);
+ btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_q-f.m_n[0]->m_q, f.m_n[2]->m_q-f.m_n[0]->m_q);
btVector3 guess(0,0,0);
const btConvexShape* csh = static_cast<const btConvexShape*>(shp);
btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results);
- dst = results.distance - margin;
+ dst = results.distance-csh->getMargin();
+ dst -= margin;
+ if (dst >= 0)
+ return false;
contact_point = results.witnesses[0];
- getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
+ getBarycentric(contact_point, f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, bary);
+ btVector3 curr = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary);
nrm = results.normal;
- for (int i = 0; i < 3; ++i)
- f.m_pcontact[i] = bary[i];
- }
-
- if (!predict)
- {
cti.m_colObj = colObjWrap->getCollisionObject();
cti.m_normal = nrm;
- cti.m_offset = dst;
+ cti.m_offset = dst + (curr - contact_point).dot(nrm);
}
-
- if (dst < 0)
- return true;
- return (false);
+ return (dst < 0);
}
//
@@ -3075,6 +3458,7 @@ void btSoftBody::setSpringStiffness(btScalar k)
{
m_links[i].Feature::m_material->m_kLST = k;
}
+ m_repulsionStiffness = k;
}
void btSoftBody::initializeDmInverse()
@@ -3372,18 +3756,39 @@ void btSoftBody::setMaxStress(btScalar maxStress)
//
void btSoftBody::interpolateRenderMesh()
{
- for (int i = 0; i < m_renderNodes.size(); ++i)
- {
- Node& n = m_renderNodes[i];
- n.m_x.setZero();
- for (int j = 0; j < 4; ++j)
- {
- if (m_renderNodesParents[i].size())
+ if (m_z.size() > 0)
+ {
+ for (int i = 0; i < m_renderNodes.size(); ++i)
+ {
+ const Node* p0 = m_renderNodesParents[i][0];
+ const Node* p1 = m_renderNodesParents[i][1];
+ const Node* p2 = m_renderNodesParents[i][2];
+ btVector3 normal = btCross(p1->m_x - p0->m_x, p2->m_x - p0->m_x);
+ btVector3 unit_normal = normal.normalized();
+ Node& n = m_renderNodes[i];
+ n.m_x.setZero();
+ for (int j = 0; j < 3; ++j)
{
n.m_x += m_renderNodesParents[i][j]->m_x * m_renderNodesInterpolationWeights[i][j];
}
- }
- }
+ n.m_x += m_z[i] * unit_normal;
+ }
+ }
+ else
+ {
+ for (int i = 0; i < m_renderNodes.size(); ++i)
+ {
+ Node& n = m_renderNodes[i];
+ n.m_x.setZero();
+ for (int j = 0; j < 4; ++j)
+ {
+ if (m_renderNodesParents[i].size())
+ {
+ n.m_x += m_renderNodesParents[i][j]->m_x * m_renderNodesInterpolationWeights[i][j];
+ }
+ }
+ }
+ }
}
void btSoftBody::setCollisionQuadrature(int N)
@@ -3649,13 +4054,10 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap
break;
case fCollision::SDF_RD:
{
-
btRigidBody* prb1 = (btRigidBody*)btRigidBody::upcast(pcoWrap->getCollisionObject());
if (pcoWrap->getCollisionObject()->isActive() || this->isActive())
{
const btTransform wtr = pcoWrap->getWorldTransform();
-// const btTransform ctr = pcoWrap->getWorldTransform();
-// const btScalar timemargin = (wtr.getOrigin() - ctr.getOrigin()).length();
const btScalar timemargin = 0;
const btScalar basemargin = getCollisionShape()->getMargin();
btVector3 mins;
@@ -3667,22 +4069,25 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap
maxs);
volume = btDbvtVolume::FromMM(mins, maxs);
volume.Expand(btVector3(basemargin, basemargin, basemargin));
- btSoftColliders::CollideSDF_RD docollideNode;
- docollideNode.psb = this;
- docollideNode.m_colObj1Wrap = pcoWrap;
- docollideNode.m_rigidBody = prb1;
- docollideNode.dynmargin = basemargin + timemargin;
- docollideNode.stamargin = basemargin;
- m_ndbvt.collideTV(m_ndbvt.m_root, volume, docollideNode);
-
- if (this->m_useFaceContact)
+ if (m_cfg.collisions & fCollision::SDF_RDN)
+ {
+ btSoftColliders::CollideSDF_RD docollideNode;
+ docollideNode.psb = this;
+ docollideNode.m_colObj1Wrap = pcoWrap;
+ docollideNode.m_rigidBody = prb1;
+ docollideNode.dynmargin = basemargin + timemargin;
+ docollideNode.stamargin = basemargin;
+ m_ndbvt.collideTV(m_ndbvt.m_root, volume, docollideNode);
+ }
+
+ if (((pcoWrap->getCollisionObject()->getInternalType() == CO_RIGID_BODY) && (m_cfg.collisions & fCollision::SDF_RDF)) || ((pcoWrap->getCollisionObject()->getInternalType() == CO_FEATHERSTONE_LINK) && (m_cfg.collisions & fCollision::SDF_MDF)))
{
btSoftColliders::CollideSDF_RDF docollideFace;
docollideFace.psb = this;
docollideFace.m_colObj1Wrap = pcoWrap;
docollideFace.m_rigidBody = prb1;
- docollideFace.dynmargin = basemargin + timemargin;
- docollideFace.stamargin = basemargin;
+ docollideFace.dynmargin = basemargin + timemargin;
+ docollideFace.stamargin = basemargin;
m_fdbvt.collideTV(m_fdbvt.m_root, volume, docollideFace);
}
}
@@ -3691,51 +4096,6 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap
}
}
-static inline btDbvntNode* copyToDbvnt(const btDbvtNode* n)
-{
- if (n == 0)
- return 0;
- btDbvntNode* root = new btDbvntNode(n);
- if (n->isinternal())
- {
- btDbvntNode* c0 = copyToDbvnt(n->childs[0]);
- root->childs[0] = c0;
- btDbvntNode* c1 = copyToDbvnt(n->childs[1]);
- root->childs[1] = c1;
- }
- return root;
-}
-
-static inline void calculateNormalCone(btDbvntNode* root)
-{
- if (!root)
- return;
- if (root->isleaf())
- {
- const btSoftBody::Face* face = (btSoftBody::Face*)root->data;
- root->normal = face->m_normal;
- root->angle = 0;
- }
- else
- {
- btVector3 n0(0,0,0), n1(0,0,0);
- btScalar a0 = 0, a1 = 0;
- if (root->childs[0])
- {
- calculateNormalCone(root->childs[0]);
- n0 = root->childs[0]->normal;
- a0 = root->childs[0]->angle;
- }
- if (root->childs[1])
- {
- calculateNormalCone(root->childs[1]);
- n1 = root->childs[1]->normal;
- a1 = root->childs[1]->angle;
- }
- root->normal = (n0+n1).safeNormalize();
- root->angle = btMax(a0,a1) + btAngle(n0, n1)*0.5;
- }
-}
//
void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
{
@@ -3779,6 +4139,8 @@ void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
break;
case fCollision::VF_DD:
{
+ if (!psb->m_softSoftCollision)
+ return;
if (psb->isActive() || this->isActive())
{
if (this != psb)
@@ -3797,6 +4159,7 @@ void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
docollide.psb[1]->m_fdbvt.m_root,
docollide);
+
/* psb1 nodes vs psb0 faces */
if (this->m_tetras.size() > 0)
docollide.useFaceNormal = true;
@@ -3812,20 +4175,17 @@ void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
{
if (psb->useSelfCollision())
{
- btSoftColliders::CollideFF_DD docollide;
- docollide.mrg = getCollisionShape()->getMargin() +
- psb->getCollisionShape()->getMargin();
- docollide.psb[0] = this;
- docollide.psb[1] = psb;
- if (this->m_tetras.size() > 0)
- docollide.useFaceNormal = true;
- else
- docollide.useFaceNormal = false;
- /* psb0 faces vs psb0 faces */
- btDbvntNode* root = copyToDbvnt(this->m_fdbvt.m_root);
- calculateNormalCone(root);
- this->m_fdbvt.selfCollideT(root,docollide);
- delete root;
+ btSoftColliders::CollideFF_DD docollide;
+ docollide.mrg = 2*getCollisionShape()->getMargin();
+ docollide.psb[0] = this;
+ docollide.psb[1] = psb;
+ if (this->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ /* psb0 faces vs psb0 faces */
+ calculateNormalCone(this->m_fdbvnt);
+ this->m_fdbvt.selfCollideT(m_fdbvnt,docollide);
}
}
}
@@ -3837,6 +4197,58 @@ void btSoftBody::defaultCollisionHandler(btSoftBody* psb)
}
}
+void btSoftBody::geometricCollisionHandler(btSoftBody* psb)
+{
+ if (psb->isActive() || this->isActive())
+ {
+ if (this != psb)
+ {
+ btSoftColliders::CollideCCD docollide;
+ /* common */
+ docollide.mrg = SAFE_EPSILON; // for rounding error instead of actual margin
+ docollide.dt = psb->m_sst.sdt;
+ /* psb0 nodes vs psb1 faces */
+ if (psb->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ docollide.psb[0] = this;
+ docollide.psb[1] = psb;
+ docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
+ docollide.psb[1]->m_fdbvt.m_root,
+ docollide);
+ /* psb1 nodes vs psb0 faces */
+ if (this->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ docollide.psb[0] = psb;
+ docollide.psb[1] = this;
+ docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root,
+ docollide.psb[1]->m_fdbvt.m_root,
+ docollide);
+ }
+ else
+ {
+ if (psb->useSelfCollision())
+ {
+ btSoftColliders::CollideCCD docollide;
+ docollide.mrg = SAFE_EPSILON;
+ docollide.psb[0] = this;
+ docollide.psb[1] = psb;
+ docollide.dt = psb->m_sst.sdt;
+ if (this->m_tetras.size() > 0)
+ docollide.useFaceNormal = true;
+ else
+ docollide.useFaceNormal = false;
+ /* psb0 faces vs psb0 faces */
+ calculateNormalCone(this->m_fdbvnt); // should compute this outside of this scope
+ this->m_fdbvt.selfCollideT(m_fdbvnt,docollide);
+ }
+ }
+ }
+}
+
void btSoftBody::setWindVelocity(const btVector3& velocity)
{
m_windVelocity = velocity;
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBody.h b/thirdparty/bullet/BulletSoftBody/btSoftBody.h
index 2b048c1118..6a55eccbd2 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBody.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBody.h
@@ -35,6 +35,8 @@ subject to the following restrictions:
//#else
#define btSoftBodyData btSoftBodyFloatData
#define btSoftBodyDataName "btSoftBodyFloatData"
+static const btScalar OVERLAP_REDUCTION_FACTOR = 0.1;
+static unsigned long seed = 243703;
//#endif //BT_USE_DOUBLE_PRECISION
class btBroadphaseInterface;
@@ -161,14 +163,18 @@ public:
RVSmask = 0x000f, ///Rigid versus soft mask
SDF_RS = 0x0001, ///SDF based rigid vs soft
CL_RS = 0x0002, ///Cluster vs convex rigid vs soft
- SDF_RD = 0x0003, ///DF based rigid vs deformable
- SDF_RDF = 0x0004, ///DF based rigid vs deformable faces
+ SDF_RD = 0x0004, ///rigid vs deformable
- SVSmask = 0x00F0, ///Rigid versus soft mask
+ SVSmask = 0x00f0, ///Rigid versus soft mask
VF_SS = 0x0010, ///Vertex vs face soft vs soft handling
CL_SS = 0x0020, ///Cluster vs cluster soft vs soft handling
CL_SELF = 0x0040, ///Cluster soft body self collision
- VF_DD = 0x0050, ///Vertex vs face soft vs soft handling
+ VF_DD = 0x0080, ///Vertex vs face soft vs soft handling
+
+ RVDFmask = 0x0f00, /// Rigid versus deformable face mask
+ SDF_RDF = 0x0100, /// GJK based Rigid vs. deformable face
+ SDF_MDF = 0x0200, /// GJK based Multibody vs. deformable face
+ SDF_RDN = 0x0400, /// SDF based Rigid vs. deformable node
/* presets */
Default = SDF_RS,
END
@@ -257,13 +263,13 @@ public:
btVector3 m_x; // Position
btVector3 m_q; // Previous step position/Test position
btVector3 m_v; // Velocity
- btVector3 m_vsplit; // Temporary Velocity in addintion to velocity used in split impulse
btVector3 m_vn; // Previous step velocity
btVector3 m_f; // Force accumulator
btVector3 m_n; // Normal
btScalar m_im; // 1/mass
btScalar m_area; // Area
btDbvtNode* m_leaf; // Leaf data
+ btScalar m_penetration; // depth of penetration
int m_battach : 1; // Attached
int index;
};
@@ -289,6 +295,7 @@ public:
btScalar m_ra; // Rest area
btDbvtNode* m_leaf; // Leaf data
btVector4 m_pcontact; // barycentric weights of the persistent contact
+ btVector3 m_n0, m_n1, m_vn;
int m_index;
};
/* Tetra */
@@ -717,6 +724,15 @@ public:
/* SolverState */
struct SolverState
{
+ //if you add new variables, always initialize them!
+ SolverState()
+ :sdt(0),
+ isdt(0),
+ velmrg(0),
+ radmrg(0),
+ updmrg(0)
+ {
+ }
btScalar sdt; // dt*timescale
btScalar isdt; // 1/sdt
btScalar velmrg; // velocity margin
@@ -796,22 +812,24 @@ public:
bool m_bUpdateRtCst; // Update runtime constants
btDbvt m_ndbvt; // Nodes tree
btDbvt m_fdbvt; // Faces tree
+ btDbvntNode* m_fdbvnt; // Faces tree with normals
btDbvt m_cdbvt; // Clusters tree
tClusterArray m_clusters; // Clusters
- btScalar m_dampingCoefficient; // Damping Coefficient
- btScalar m_sleepingThreshold;
- btScalar m_maxSpeedSquared;
- bool m_useFaceContact;
- btAlignedObjectArray<btVector3> m_quads; // quadrature points for collision detection
-
- btAlignedObjectArray<btVector4> m_renderNodesInterpolationWeights;
- btAlignedObjectArray<btAlignedObjectArray<const btSoftBody::Node*> > m_renderNodesParents;
- bool m_useSelfCollision;
+ btScalar m_dampingCoefficient; // Damping Coefficient
+ btScalar m_sleepingThreshold;
+ btScalar m_maxSpeedSquared;
+ btAlignedObjectArray<btVector3> m_quads; // quadrature points for collision detection
+ btScalar m_repulsionStiffness;
+ btAlignedObjectArray<btVector3> m_X; // initial positions
+
+ btAlignedObjectArray<btVector4> m_renderNodesInterpolationWeights;
+ btAlignedObjectArray<btAlignedObjectArray<const btSoftBody::Node*> > m_renderNodesParents;
+ btAlignedObjectArray<btScalar> m_z; // vertical distance used in extrapolation
+ bool m_useSelfCollision;
+ bool m_softSoftCollision;
btAlignedObjectArray<bool> m_clusterConnectivity; //cluster connectivity, for self-collision
- btTransform m_initialWorldTransform;
-
btVector3 m_windVelocity;
btScalar m_restLengthScale;
@@ -843,11 +861,6 @@ public:
{
m_dampingCoefficient = damping_coeff;
}
-
- void setUseFaceContact(bool useFaceContact)
- {
- m_useFaceContact = false;
- }
///@todo: avoid internal softbody shape hack and move collision code to collision library
virtual void setCollisionShape(btCollisionShape* collisionShape)
@@ -957,6 +970,16 @@ public:
void setVolumeMass(btScalar mass);
/* Set volume density (using tetrahedrons) */
void setVolumeDensity(btScalar density);
+ /* Get the linear velocity of the center of mass */
+ btVector3 getLinearVelocity();
+ /* Set the linear velocity of the center of mass */
+ void setLinearVelocity(const btVector3& linVel);
+ /* Set the angular velocity of the center of mass */
+ void setAngularVelocity(const btVector3& angVel);
+ /* Get best fit rigid transform */
+ btTransform getRigidTransform();
+ /* Transform to given pose */
+ void transformTo(const btTransform& trs);
/* Transform */
void transform(const btTransform& trs);
/* Translate */
@@ -1023,6 +1046,11 @@ public:
bool rayTest(const btVector3& rayFrom,
const btVector3& rayTo,
sRayCast& results);
+ bool rayFaceTest(const btVector3& rayFrom,
+ const btVector3& rayTo,
+ sRayCast& results);
+ int rayFaceTest(const btVector3& rayFrom, const btVector3& rayTo,
+ btScalar& mint, int& index) const;
/* Solver presets */
void setSolver(eSolverPresets::_ preset);
/* predictMotion */
@@ -1120,6 +1148,7 @@ public:
int rayTest(const btVector3& rayFrom, const btVector3& rayTo,
btScalar& mint, eFeature::_& feature, int& index, bool bcountonly) const;
void initializeFaceTree();
+ void rebuildNodeTree();
btVector3 evaluateCom() const;
bool checkDeformableContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const;
bool checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap, Face& f, btVector3& contact_point, btVector3& bary, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const;
@@ -1152,7 +1181,180 @@ public:
static void VSolve_Links(btSoftBody* psb, btScalar kst);
static psolver_t getSolver(ePSolver::_ solver);
static vsolver_t getSolver(eVSolver::_ solver);
+ void geometricCollisionHandler(btSoftBody* psb);
+#define SAFE_EPSILON SIMD_EPSILON*100.0
+ void updateNode(btDbvtNode* node, bool use_velocity, bool margin)
+ {
+ if (node->isleaf())
+ {
+ btSoftBody::Node* n = (btSoftBody::Node*)(node->data);
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision
+ if (use_velocity)
+ {
+ btVector3 points[2] = {n->m_x, n->m_x + m_sst.sdt * n->m_v};
+ vol = btDbvtVolume::FromPoints(points, 2);
+ vol.Expand(btVector3(pad, pad, pad));
+ }
+ else
+ {
+ vol = btDbvtVolume::FromCR(n->m_x, pad);
+ }
+ node->volume = vol;
+ return;
+ }
+ else
+ {
+ updateNode(node->childs[0], use_velocity, margin);
+ updateNode(node->childs[1], use_velocity, margin);
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ Merge(node->childs[0]->volume, node->childs[1]->volume, vol);
+ node->volume = vol;
+ }
+ }
+
+ void updateNodeTree(bool use_velocity, bool margin)
+ {
+ if (m_ndbvt.m_root)
+ updateNode(m_ndbvt.m_root, use_velocity, margin);
+ }
+
+ template <class DBVTNODE> // btDbvtNode or btDbvntNode
+ void updateFace(DBVTNODE* node, bool use_velocity, bool margin)
+ {
+ if (node->isleaf())
+ {
+ btSoftBody::Face* f = (btSoftBody::Face*)(node->data);
+ btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ if (use_velocity)
+ {
+ btVector3 points[6] = {f->m_n[0]->m_x, f->m_n[0]->m_x + m_sst.sdt * f->m_n[0]->m_v,
+ f->m_n[1]->m_x, f->m_n[1]->m_x + m_sst.sdt * f->m_n[1]->m_v,
+ f->m_n[2]->m_x, f->m_n[2]->m_x + m_sst.sdt * f->m_n[2]->m_v};
+ vol = btDbvtVolume::FromPoints(points, 6);
+ }
+ else
+ {
+ btVector3 points[3] = {f->m_n[0]->m_x,
+ f->m_n[1]->m_x,
+ f->m_n[2]->m_x};
+ vol = btDbvtVolume::FromPoints(points, 3);
+ }
+ vol.Expand(btVector3(pad, pad, pad));
+ node->volume = vol;
+ return;
+ }
+ else
+ {
+ updateFace(node->childs[0], use_velocity, margin);
+ updateFace(node->childs[1], use_velocity, margin);
+ ATTRIBUTE_ALIGNED16(btDbvtVolume) vol;
+ Merge(node->childs[0]->volume, node->childs[1]->volume, vol);
+ node->volume = vol;
+ }
+ }
+ void updateFaceTree(bool use_velocity, bool margin)
+ {
+ if (m_fdbvt.m_root)
+ updateFace(m_fdbvt.m_root, use_velocity, margin);
+ if (m_fdbvnt)
+ updateFace(m_fdbvnt, use_velocity, margin);
+ }
+
+ template <typename T>
+ static inline T BaryEval(const T& a,
+ const T& b,
+ const T& c,
+ const btVector3& coord)
+ {
+ return (a * coord.x() + b * coord.y() + c * coord.z());
+ }
+ void applyRepulsionForce(btScalar timeStep, bool applySpringForce)
+ {
+ btAlignedObjectArray<int> indices;
+ {
+ // randomize the order of repulsive force
+ indices.resize(m_faceNodeContacts.size());
+ for (int i = 0; i < m_faceNodeContacts.size(); ++i)
+ indices[i] = i;
+#define NEXTRAND (seed = (1664525L * seed + 1013904223L) & 0xffffffff)
+ int i, ni;
+
+ for (i = 0, ni = indices.size(); i < ni; ++i)
+ {
+ btSwap(indices[i], indices[NEXTRAND % ni]);
+ }
+ }
+ for (int k = 0; k < m_faceNodeContacts.size(); ++k)
+ {
+ int i = indices[k];
+ btSoftBody::DeformableFaceNodeContact& c = m_faceNodeContacts[i];
+ btSoftBody::Node* node = c.m_node;
+ btSoftBody::Face* face = c.m_face;
+ const btVector3& w = c.m_bary;
+ const btVector3& n = c.m_normal;
+ btVector3 l = node->m_x - BaryEval(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, w);
+ btScalar d = c.m_margin - n.dot(l);
+ d = btMax(btScalar(0),d);
+
+ const btVector3& va = node->m_v;
+ btVector3 vb = BaryEval(face->m_n[0]->m_v, face->m_n[1]->m_v, face->m_n[2]->m_v, w);
+ btVector3 vr = va - vb;
+ const btScalar vn = btDot(vr, n); // dn < 0 <==> opposing
+ if (vn > OVERLAP_REDUCTION_FACTOR * d / timeStep)
+ continue;
+ btVector3 vt = vr - vn*n;
+ btScalar I = 0;
+ btScalar mass = node->m_im == 0 ? 0 : btScalar(1)/node->m_im;
+ if (applySpringForce)
+ I = -btMin(m_repulsionStiffness * timeStep * d, mass * (OVERLAP_REDUCTION_FACTOR * d / timeStep - vn));
+ if (vn < 0)
+ I += 0.5 * mass * vn;
+ btScalar face_penetration = 0, node_penetration = node->m_penetration;
+ for (int i = 0; i < 3; ++i)
+ face_penetration = btMax(face_penetration, face->m_n[i]->m_penetration);
+ btScalar I_tilde = .5 *I /(1.0+w.length2());
+
+// double the impulse if node or face is constrained.
+ if (face_penetration > 0 || node_penetration > 0)
+ I_tilde *= 2.0;
+ if (face_penetration <= node_penetration)
+ {
+ for (int j = 0; j < 3; ++j)
+ face->m_n[j]->m_v += w[j]*n*I_tilde*node->m_im;
+ }
+ if (face_penetration >= node_penetration)
+ {
+ node->m_v -= I_tilde*node->m_im*n;
+ }
+
+ // apply frictional impulse
+ btScalar vt_norm = vt.safeNorm();
+ if (vt_norm > SIMD_EPSILON)
+ {
+ btScalar delta_vn = -2 * I * node->m_im;
+ btScalar mu = c.m_friction;
+ btScalar vt_new = btMax(btScalar(1) - mu * delta_vn / (vt_norm + SIMD_EPSILON), btScalar(0))*vt_norm;
+ I = 0.5 * mass * (vt_norm-vt_new);
+ vt.safeNormalize();
+ I_tilde = .5 *I /(1.0+w.length2());
+// double the impulse if node or face is constrained.
+// if (face_penetration > 0 || node_penetration > 0)
+// I_tilde *= 2.0;
+ if (face_penetration <= node_penetration)
+ {
+ for (int j = 0; j < 3; ++j)
+ face->m_n[j]->m_v += w[j] * vt * I_tilde * (face->m_n[j])->m_im;
+ }
+ if (face_penetration >= node_penetration)
+ {
+ node->m_v -= I_tilde * node->m_im * vt;
+ }
+ }
+ }
+ }
virtual int calculateSerializeBufferSize() const;
///fills the dataBuffer and returns the struct name (and 0 on failure)
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
index 649d6f58cf..c1a87c7d57 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp
@@ -1300,13 +1300,23 @@ btSoftBody* btSoftBodyHelpers::CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo,
}
else if (reading_tets)
{
+ int d;
+ ss >> d;
+ if (d != 4)
+ {
+ printf("Load deformable failed: Only Tetrahedra are supported in VTK file.\n");
+ fs.close();
+ return 0;
+ }
ss.ignore(128, ' '); // ignore "4"
Index tet;
tet.resize(4);
for (size_t i = 0; i < 4; i++)
{
ss >> tet[i];
+ printf("%d ", tet[i]);
}
+ printf("\n");
indices[indices_count++] = tet;
}
}
@@ -1500,10 +1510,27 @@ void btSoftBodyHelpers::getBarycentricWeights(const btVector3& a, const btVector
bary = btVector4(va6*v6, vb6*v6, vc6*v6, vd6*v6);
}
+// Given a simplex with vertices a,b,c, find the barycentric weights of p in this simplex. bary[3] = 0.
+void btSoftBodyHelpers::getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary)
+{
+ btVector3 v0 = b - a, v1 = c - a, v2 = p - a;
+ btScalar d00 = btDot(v0, v0);
+ btScalar d01 = btDot(v0, v1);
+ btScalar d11 = btDot(v1, v1);
+ btScalar d20 = btDot(v2, v0);
+ btScalar d21 = btDot(v2, v1);
+ btScalar invDenom = 1.0 / (d00 * d11 - d01 * d01);
+ bary[1] = (d11 * d20 - d01 * d21) * invDenom;
+ bary[2] = (d00 * d21 - d01 * d20) * invDenom;
+ bary[0] = 1.0 - bary[1] - bary[2];
+ bary[3] = 0;
+}
+
// Iterate through all render nodes to find the simulation tetrahedron that contains the render node and record the barycentric weights
// If the node is not inside any tetrahedron, assign it to the tetrahedron in which the node has the least negative barycentric weight
void btSoftBodyHelpers::interpolateBarycentricWeights(btSoftBody* psb)
{
+ psb->m_z.resize(0);
psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size());
psb->m_renderNodesParents.resize(psb->m_renderNodes.size());
for (int i = 0; i < psb->m_renderNodes.size(); ++i)
@@ -1513,7 +1540,6 @@ void btSoftBodyHelpers::interpolateBarycentricWeights(btSoftBody* psb)
btVector4 optimal_bary;
btScalar min_bary_weight = -1e3;
btAlignedObjectArray<const btSoftBody::Node*> optimal_parents;
- bool found = false;
for (int j = 0; j < psb->m_tetras.size(); ++j)
{
const btSoftBody::Tetra& t = psb->m_tetras[j];
@@ -1544,3 +1570,55 @@ void btSoftBodyHelpers::interpolateBarycentricWeights(btSoftBody* psb)
psb->m_renderNodesParents[i] = optimal_parents;
}
}
+
+
+// Iterate through all render nodes to find the simulation triangle that's closest to the node in the barycentric sense.
+void btSoftBodyHelpers::extrapolateBarycentricWeights(btSoftBody* psb)
+{
+ psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size());
+ psb->m_renderNodesParents.resize(psb->m_renderNodes.size());
+ psb->m_z.resize(psb->m_renderNodes.size());
+ for (int i = 0; i < psb->m_renderNodes.size(); ++i)
+ {
+ const btVector3& p = psb->m_renderNodes[i].m_x;
+ btVector4 bary;
+ btVector4 optimal_bary;
+ btScalar min_bary_weight = -SIMD_INFINITY;
+ btAlignedObjectArray<const btSoftBody::Node*> optimal_parents;
+ btScalar dist = 0, optimal_dist = 0;
+ for (int j = 0; j < psb->m_faces.size(); ++j)
+ {
+ const btSoftBody::Face& f = psb->m_faces[j];
+ btVector3 n = btCross(f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x);
+ btVector3 unit_n = n.normalized();
+ dist = (p-f.m_n[0]->m_x).dot(unit_n);
+ btVector3 proj_p = p - dist*unit_n;
+ getBarycentricWeights(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, proj_p, bary);
+ btScalar new_min_bary_weight = bary[0];
+ for (int k = 1; k < 3; ++k)
+ {
+ new_min_bary_weight = btMin(new_min_bary_weight, bary[k]);
+ }
+
+ // p is out of the current best triangle, we found a traingle that's better
+ bool better_than_closest_outisde = (new_min_bary_weight > min_bary_weight && min_bary_weight<0.);
+ // p is inside of the current best triangle, we found a triangle that's better
+ bool better_than_best_inside = (new_min_bary_weight>=0 && min_bary_weight>=0 && btFabs(dist)<btFabs(optimal_dist));
+
+ if (better_than_closest_outisde || better_than_best_inside)
+ {
+ btAlignedObjectArray<const btSoftBody::Node*> parents;
+ parents.push_back(f.m_n[0]);
+ parents.push_back(f.m_n[1]);
+ parents.push_back(f.m_n[2]);
+ optimal_parents = parents;
+ optimal_bary = bary;
+ optimal_dist = dist;
+ min_bary_weight = new_min_bary_weight;
+ }
+ }
+ psb->m_renderNodesInterpolationWeights[i] = optimal_bary;
+ psb->m_renderNodesParents[i] = optimal_parents;
+ psb->m_z[i] = optimal_dist;
+ }
+}
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h
index b20f2f6d62..abe1870890 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h
@@ -148,8 +148,12 @@ struct btSoftBodyHelpers
static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary);
+ static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary);
+
static void interpolateBarycentricWeights(btSoftBody* psb);
+ static void extrapolateBarycentricWeights(btSoftBody* psb);
+
static void generateBoundaryFaces(btSoftBody* psb);
static void duplicateFaces(const char* filename, const btSoftBody* psb);
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h b/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h
index cde4746d58..b9ebc95b6b 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h
+++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h
@@ -18,7 +18,6 @@ subject to the following restrictions:
#define _BT_SOFT_BODY_INTERNALS_H
#include "btSoftBody.h"
-
#include "LinearMath/btQuickprof.h"
#include "LinearMath/btPolarDecomposition.h"
#include "BulletCollision/BroadphaseCollision/btBroadphaseInterface.h"
@@ -29,9 +28,10 @@ subject to the following restrictions:
#include "BulletDynamics/Featherstone/btMultiBodyConstraint.h"
#include <string.h> //for memset
#include <cmath>
+#include "poly34.h"
// Given a multibody link, a contact point and a contact direction, fill in the jacobian data needed to calculate the velocity change given an impulse in the contact direction
-static void findJacobian(const btMultiBodyLinkCollider* multibodyLinkCol,
+static SIMD_FORCE_INLINE void findJacobian(const btMultiBodyLinkCollider* multibodyLinkCol,
btMultiBodyJacobianData& jacobianData,
const btVector3& contact_point,
const btVector3& dir)
@@ -44,7 +44,7 @@ static void findJacobian(const btMultiBodyLinkCollider* multibodyLinkCol,
multibodyLinkCol->m_multiBody->fillContactJacobianMultiDof(multibodyLinkCol->m_link, contact_point, dir, jac, jacobianData.scratch_r, jacobianData.scratch_v, jacobianData.scratch_m);
multibodyLinkCol->m_multiBody->calcAccelerationDeltasMultiDof(&jacobianData.m_jacobians[0], &jacobianData.m_deltaVelocitiesUnitImpulse[0], jacobianData.scratch_r, jacobianData.scratch_v);
}
-static btVector3 generateUnitOrthogonalVector(const btVector3& u)
+static SIMD_FORCE_INLINE btVector3 generateUnitOrthogonalVector(const btVector3& u)
{
btScalar ux = u.getX();
btScalar uy = u.getY();
@@ -62,6 +62,571 @@ static btVector3 generateUnitOrthogonalVector(const btVector3& u)
v.normalize();
return v;
}
+
+static SIMD_FORCE_INLINE bool proximityTest(const btVector3& x1, const btVector3& x2, const btVector3& x3, const btVector3& x4, const btVector3& normal, const btScalar& mrg, btVector3& bary)
+{
+ btVector3 x43 = x4-x3;
+ if (std::abs(x43.dot(normal)) > mrg)
+ return false;
+ btVector3 x13 = x1-x3;
+ btVector3 x23 = x2-x3;
+ btScalar a11 = x13.length2();
+ btScalar a22 = x23.length2();
+ btScalar a12 = x13.dot(x23);
+ btScalar b1 = x13.dot(x43);
+ btScalar b2 = x23.dot(x43);
+ btScalar det = a11*a22 - a12*a12;
+ if (det < SIMD_EPSILON)
+ return false;
+ btScalar w1 = (b1*a22-b2*a12)/det;
+ btScalar w2 = (b2*a11-b1*a12)/det;
+ btScalar w3 = 1-w1-w2;
+ btScalar delta = mrg / std::sqrt(0.5*std::abs(x13.cross(x23).safeNorm()));
+ bary = btVector3(w1,w2,w3);
+ for (int i = 0; i < 3; ++i)
+ {
+ if (bary[i] < -delta || bary[i] > 1+delta)
+ return false;
+ }
+ return true;
+}
+static const int KDOP_COUNT = 13;
+static btVector3 dop[KDOP_COUNT]={btVector3(1,0,0),
+ btVector3(0,1,0),
+ btVector3(0,0,1),
+ btVector3(1,1,0),
+ btVector3(1,0,1),
+ btVector3(0,1,1),
+ btVector3(1,-1,0),
+ btVector3(1,0,-1),
+ btVector3(0,1,-1),
+ btVector3(1,1,1),
+ btVector3(1,-1,1),
+ btVector3(1,1,-1),
+ btVector3(1,-1,-1)
+};
+
+static inline int getSign(const btVector3& n, const btVector3& x)
+{
+ btScalar d = n.dot(x);
+ if (d>SIMD_EPSILON)
+ return 1;
+ if (d<-SIMD_EPSILON)
+ return -1;
+ return 0;
+}
+
+static SIMD_FORCE_INLINE bool hasSeparatingPlane(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
+{
+ btVector3 hex[6] = {face->m_n[0]->m_x - node->m_x,
+ face->m_n[1]->m_x - node->m_x,
+ face->m_n[2]->m_x - node->m_x,
+ face->m_n[0]->m_x + dt*face->m_n[0]->m_v - node->m_x,
+ face->m_n[1]->m_x + dt*face->m_n[1]->m_v - node->m_x,
+ face->m_n[2]->m_x + dt*face->m_n[2]->m_v - node->m_x
+ };
+ btVector3 segment = dt*node->m_v;
+ for (int i = 0; i < KDOP_COUNT; ++i)
+ {
+ int s = getSign(dop[i], segment);
+ int j = 0;
+ for (; j < 6; ++j)
+ {
+ if (getSign(dop[i], hex[j]) == s)
+ break;
+ }
+ if (j == 6)
+ return true;
+ }
+ return false;
+}
+
+static SIMD_FORCE_INLINE bool nearZero(const btScalar& a)
+{
+ return (a>-SAFE_EPSILON && a<SAFE_EPSILON);
+}
+static SIMD_FORCE_INLINE bool sameSign(const btScalar& a, const btScalar& b)
+{
+ return (nearZero(a) || nearZero(b) || (a>SAFE_EPSILON && b>SAFE_EPSILON) || (a<-SAFE_EPSILON && b<-SAFE_EPSILON));
+}
+static SIMD_FORCE_INLINE bool diffSign(const btScalar& a, const btScalar& b)
+{
+ return !sameSign(a, b);
+}
+inline btScalar evaluateBezier2(const btScalar &p0, const btScalar &p1, const btScalar &p2, const btScalar &t, const btScalar &s)
+{
+ btScalar s2 = s*s;
+ btScalar t2 = t*t;
+
+ return p0*s2+p1*btScalar(2.0)*s*t+p2*t2;
+}
+inline btScalar evaluateBezier(const btScalar &p0, const btScalar &p1, const btScalar &p2, const btScalar &p3, const btScalar &t, const btScalar &s)
+{
+ btScalar s2 = s*s;
+ btScalar s3 = s2*s;
+ btScalar t2 = t*t;
+ btScalar t3 = t2*t;
+
+ return p0*s3+p1*btScalar(3.0)*s2*t+p2*btScalar(3.0)*s*t2+p3*t3;
+}
+static SIMD_FORCE_INLINE bool getSigns(bool type_c, const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, const btScalar& t1, btScalar &lt0, btScalar &lt1)
+{
+ if (sameSign(t0, t1)) {
+ lt0 = t0;
+ lt1 = t0;
+ return true;
+ }
+
+ if (type_c || diffSign(k0, k3)) {
+ btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1);
+ if (t0<-0)
+ ft = -ft;
+
+ if (sameSign(ft, k0)) {
+ lt0 = t1;
+ lt1 = t1;
+ }
+ else {
+ lt0 = t0;
+ lt1 = t0;
+ }
+ return true;
+ }
+
+ if (!type_c) {
+ btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1);
+ if (t0<-0)
+ ft = -ft;
+
+ if (diffSign(ft, k0)) {
+ lt0 = t0;
+ lt1 = t1;
+ return true;
+ }
+
+ btScalar fk = evaluateBezier2(k1-k0, k2-k1, k3-k2, t0, -t1);
+
+ if (sameSign(fk, k1-k0))
+ lt0 = lt1 = t1;
+ else
+ lt0 = lt1 = t0;
+
+ return true;
+ }
+ return false;
+}
+
+static SIMD_FORCE_INLINE void getBernsteinCoeff(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, btScalar& k0, btScalar& k1, btScalar& k2, btScalar& k3)
+{
+ const btVector3& n0 = face->m_n0;
+ const btVector3& n1 = face->m_n1;
+ btVector3 n_hat = n0 + n1 - face->m_vn;
+ btVector3 p0ma0 = node->m_x - face->m_n[0]->m_x;
+ btVector3 p1ma1 = node->m_q - face->m_n[0]->m_q;
+ k0 = (p0ma0).dot(n0) * 3.0;
+ k1 = (p0ma0).dot(n_hat) + (p1ma1).dot(n0);
+ k2 = (p1ma1).dot(n_hat) + (p0ma0).dot(n1);
+ k3 = (p1ma1).dot(n1) * 3.0;
+}
+
+static SIMD_FORCE_INLINE void polyDecomposition(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& j0, const btScalar& j1, const btScalar& j2, btScalar& u0, btScalar& u1, btScalar& v0, btScalar& v1)
+{
+ btScalar denom = 4.0 * (j1-j2) * (j1-j0) + (j2-j0) * (j2-j0);
+ u0 = (2.0*(j1-j2)*(3.0*k1-2.0*k0-k3) - (j0-j2)*(3.0*k2-2.0*k3-k0)) / denom;
+ u1 = (2.0*(j1-j0)*(3.0*k2-2.0*k3-k0) - (j2-j0)*(3.0*k1-2.0*k0-k3)) / denom;
+ v0 = k0-u0*j0;
+ v1 = k3-u1*j2;
+}
+
+static SIMD_FORCE_INLINE bool rootFindingLemma(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3)
+{
+ btScalar u0, u1, v0, v1;
+ btScalar j0 = 3.0*(k1-k0);
+ btScalar j1 = 3.0*(k2-k1);
+ btScalar j2 = 3.0*(k3-k2);
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (sameSign(v0, v1))
+ {
+ btScalar Ypa = j0*(1.0-v0)*(1.0-v0) + 2.0*j1*v0*(1.0-v0) + j2*v0*v0; // Y'(v0)
+ if (sameSign(Ypa, j0))
+ {
+ return (diffSign(k0,v1));
+ }
+ }
+ return diffSign(k0,v0);
+}
+
+static SIMD_FORCE_INLINE void getJs(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Node* a, const btSoftBody::Node* b, const btSoftBody::Node* c, const btSoftBody::Node* p, const btScalar& dt, btScalar& j0, btScalar& j1, btScalar& j2)
+{
+ const btVector3& a0 = a->m_x;
+ const btVector3& b0 = b->m_x;
+ const btVector3& c0 = c->m_x;
+ const btVector3& va = a->m_v;
+ const btVector3& vb = b->m_v;
+ const btVector3& vc = c->m_v;
+ const btVector3 a1 = a0 + dt*va;
+ const btVector3 b1 = b0 + dt*vb;
+ const btVector3 c1 = c0 + dt*vc;
+ btVector3 n0 = (b0-a0).cross(c0-a0);
+ btVector3 n1 = (b1-a1).cross(c1-a1);
+ btVector3 n_hat = n0+n1 - dt*dt*(vb-va).cross(vc-va);
+ const btVector3& p0 = p->m_x;
+ const btVector3& vp = p->m_v;
+ btVector3 p1 = p0 + dt*vp;
+ btVector3 m0 = (b0-p0).cross(c0-p0);
+ btVector3 m1 = (b1-p1).cross(c1-p1);
+ btVector3 m_hat = m0+m1 - dt*dt*(vb-vp).cross(vc-vp);
+ btScalar l0 = m0.dot(n0);
+ btScalar l1 = 0.25 * (m0.dot(n_hat) + m_hat.dot(n0));
+ btScalar l2 = btScalar(1)/btScalar(6)*(m0.dot(n1) + m_hat.dot(n_hat) + m1.dot(n0));
+ btScalar l3 = 0.25 * (m_hat.dot(n1) + m1.dot(n_hat));
+ btScalar l4 = m1.dot(n1);
+
+ btScalar k1p = 0.25 * k0 + 0.75 * k1;
+ btScalar k2p = 0.5 * k1 + 0.5 * k2;
+ btScalar k3p = 0.75 * k2 + 0.25 * k3;
+
+ btScalar s0 = (l1 * k0 - l0 * k1p)*4.0;
+ btScalar s1 = (l2 * k0 - l0 * k2p)*2.0;
+ btScalar s2 = (l3 * k0 - l0 * k3p)*btScalar(4)/btScalar(3);
+ btScalar s3 = l4 * k0 - l0 * k3;
+
+ j0 = (s1*k0 - s0*k1) * 3.0;
+ j1 = (s2*k0 - s0*k2) * 1.5;
+ j2 = (s3*k0 - s0*k3);
+}
+
+static SIMD_FORCE_INLINE bool signDetermination1Internal(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& u0, const btScalar& u1, const btScalar& v0, const btScalar& v1)
+{
+ btScalar Yu0 = k0*(1.0-u0)*(1.0-u0)*(1.0-u0) + 3.0*k1*u0*(1.0-u0)*(1.0-u0) + 3.0*k2*u0*u0*(1.0-u0) + k3*u0*u0*u0; // Y(u0)
+ btScalar Yv0 = k0*(1.0-v0)*(1.0-v0)*(1.0-v0) + 3.0*k1*v0*(1.0-v0)*(1.0-v0) + 3.0*k2*v0*v0*(1.0-v0) + k3*v0*v0*v0; // Y(v0)
+
+ btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0;
+ btScalar L = sameSign(sign_Ytp, k0) ? u1 : u0;
+ sign_Ytp = (v0 > v1) ? Yv0 : -Yv0;
+ btScalar K = (sameSign(sign_Ytp,k0)) ? v1 : v0;
+ return diffSign(L,K);
+}
+
+static SIMD_FORCE_INLINE bool signDetermination2Internal(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& j0, const btScalar& j1, const btScalar& j2, const btScalar& u0, const btScalar& u1, const btScalar& v0, const btScalar& v1)
+{
+ btScalar Yu0 = k0*(1.0-u0)*(1.0-u0)*(1.0-u0) + 3.0*k1*u0*(1.0-u0)*(1.0-u0) + 3.0*k2*u0*u0*(1.0-u0) + k3*u0*u0*u0; // Y(u0)
+ btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0, L1, L2;
+ if (diffSign(sign_Ytp,k0))
+ {
+ L1 = u0;
+ L2 = u1;
+ }
+ else
+ {
+ btScalar Yp_u0 = j0*(1.0-u0)*(1.0-u0) + 2.0*j1*(1.0-u0)*u0 + j2*u0*u0;
+ if (sameSign(Yp_u0,j0))
+ {
+ L1 = u1;
+ L2 = u1;
+ }
+ else
+ {
+ L1 = u0;
+ L2 = u0;
+ }
+ }
+ btScalar Yv0 = k0*(1.0-v0)*(1.0-v0)*(1.0-v0) + 3.0*k1*v0*(1.0-v0)*(1.0-v0) + 3.0*k2*v0*v0*(1.0-v0) + k3*v0*v0*v0; // Y(uv0)
+ sign_Ytp = (v0 > v1) ? Yv0 : -Yv0;
+ btScalar K1, K2;
+ if (diffSign(sign_Ytp,k0))
+ {
+ K1 = v0;
+ K2 = v1;
+ }
+ else
+ {
+ btScalar Yp_v0 = j0*(1.0-v0)*(1.0-v0) + 2.0*j1*(1.0-v0)*v0 + j2*v0*v0;
+ if (sameSign(Yp_v0,j0))
+ {
+ K1 = v1;
+ K2 = v1;
+ }
+ else
+ {
+ K1 = v0;
+ K2 = v0;
+ }
+ }
+ return (diffSign(K1, L1) || diffSign(L2, K2));
+}
+
+static SIMD_FORCE_INLINE bool signDetermination1(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
+{
+ btScalar j0, j1, j2, u0, u1, v0, v1;
+ // p1
+ getJs(k0,k1,k2,k3,face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2);
+ if (nearZero(j0+j2-j1*2.0))
+ {
+ btScalar lt0, lt1;
+ getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1))
+ return false;
+ }
+ // p2
+ getJs(k0,k1,k2,k3,face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2);
+ if (nearZero(j0+j2-j1*2.0))
+ {
+ btScalar lt0, lt1;
+ getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1))
+ return false;
+ }
+ // p3
+ getJs(k0,k1,k2,k3,face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2);
+ if (nearZero(j0+j2-j1*2.0))
+ {
+ btScalar lt0, lt1;
+ getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1))
+ return false;
+ }
+ return true;
+}
+
+static SIMD_FORCE_INLINE bool signDetermination2(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
+{
+ btScalar j0, j1, j2, u0, u1, v0, v1;
+ // p1
+ getJs(k0,k1,k2,k3,face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2);
+ if (nearZero(j0+j2-j1*2.0))
+ {
+ btScalar lt0, lt1;
+ bool bt0 = true, bt1=true;
+ getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ bt0 = false;
+ if (lt1 < -SAFE_EPSILON)
+ bt1 = false;
+ if (!bt0 && !bt1)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1))
+ return false;
+ }
+ // p2
+ getJs(k0,k1,k2,k3,face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2);
+ if (nearZero(j0+j2-j1*2.0))
+ {
+ btScalar lt0, lt1;
+ bool bt0=true, bt1=true;
+ getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ bt0 = false;
+ if (lt1 < -SAFE_EPSILON)
+ bt1 = false;
+ if (!bt0 && !bt1)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1))
+ return false;
+ }
+ // p3
+ getJs(k0,k1,k2,k3,face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2);
+ if (nearZero(j0+j2-j1*2.0))
+ {
+ btScalar lt0, lt1;
+ bool bt0=true, bt1=true;
+ getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1);
+ if (lt0 < -SAFE_EPSILON)
+ bt0 = false;
+ if (lt1 < -SAFE_EPSILON)
+ bt1 = false;
+ if (!bt0 && !bt1)
+ return false;
+ }
+ else
+ {
+ polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1);
+ if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1))
+ return false;
+ }
+ return true;
+}
+
+static SIMD_FORCE_INLINE bool coplanarAndInsideTest(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
+{
+ // Coplanar test
+ if (diffSign(k1-k0, k3-k2))
+ {
+ // Case b:
+ if (sameSign(k0, k3) && !rootFindingLemma(k0,k1,k2,k3))
+ return false;
+ // inside test
+ return signDetermination2(k0, k1, k2, k3, face, node, dt);
+ }
+ else
+ {
+ // Case c:
+ if (sameSign(k0, k3))
+ return false;
+ // inside test
+ return signDetermination1(k0, k1, k2, k3, face, node, dt);
+ }
+ return false;
+}
+static SIMD_FORCE_INLINE bool conservativeCulling(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& mrg)
+{
+ if (k0 > mrg && k1 > mrg && k2 > mrg && k3 > mrg)
+ return true;
+ if (k0 < -mrg && k1 < -mrg && k2 < -mrg && k3 < -mrg)
+ return true;
+ return false;
+}
+
+static SIMD_FORCE_INLINE bool bernsteinVFTest(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& mrg, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt)
+{
+ if (conservativeCulling(k0, k1, k2, k3, mrg))
+ return false;
+ return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt);
+}
+
+static SIMD_FORCE_INLINE void deCasteljau(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, btScalar& k10, btScalar& k20, btScalar& k30, btScalar& k21, btScalar& k12)
+{
+ k10 = k0*(1.0-t0) + k1*t0;
+ btScalar k11 = k1*(1.0-t0) + k2*t0;
+ k12 = k2*(1.0-t0) + k3*t0;
+ k20 = k10*(1.0-t0) + k11*t0;
+ k21 = k11*(1.0-t0) + k12*t0;
+ k30 = k20*(1.0-t0) + k21*t0;
+}
+static SIMD_FORCE_INLINE bool bernsteinVFTest(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg)
+{
+ btScalar k0, k1, k2, k3;
+ getBernsteinCoeff(face, node, dt, k0, k1, k2, k3);
+ if (conservativeCulling(k0, k1, k2, k3, mrg))
+ return false;
+ return true;
+ if (diffSign(k2-2.0*k1+k0, k3-2.0*k2+k1))
+ {
+ btScalar k10, k20, k30, k21, k12;
+ btScalar t0 = (k2-2.0*k1+k0)/(k0-3.0*k1+3.0*k2-k3);
+ deCasteljau(k0, k1, k2, k3, t0, k10, k20, k30, k21, k12);
+ return bernsteinVFTest(k0, k10, k20, k30, mrg, face, node, dt) || bernsteinVFTest(k30, k21, k12, k3, mrg, face, node, dt);
+ }
+ return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt);
+}
+
+static SIMD_FORCE_INLINE bool continuousCollisionDetection(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg, btVector3& bary)
+{
+ if (hasSeparatingPlane(face, node, dt))
+ return false;
+ btVector3 x21 = face->m_n[1]->m_x - face->m_n[0]->m_x;
+ btVector3 x31 = face->m_n[2]->m_x - face->m_n[0]->m_x;
+ btVector3 x41 = node->m_x - face->m_n[0]->m_x;
+ btVector3 v21 = face->m_n[1]->m_v - face->m_n[0]->m_v;
+ btVector3 v31 = face->m_n[2]->m_v - face->m_n[0]->m_v;
+ btVector3 v41 = node->m_v - face->m_n[0]->m_v;
+ btVector3 a = x21.cross(x31);
+ btVector3 b = x21.cross(v31) + v21.cross(x31);
+ btVector3 c = v21.cross(v31);
+ btVector3 d = x41;
+ btVector3 e = v41;
+ btScalar a0 = a.dot(d);
+ btScalar a1 = a.dot(e) + b.dot(d);
+ btScalar a2 = c.dot(d) + b.dot(e);
+ btScalar a3 = c.dot(e);
+ btScalar eps = SAFE_EPSILON;
+ int num_roots = 0;
+ btScalar roots[3];
+ if (std::abs(a3) < eps)
+ {
+ // cubic term is zero
+ if (std::abs(a2) < eps)
+ {
+ if (std::abs(a1) < eps)
+ {
+ if (std::abs(a0) < eps)
+ {
+ num_roots = 2;
+ roots[0] = 0;
+ roots[1] = dt;
+ }
+ }
+ else
+ {
+ num_roots = 1;
+ roots[0] = -a0/a1;
+ }
+ }
+ else
+ {
+ num_roots = SolveP2(roots, a1/a2, a0/a2);
+ }
+ }
+ else
+ {
+ num_roots = SolveP3(roots, a2/a3, a1/a3, a0/a3);
+ }
+// std::sort(roots, roots+num_roots);
+ if (num_roots > 1)
+ {
+ if (roots[0] > roots[1])
+ btSwap(roots[0], roots[1]);
+ }
+ if (num_roots > 2)
+ {
+ if (roots[0] > roots[2])
+ btSwap(roots[0], roots[2]);
+ if (roots[1] > roots[2])
+ btSwap(roots[1], roots[2]);
+ }
+ for (int r = 0; r < num_roots; ++r)
+ {
+ double root = roots[r];
+ if (root <= 0)
+ continue;
+ if (root > dt + SIMD_EPSILON)
+ return false;
+ btVector3 x1 = face->m_n[0]->m_x + root * face->m_n[0]->m_v;
+ btVector3 x2 = face->m_n[1]->m_x + root * face->m_n[1]->m_v;
+ btVector3 x3 = face->m_n[2]->m_x + root * face->m_n[2]->m_v;
+ btVector3 x4 = node->m_x + root * node->m_v;
+ btVector3 normal = (x2-x1).cross(x3-x1);
+ normal.safeNormalize();
+ if (proximityTest(x1, x2, x3, x4, normal, mrg, bary))
+ return true;
+ }
+ return false;
+}
+static SIMD_FORCE_INLINE bool bernsteinCCD(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg, btVector3& bary)
+{
+ if (!bernsteinVFTest(face, node, dt, mrg))
+ return false;
+ if (!continuousCollisionDetection(face, node, dt, 1e-6, bary))
+ return false;
+ return true;
+}
+
//
// btSymMatrix
//
@@ -373,6 +938,26 @@ static inline btMatrix3x3 OuterProduct(const btScalar* v1,const btScalar* v2,con
return (m);
}
+static inline btMatrix3x3 OuterProduct(const btVector3& v1,const btVector3& v2)
+{
+ btMatrix3x3 m;
+ btScalar a11 = v1[0] * v2[0];
+ btScalar a12 = v1[0] * v2[1];
+ btScalar a13 = v1[0] * v2[2];
+
+ btScalar a21 = v1[1] * v2[0];
+ btScalar a22 = v1[1] * v2[1];
+ btScalar a23 = v1[1] * v2[2];
+
+ btScalar a31 = v1[2] * v2[0];
+ btScalar a32 = v1[2] * v2[1];
+ btScalar a33 = v1[2] * v2[2];
+ m[0] = btVector3(a11, a12, a13);
+ m[1] = btVector3(a21, a22, a23);
+ m[2] = btVector3(a31, a32, a33);
+ return (m);
+}
+
//
static inline btMatrix3x3 Add(const btMatrix3x3& a,
@@ -1070,8 +1655,8 @@ struct btSoftColliders
if (!n.m_battach)
{
- // check for collision at x_{n+1}^* as well at x_n
- if (psb->checkDeformableContact(m_colObj1Wrap, n.m_x, m, c.m_cti, /*predict = */ true) || psb->checkDeformableContact(m_colObj1Wrap, n.m_q, m, c.m_cti, /*predict = */ true))
+ // check for collision at x_{n+1}^*
+ if (psb->checkDeformableContact(m_colObj1Wrap, n.m_q, m, c.m_cti, /*predict = */ true))
{
const btScalar ima = n.m_im;
// todo: collision between multibody and fixed deformable node will be missed.
@@ -1159,7 +1744,6 @@ struct btSoftColliders
btSoftBody::Node* n0 = f.m_n[0];
btSoftBody::Node* n1 = f.m_n[1];
btSoftBody::Node* n2 = f.m_n[2];
-
const btScalar m = (n0->m_im > 0 && n1->m_im > 0 && n2->m_im > 0 )? dynmargin : stamargin;
btSoftBody::DeformableFaceRigidContact c;
btVector3 contact_point;
@@ -1174,18 +1758,19 @@ struct btSoftColliders
if (ms > 0)
{
// resolve contact at x_n
- psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, /*predict = */ false);
+// psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, /*predict = */ false);
btSoftBody::sCti& cti = c.m_cti;
c.m_contactPoint = contact_point;
c.m_bary = bary;
// todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices
c.m_weights = btScalar(2)/(btScalar(1) + bary.length2()) * bary;
c.m_face = &f;
+ // friction is handled by the nodes to prevent sticking
+// const btScalar fc = 0;
const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction();
// the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf
ima = bary.getX()*c.m_weights.getX() * n0->m_im + bary.getY()*c.m_weights.getY() * n1->m_im + bary.getZ()*c.m_weights.getZ() * n2->m_im;
-
c.m_c2 = ima;
c.m_c3 = fc;
c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR;
@@ -1316,19 +1901,11 @@ struct btSoftColliders
{
btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
-
- btVector3 o = node->m_x;
- btVector3 p;
- btScalar d = SIMD_INFINITY;
- ProjectOrigin(face->m_n[0]->m_x - o,
- face->m_n[1]->m_x - o,
- face->m_n[2]->m_x - o,
- p, d);
- const btScalar m = mrg + (o - node->m_q).safeNorm() * 2;
- if (d < (m * m))
+ btVector3 bary;
+ if (proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary))
{
const btSoftBody::Node* n[] = {face->m_n[0], face->m_n[1], face->m_n[2]};
- const btVector3 w = BaryCoord(n[0]->m_x, n[1]->m_x, n[2]->m_x, p + o);
+ const btVector3 w = bary;
const btScalar ma = node->m_im;
btScalar mb = BaryEval(n[0]->m_im, n[1]->m_im, n[2]->m_im, w);
if ((n[0]->m_im <= 0) ||
@@ -1341,20 +1918,14 @@ struct btSoftColliders
if (ms > 0)
{
btSoftBody::DeformableFaceNodeContact c;
- if (useFaceNormal)
- c.m_normal = face->m_normal;
- else
- c.m_normal = p / -btSqrt(d);
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
c.m_margin = mrg;
c.m_node = node;
c.m_face = face;
c.m_bary = w;
- // todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices
- c.m_weights = btScalar(2)/(btScalar(1) + w.length2()) * w;
c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
- // the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf
- c.m_imf = c.m_bary[0]*c.m_weights[0] * n[0]->m_im + c.m_bary[1]*c.m_weights[1] * n[1]->m_im + c.m_bary[2]*c.m_weights[2] * n[2]->m_im;
- c.m_c0 = btScalar(1)/(ma + c.m_imf);
psb[0]->m_faceNodeContacts.push_back(c);
}
}
@@ -1372,69 +1943,152 @@ struct btSoftColliders
void Process(const btDbvntNode* lface1,
const btDbvntNode* lface2)
{
- btSoftBody::Face* f = (btSoftBody::Face*)lface1->data;
- btSoftBody::Face* face = (btSoftBody::Face*)lface2->data;
+ btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data;
+ btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data;
+ if (f1 != f2)
+ {
+ Repel(f1, f2);
+ Repel(f2, f1);
+ }
+ }
+ void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2)
+ {
+ //#define REPEL_NEIGHBOR 1
+#ifndef REPEL_NEIGHBOR
for (int node_id = 0; node_id < 3; ++node_id)
{
- btSoftBody::Node* node = f->m_n[node_id];
- bool skip = false;
+ btSoftBody::Node* node = f1->m_n[node_id];
for (int i = 0; i < 3; ++i)
{
- if (face->m_n[i] == node)
+ if (f2->m_n[i] == node)
+ return;
+ }
+ }
+#endif
+ bool skip = false;
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
+#ifdef REPEL_NEIGHBOR
+ for (int i = 0; i < 3; ++i)
+ {
+ if (f2->m_n[i] == node)
{
skip = true;
break;
}
}
if (skip)
+ {
+ skip = false;
+ continue;
+ }
+#endif
+ btSoftBody::Face* face = f2;
+ btVector3 bary;
+ if (!proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary))
continue;
- btVector3 o = node->m_x;
- btVector3 p;
- btScalar d = SIMD_INFINITY;
- ProjectOrigin(face->m_n[0]->m_x - o,
- face->m_n[1]->m_x - o,
- face->m_n[2]->m_x - o,
- p, d);
- const btScalar m = mrg + (o - node->m_q).safeNorm() * 2;
- if (d < (m * m))
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_margin = mrg;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = bary;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
+ }
+ btSoftBody* psb[2];
+ btScalar mrg;
+ bool useFaceNormal;
+ };
+
+ struct CollideCCD : btDbvt::ICollide
+ {
+ void Process(const btDbvtNode* lnode,
+ const btDbvtNode* lface)
+ {
+ btSoftBody::Node* node = (btSoftBody::Node*)lnode->data;
+ btSoftBody::Face* face = (btSoftBody::Face*)lface->data;
+ btVector3 bary;
+ if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary))
+ {
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = bary;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
+ }
+ void Process(const btDbvntNode* lface1,
+ const btDbvntNode* lface2)
+ {
+ btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data;
+ btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data;
+ if (f1 != f2)
+ {
+ Repel(f1, f2);
+ Repel(f2, f1);
+ }
+ }
+ void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2)
+ {
+ //#define REPEL_NEIGHBOR 1
+#ifndef REPEL_NEIGHBOR
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
+ for (int i = 0; i < 3; ++i)
{
- const btSoftBody::Node* n[] = {face->m_n[0], face->m_n[1], face->m_n[2]};
- const btVector3 w = BaryCoord(n[0]->m_x, n[1]->m_x, n[2]->m_x, p + o);
- const btScalar ma = node->m_im;
- btScalar mb = BaryEval(n[0]->m_im, n[1]->m_im, n[2]->m_im, w);
- if ((n[0]->m_im <= 0) ||
- (n[1]->m_im <= 0) ||
- (n[2]->m_im <= 0))
- {
- mb = 0;
- }
- const btScalar ms = ma + mb;
- if (ms > 0)
+ if (f2->m_n[i] == node)
+ return;
+ }
+ }
+#endif
+ bool skip = false;
+ for (int node_id = 0; node_id < 3; ++node_id)
+ {
+ btSoftBody::Node* node = f1->m_n[node_id];
+#ifdef REPEL_NEIGHBOR
+ for (int i = 0; i < 3; ++i)
+ {
+ if (f2->m_n[i] == node)
{
- btSoftBody::DeformableFaceNodeContact c;
- if (useFaceNormal)
- c.m_normal = face->m_normal;
- else
- c.m_normal = p / -btSqrt(d);
- c.m_margin = mrg;
- c.m_node = node;
- c.m_face = face;
- c.m_bary = w;
- // todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices
- c.m_weights = btScalar(2)/(btScalar(1) + w.length2()) * w;
- c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
- // the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf
- c.m_imf = c.m_bary[0]*c.m_weights[0] * n[0]->m_im + c.m_bary[1]*c.m_weights[1] * n[1]->m_im + c.m_bary[2]*c.m_weights[2] * n[2]->m_im;
- c.m_c0 = btScalar(1)/(ma + c.m_imf);
- psb[0]->m_faceNodeContacts.push_back(c);
+ skip = true;
+ break;
}
}
+ if (skip)
+ {
+ skip = false;
+ continue;
+ }
+#endif
+ btSoftBody::Face* face = f2;
+ btVector3 bary;
+ if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary))
+ {
+ btSoftBody::DeformableFaceNodeContact c;
+ c.m_normal = face->m_normal;
+ if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0)
+ c.m_normal = -face->m_normal;
+ c.m_node = node;
+ c.m_face = face;
+ c.m_bary = bary;
+ c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF;
+ psb[0]->m_faceNodeContacts.push_back(c);
+ }
}
}
btSoftBody* psb[2];
- btScalar mrg;
+ btScalar dt, mrg;
bool useFaceNormal;
};
};
-
#endif //_BT_SOFT_BODY_INTERNALS_H
diff --git a/thirdparty/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp b/thirdparty/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
index 56d8083f22..5b65216e4b 100644
--- a/thirdparty/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
+++ b/thirdparty/bullet/BulletSoftBody/btSoftRigidCollisionAlgorithm.cpp
@@ -48,9 +48,10 @@ btSoftRigidCollisionAlgorithm::~btSoftRigidCollisionAlgorithm()
}
#include <stdio.h>
-
+#include "LinearMath/btQuickprof.h"
void btSoftRigidCollisionAlgorithm::processCollision(const btCollisionObjectWrapper* body0Wrap, const btCollisionObjectWrapper* body1Wrap, const btDispatcherInfo& dispatchInfo, btManifoldResult* resultOut)
{
+ BT_PROFILE("btSoftRigidCollisionAlgorithm::processCollision");
(void)dispatchInfo;
(void)resultOut;
//printf("btSoftRigidCollisionAlgorithm\n");
diff --git a/thirdparty/bullet/BulletSoftBody/poly34.cpp b/thirdparty/bullet/BulletSoftBody/poly34.cpp
new file mode 100644
index 0000000000..819d0c79f7
--- /dev/null
+++ b/thirdparty/bullet/BulletSoftBody/poly34.cpp
@@ -0,0 +1,419 @@
+// poly34.cpp : solution of cubic and quartic equation
+// (c) Khashin S.I. http://math.ivanovo.ac.ru/dalgebra/Khashin/index.html
+// khash2 (at) gmail.com
+// Thanks to Alexandr Rakhmanin <rakhmanin (at) gmail.com>
+// public domain
+//
+#include <math.h>
+
+#include "poly34.h" // solution of cubic and quartic equation
+#define TwoPi 6.28318530717958648
+const btScalar eps = SIMD_EPSILON;
+
+//=============================================================================
+// _root3, root3 from http://prografix.narod.ru
+//=============================================================================
+static SIMD_FORCE_INLINE btScalar _root3(btScalar x)
+{
+ btScalar s = 1.;
+ while (x < 1.) {
+ x *= 8.;
+ s *= 0.5;
+ }
+ while (x > 8.) {
+ x *= 0.125;
+ s *= 2.;
+ }
+ btScalar r = 1.5;
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ r -= 1. / 3. * (r - x / (r * r));
+ return r * s;
+}
+
+btScalar SIMD_FORCE_INLINE root3(btScalar x)
+{
+ if (x > 0)
+ return _root3(x);
+ else if (x < 0)
+ return -_root3(-x);
+ else
+ return 0.;
+}
+
+// x - array of size 2
+// return 2: 2 real roots x[0], x[1]
+// return 0: pair of complex roots: x[0]i*x[1]
+int SolveP2(btScalar* x, btScalar a, btScalar b)
+{ // solve equation x^2 + a*x + b = 0
+ btScalar D = 0.25 * a * a - b;
+ if (D >= 0) {
+ D = sqrt(D);
+ x[0] = -0.5 * a + D;
+ x[1] = -0.5 * a - D;
+ return 2;
+ }
+ x[0] = -0.5 * a;
+ x[1] = sqrt(-D);
+ return 0;
+}
+//---------------------------------------------------------------------------
+// x - array of size 3
+// In case 3 real roots: => x[0], x[1], x[2], return 3
+// 2 real roots: x[0], x[1], return 2
+// 1 real root : x[0], x[1] i*x[2], return 1
+int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c)
+{ // solve cubic equation x^3 + a*x^2 + b*x + c = 0
+ btScalar a2 = a * a;
+ btScalar q = (a2 - 3 * b) / 9;
+ if (q < 0)
+ q = eps;
+ btScalar r = (a * (2 * a2 - 9 * b) + 27 * c) / 54;
+ // equation x^3 + q*x + r = 0
+ btScalar r2 = r * r;
+ btScalar q3 = q * q * q;
+ btScalar A, B;
+ if (r2 <= (q3 + eps)) { //<<-- FIXED!
+ btScalar t = r / sqrt(q3);
+ if (t < -1)
+ t = -1;
+ if (t > 1)
+ t = 1;
+ t = acos(t);
+ a /= 3;
+ q = -2 * sqrt(q);
+ x[0] = q * cos(t / 3) - a;
+ x[1] = q * cos((t + TwoPi) / 3) - a;
+ x[2] = q * cos((t - TwoPi) / 3) - a;
+ return (3);
+ }
+ else {
+ //A =-pow(fabs(r)+sqrt(r2-q3),1./3);
+ A = -root3(fabs(r) + sqrt(r2 - q3));
+ if (r < 0)
+ A = -A;
+ B = (A == 0 ? 0 : q / A);
+
+ a /= 3;
+ x[0] = (A + B) - a;
+ x[1] = -0.5 * (A + B) - a;
+ x[2] = 0.5 * sqrt(3.) * (A - B);
+ if (fabs(x[2]) < eps) {
+ x[2] = x[1];
+ return (2);
+ }
+ return (1);
+ }
+} // SolveP3(btScalar *x,btScalar a,btScalar b,btScalar c) {
+//---------------------------------------------------------------------------
+// a>=0!
+void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b) // returns: a+i*s = sqrt(x+i*y)
+{
+ btScalar r = sqrt(x * x + y * y);
+ if (y == 0) {
+ r = sqrt(r);
+ if (x >= 0) {
+ a = r;
+ b = 0;
+ }
+ else {
+ a = 0;
+ b = r;
+ }
+ }
+ else { // y != 0
+ a = sqrt(0.5 * (x + r));
+ b = 0.5 * y / a;
+ }
+}
+//---------------------------------------------------------------------------
+int SolveP4Bi(btScalar* x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 + d = 0
+{
+ btScalar D = b * b - 4 * d;
+ if (D >= 0) {
+ btScalar sD = sqrt(D);
+ btScalar x1 = (-b + sD) / 2;
+ btScalar x2 = (-b - sD) / 2; // x2 <= x1
+ if (x2 >= 0) // 0 <= x2 <= x1, 4 real roots
+ {
+ btScalar sx1 = sqrt(x1);
+ btScalar sx2 = sqrt(x2);
+ x[0] = -sx1;
+ x[1] = sx1;
+ x[2] = -sx2;
+ x[3] = sx2;
+ return 4;
+ }
+ if (x1 < 0) // x2 <= x1 < 0, two pair of imaginary roots
+ {
+ btScalar sx1 = sqrt(-x1);
+ btScalar sx2 = sqrt(-x2);
+ x[0] = 0;
+ x[1] = sx1;
+ x[2] = 0;
+ x[3] = sx2;
+ return 0;
+ }
+ // now x2 < 0 <= x1 , two real roots and one pair of imginary root
+ btScalar sx1 = sqrt(x1);
+ btScalar sx2 = sqrt(-x2);
+ x[0] = -sx1;
+ x[1] = sx1;
+ x[2] = 0;
+ x[3] = sx2;
+ return 2;
+ }
+ else { // if( D < 0 ), two pair of compex roots
+ btScalar sD2 = 0.5 * sqrt(-D);
+ CSqrt(-0.5 * b, sD2, x[0], x[1]);
+ CSqrt(-0.5 * b, -sD2, x[2], x[3]);
+ return 0;
+ } // if( D>=0 )
+} // SolveP4Bi(btScalar *x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 d
+//---------------------------------------------------------------------------
+#define SWAP(a, b) \
+{ \
+t = b; \
+b = a; \
+a = t; \
+}
+static void dblSort3(btScalar& a, btScalar& b, btScalar& c) // make: a <= b <= c
+{
+ btScalar t;
+ if (a > b)
+ SWAP(a, b); // now a<=b
+ if (c < b) {
+ SWAP(b, c); // now a<=b, b<=c
+ if (a > b)
+ SWAP(a, b); // now a<=b
+ }
+}
+//---------------------------------------------------------------------------
+int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d
+{
+ //if( c==0 ) return SolveP4Bi(x,b,d); // After that, c!=0
+ if (fabs(c) < 1e-14 * (fabs(b) + fabs(d)))
+ return SolveP4Bi(x, b, d); // After that, c!=0
+
+ int res3 = SolveP3(x, 2 * b, b * b - 4 * d, -c * c); // solve resolvent
+ // by Viet theorem: x1*x2*x3=-c*c not equals to 0, so x1!=0, x2!=0, x3!=0
+ if (res3 > 1) // 3 real roots,
+ {
+ dblSort3(x[0], x[1], x[2]); // sort roots to x[0] <= x[1] <= x[2]
+ // Note: x[0]*x[1]*x[2]= c*c > 0
+ if (x[0] > 0) // all roots are positive
+ {
+ btScalar sz1 = sqrt(x[0]);
+ btScalar sz2 = sqrt(x[1]);
+ btScalar sz3 = sqrt(x[2]);
+ // Note: sz1*sz2*sz3= -c (and not equal to 0)
+ if (c > 0) {
+ x[0] = (-sz1 - sz2 - sz3) / 2;
+ x[1] = (-sz1 + sz2 + sz3) / 2;
+ x[2] = (+sz1 - sz2 + sz3) / 2;
+ x[3] = (+sz1 + sz2 - sz3) / 2;
+ return 4;
+ }
+ // now: c<0
+ x[0] = (-sz1 - sz2 + sz3) / 2;
+ x[1] = (-sz1 + sz2 - sz3) / 2;
+ x[2] = (+sz1 - sz2 - sz3) / 2;
+ x[3] = (+sz1 + sz2 + sz3) / 2;
+ return 4;
+ } // if( x[0] > 0) // all roots are positive
+ // now x[0] <= x[1] < 0, x[2] > 0
+ // two pair of comlex roots
+ btScalar sz1 = sqrt(-x[0]);
+ btScalar sz2 = sqrt(-x[1]);
+ btScalar sz3 = sqrt(x[2]);
+
+ if (c > 0) // sign = -1
+ {
+ x[0] = -sz3 / 2;
+ x[1] = (sz1 - sz2) / 2; // x[0]i*x[1]
+ x[2] = sz3 / 2;
+ x[3] = (-sz1 - sz2) / 2; // x[2]i*x[3]
+ return 0;
+ }
+ // now: c<0 , sign = +1
+ x[0] = sz3 / 2;
+ x[1] = (-sz1 + sz2) / 2;
+ x[2] = -sz3 / 2;
+ x[3] = (sz1 + sz2) / 2;
+ return 0;
+ } // if( res3>1 ) // 3 real roots,
+ // now resoventa have 1 real and pair of compex roots
+ // x[0] - real root, and x[0]>0,
+ // x[1]i*x[2] - complex roots,
+ // x[0] must be >=0. But one times x[0]=~ 1e-17, so:
+ if (x[0] < 0)
+ x[0] = 0;
+ btScalar sz1 = sqrt(x[0]);
+ btScalar szr, szi;
+ CSqrt(x[1], x[2], szr, szi); // (szr+i*szi)^2 = x[1]+i*x[2]
+ if (c > 0) // sign = -1
+ {
+ x[0] = -sz1 / 2 - szr; // 1st real root
+ x[1] = -sz1 / 2 + szr; // 2nd real root
+ x[2] = sz1 / 2;
+ x[3] = szi;
+ return 2;
+ }
+ // now: c<0 , sign = +1
+ x[0] = sz1 / 2 - szr; // 1st real root
+ x[1] = sz1 / 2 + szr; // 2nd real root
+ x[2] = -sz1 / 2;
+ x[3] = szi;
+ return 2;
+} // SolveP4De(btScalar *x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d
+//-----------------------------------------------------------------------------
+btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d
+{
+ btScalar fxs = ((4 * x + 3 * a) * x + 2 * b) * x + c; // f'(x)
+ if (fxs == 0)
+ return x; //return 1e99; <<-- FIXED!
+ btScalar fx = (((x + a) * x + b) * x + c) * x + d; // f(x)
+ return x - fx / fxs;
+}
+//-----------------------------------------------------------------------------
+// x - array of size 4
+// return 4: 4 real roots x[0], x[1], x[2], x[3], possible multiple roots
+// return 2: 2 real roots x[0], x[1] and complex x[2]i*x[3],
+// return 0: two pair of complex roots: x[0]i*x[1], x[2]i*x[3],
+int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d)
+{ // solve equation x^4 + a*x^3 + b*x^2 + c*x + d by Dekart-Euler method
+ // move to a=0:
+ btScalar d1 = d + 0.25 * a * (0.25 * b * a - 3. / 64 * a * a * a - c);
+ btScalar c1 = c + 0.5 * a * (0.25 * a * a - b);
+ btScalar b1 = b - 0.375 * a * a;
+ int res = SolveP4De(x, b1, c1, d1);
+ if (res == 4) {
+ x[0] -= a / 4;
+ x[1] -= a / 4;
+ x[2] -= a / 4;
+ x[3] -= a / 4;
+ }
+ else if (res == 2) {
+ x[0] -= a / 4;
+ x[1] -= a / 4;
+ x[2] -= a / 4;
+ }
+ else {
+ x[0] -= a / 4;
+ x[2] -= a / 4;
+ }
+ // one Newton step for each real root:
+ if (res > 0) {
+ x[0] = N4Step(x[0], a, b, c, d);
+ x[1] = N4Step(x[1], a, b, c, d);
+ }
+ if (res > 2) {
+ x[2] = N4Step(x[2], a, b, c, d);
+ x[3] = N4Step(x[3], a, b, c, d);
+ }
+ return res;
+}
+//-----------------------------------------------------------------------------
+#define F5(t) (((((t + a) * t + b) * t + c) * t + d) * t + e)
+//-----------------------------------------------------------------------------
+btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+{
+ int cnt;
+ if (fabs(e) < eps)
+ return 0;
+
+ btScalar brd = fabs(a); // brd - border of real roots
+ if (fabs(b) > brd)
+ brd = fabs(b);
+ if (fabs(c) > brd)
+ brd = fabs(c);
+ if (fabs(d) > brd)
+ brd = fabs(d);
+ if (fabs(e) > brd)
+ brd = fabs(e);
+ brd++; // brd - border of real roots
+
+ btScalar x0, f0; // less than root
+ btScalar x1, f1; // greater than root
+ btScalar x2, f2, f2s; // next values, f(x2), f'(x2)
+ btScalar dx = 0;
+
+ if (e < 0) {
+ x0 = 0;
+ x1 = brd;
+ f0 = e;
+ f1 = F5(x1);
+ x2 = 0.01 * brd;
+ } // positive root
+ else {
+ x0 = -brd;
+ x1 = 0;
+ f0 = F5(x0);
+ f1 = e;
+ x2 = -0.01 * brd;
+ } // negative root
+
+ if (fabs(f0) < eps)
+ return x0;
+ if (fabs(f1) < eps)
+ return x1;
+
+ // now x0<x1, f(x0)<0, f(x1)>0
+ // Firstly 10 bisections
+ for (cnt = 0; cnt < 10; cnt++) {
+ x2 = (x0 + x1) / 2; // next point
+ //x2 = x0 - f0*(x1 - x0) / (f1 - f0); // next point
+ f2 = F5(x2); // f(x2)
+ if (fabs(f2) < eps)
+ return x2;
+ if (f2 > 0) {
+ x1 = x2;
+ f1 = f2;
+ }
+ else {
+ x0 = x2;
+ f0 = f2;
+ }
+ }
+
+ // At each step:
+ // x0<x1, f(x0)<0, f(x1)>0.
+ // x2 - next value
+ // we hope that x0 < x2 < x1, but not necessarily
+ do {
+ if (cnt++ > 50)
+ break;
+ if (x2 <= x0 || x2 >= x1)
+ x2 = (x0 + x1) / 2; // now x0 < x2 < x1
+ f2 = F5(x2); // f(x2)
+ if (fabs(f2) < eps)
+ return x2;
+ if (f2 > 0) {
+ x1 = x2;
+ f1 = f2;
+ }
+ else {
+ x0 = x2;
+ f0 = f2;
+ }
+ f2s = (((5 * x2 + 4 * a) * x2 + 3 * b) * x2 + 2 * c) * x2 + d; // f'(x2)
+ if (fabs(f2s) < eps) {
+ x2 = 1e99;
+ continue;
+ }
+ dx = f2 / f2s;
+ x2 -= dx;
+ } while (fabs(dx) > eps);
+ return x2;
+} // SolveP5_1(btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+//-----------------------------------------------------------------------------
+int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+{
+ btScalar r = x[0] = SolveP5_1(a, b, c, d, e);
+ btScalar a1 = a + r, b1 = b + r * a1, c1 = c + r * b1, d1 = d + r * c1;
+ return 1 + SolveP4(x + 1, a1, b1, c1, d1);
+} // SolveP5(btScalar *x,btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+//-----------------------------------------------------------------------------
diff --git a/thirdparty/bullet/BulletSoftBody/poly34.h b/thirdparty/bullet/BulletSoftBody/poly34.h
new file mode 100644
index 0000000000..32ad5d7da5
--- /dev/null
+++ b/thirdparty/bullet/BulletSoftBody/poly34.h
@@ -0,0 +1,38 @@
+// poly34.h : solution of cubic and quartic equation
+// (c) Khashin S.I. http://math.ivanovo.ac.ru/dalgebra/Khashin/index.html
+// khash2 (at) gmail.com
+
+#ifndef POLY_34
+#define POLY_34
+#include "LinearMath/btScalar.h"
+// x - array of size 2
+// return 2: 2 real roots x[0], x[1]
+// return 0: pair of complex roots: x[0]i*x[1]
+int SolveP2(btScalar* x, btScalar a, btScalar b); // solve equation x^2 + a*x + b = 0
+
+// x - array of size 3
+// return 3: 3 real roots x[0], x[1], x[2]
+// return 1: 1 real root x[0] and pair of complex roots: x[1]i*x[2]
+int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c); // solve cubic equation x^3 + a*x^2 + b*x + c = 0
+
+// x - array of size 4
+// return 4: 4 real roots x[0], x[1], x[2], x[3], possible multiple roots
+// return 2: 2 real roots x[0], x[1] and complex x[2]i*x[3],
+// return 0: two pair of complex roots: x[0]i*x[1], x[2]i*x[3],
+int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d); // solve equation x^4 + a*x^3 + b*x^2 + c*x + d = 0 by Dekart-Euler method
+
+// x - array of size 5
+// return 5: 5 real roots x[0], x[1], x[2], x[3], x[4], possible multiple roots
+// return 3: 3 real roots x[0], x[1], x[2] and complex x[3]i*x[4],
+// return 1: 1 real root x[0] and two pair of complex roots: x[1]i*x[2], x[3]i*x[4],
+int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+
+//-----------------------------------------------------------------------------
+// And some additional functions for internal use.
+// Your may remove this definitions from here
+int SolveP4Bi(btScalar* x, btScalar b, btScalar d); // solve equation x^4 + b*x^2 + d = 0
+int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d); // solve equation x^4 + b*x^2 + c*x + d = 0
+void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b); // returns as a+i*s, sqrt(x+i*y)
+btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d); // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d
+btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0
+#endif
diff --git a/thirdparty/bullet/LinearMath/btImplicitQRSVD.h b/thirdparty/bullet/LinearMath/btImplicitQRSVD.h
index 7b4cfaf21e..aaedc964f6 100644
--- a/thirdparty/bullet/LinearMath/btImplicitQRSVD.h
+++ b/thirdparty/bullet/LinearMath/btImplicitQRSVD.h
@@ -41,7 +41,7 @@
#ifndef btImplicitQRSVD_h
#define btImplicitQRSVD_h
-
+#include <limits>
#include "btMatrix3x3.h"
class btMatrix2x2
{
@@ -753,7 +753,7 @@ inline int singularValueDecomposition(const btMatrix3x3& A,
btMatrix3x3& V,
btScalar tol = 128*std::numeric_limits<btScalar>::epsilon())
{
- using std::fabs;
+// using std::fabs;
btMatrix3x3 B = A;
U.setIdentity();
V.setIdentity();
diff --git a/thirdparty/bullet/LinearMath/btMatrix3x3.h b/thirdparty/bullet/LinearMath/btMatrix3x3.h
index cc33a68664..9c90fee1d2 100644
--- a/thirdparty/bullet/LinearMath/btMatrix3x3.h
+++ b/thirdparty/bullet/LinearMath/btMatrix3x3.h
@@ -26,10 +26,12 @@ subject to the following restrictions:
#endif
#if defined(BT_USE_SSE)
+#define v0000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
+const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0000) = {0.0f, 0.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
@@ -330,6 +332,20 @@ public:
btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
}
+
+ /**@brief Set the matrix to the identity */
+ void setZero()
+ {
+#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
+ m_el[0] = v0000;
+ m_el[1] = v0000;
+ m_el[2] = v0000;
+#else
+ setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0),
+ btScalar(0.0), btScalar(0.0), btScalar(0.0),
+ btScalar(0.0), btScalar(0.0), btScalar(0.0));
+#endif
+ }
static const btMatrix3x3& getIdentity()
{
diff --git a/thirdparty/bullet/LinearMath/btMatrixX.h b/thirdparty/bullet/LinearMath/btMatrixX.h
index 961c94dc63..bb0f0dd259 100644
--- a/thirdparty/bullet/LinearMath/btMatrixX.h
+++ b/thirdparty/bullet/LinearMath/btMatrixX.h
@@ -346,10 +346,9 @@ struct btMatrixX
T dotProd = 0;
{
{
- int r = rows();
int c = cols();
- for (int k = 0; k < cols(); k++)
+ for (int k = 0; k < c; k++)
{
T w = (*this)(i, k);
if (other(k, j) != 0.f)
diff --git a/thirdparty/bullet/LinearMath/btModifiedGramSchmidt.h b/thirdparty/bullet/LinearMath/btModifiedGramSchmidt.h
new file mode 100644
index 0000000000..33bab8d650
--- /dev/null
+++ b/thirdparty/bullet/LinearMath/btModifiedGramSchmidt.h
@@ -0,0 +1,83 @@
+//
+// btModifiedGramSchmidt.h
+// LinearMath
+//
+// Created by Xuchen Han on 4/4/20.
+//
+
+#ifndef btModifiedGramSchmidt_h
+#define btModifiedGramSchmidt_h
+
+#include "btReducedVector.h"
+#include "btAlignedObjectArray.h"
+#include <iostream>
+#include <cmath>
+template<class TV>
+class btModifiedGramSchmidt
+{
+public:
+ btAlignedObjectArray<TV> m_in;
+ btAlignedObjectArray<TV> m_out;
+
+ btModifiedGramSchmidt(const btAlignedObjectArray<TV>& vecs): m_in(vecs)
+ {
+ m_out.resize(0);
+ }
+
+ void solve()
+ {
+ m_out.resize(m_in.size());
+ for (int i = 0; i < m_in.size(); ++i)
+ {
+// printf("========= starting %d ==========\n", i);
+ TV v(m_in[i]);
+// v.print();
+ for (int j = 0; j < i; ++j)
+ {
+ v = v - v.proj(m_out[j]);
+// v.print();
+ }
+ v.normalize();
+ m_out[i] = v;
+// v.print();
+ }
+ }
+
+ void test()
+ {
+ std::cout << SIMD_EPSILON << std::endl;
+ printf("=======inputs=========\n");
+ for (int i = 0; i < m_out.size(); ++i)
+ {
+ m_in[i].print();
+ }
+ printf("=======output=========\n");
+ for (int i = 0; i < m_out.size(); ++i)
+ {
+ m_out[i].print();
+ }
+ btScalar eps = SIMD_EPSILON;
+ for (int i = 0; i < m_out.size(); ++i)
+ {
+ for (int j = 0; j < m_out.size(); ++j)
+ {
+ if (i == j)
+ {
+ if (std::abs(1.0-m_out[i].dot(m_out[j])) > eps)// && std::abs(m_out[i].dot(m_out[j])) > eps)
+ {
+ printf("vec[%d] is not unit, norm squared = %f\n", i,m_out[i].dot(m_out[j]));
+ }
+ }
+ else
+ {
+ if (std::abs(m_out[i].dot(m_out[j])) > eps)
+ {
+ printf("vec[%d] and vec[%d] is not orthogonal, dot product = %f\n", i, j, m_out[i].dot(m_out[j]));
+ }
+ }
+ }
+ }
+ }
+};
+template class btModifiedGramSchmidt<btReducedVector>;
+#endif /* btModifiedGramSchmidt_h */
diff --git a/thirdparty/bullet/LinearMath/btReducedVector.cpp b/thirdparty/bullet/LinearMath/btReducedVector.cpp
new file mode 100644
index 0000000000..1539584e7e
--- /dev/null
+++ b/thirdparty/bullet/LinearMath/btReducedVector.cpp
@@ -0,0 +1,170 @@
+//
+// btReducedVector.cpp
+// LinearMath
+//
+// Created by Xuchen Han on 4/4/20.
+//
+#include <stdio.h>
+#include "btReducedVector.h"
+#include <cmath>
+
+// returns the projection of this onto other
+btReducedVector btReducedVector::proj(const btReducedVector& other) const
+{
+ btReducedVector ret(m_sz);
+ btScalar other_length2 = other.length2();
+ if (other_length2 < SIMD_EPSILON)
+ {
+ return ret;
+ }
+ return other*(this->dot(other))/other_length2;
+}
+
+void btReducedVector::normalize()
+{
+ if (this->length2() < SIMD_EPSILON)
+ {
+ m_indices.clear();
+ m_vecs.clear();
+ return;
+ }
+ *this /= std::sqrt(this->length2());
+}
+
+bool btReducedVector::testAdd() const
+{
+ int sz = 5;
+ btAlignedObjectArray<int> id1;
+ id1.push_back(1);
+ id1.push_back(3);
+ btAlignedObjectArray<btVector3> v1;
+ v1.push_back(btVector3(1,0,1));
+ v1.push_back(btVector3(3,1,5));
+ btAlignedObjectArray<int> id2;
+ id2.push_back(2);
+ id2.push_back(3);
+ id2.push_back(5);
+ btAlignedObjectArray<btVector3> v2;
+ v2.push_back(btVector3(2,3,1));
+ v2.push_back(btVector3(3,4,9));
+ v2.push_back(btVector3(0,4,0));
+ btAlignedObjectArray<int> id3;
+ id3.push_back(1);
+ id3.push_back(2);
+ id3.push_back(3);
+ id3.push_back(5);
+ btAlignedObjectArray<btVector3> v3;
+ v3.push_back(btVector3(1,0,1));
+ v3.push_back(btVector3(2,3,1));
+ v3.push_back(btVector3(6,5,14));
+ v3.push_back(btVector3(0,4,0));
+ btReducedVector rv1(sz, id1, v1);
+ btReducedVector rv2(sz, id2, v2);
+ btReducedVector ans(sz, id3, v3);
+ bool ret = ((ans == rv1+rv2) && (ans == rv2+rv1));
+ if (!ret)
+ printf("btReducedVector testAdd failed\n");
+ return ret;
+}
+
+bool btReducedVector::testMinus() const
+{
+ int sz = 5;
+ btAlignedObjectArray<int> id1;
+ id1.push_back(1);
+ id1.push_back(3);
+ btAlignedObjectArray<btVector3> v1;
+ v1.push_back(btVector3(1,0,1));
+ v1.push_back(btVector3(3,1,5));
+ btAlignedObjectArray<int> id2;
+ id2.push_back(2);
+ id2.push_back(3);
+ id2.push_back(5);
+ btAlignedObjectArray<btVector3> v2;
+ v2.push_back(btVector3(2,3,1));
+ v2.push_back(btVector3(3,4,9));
+ v2.push_back(btVector3(0,4,0));
+ btAlignedObjectArray<int> id3;
+ id3.push_back(1);
+ id3.push_back(2);
+ id3.push_back(3);
+ id3.push_back(5);
+ btAlignedObjectArray<btVector3> v3;
+ v3.push_back(btVector3(-1,-0,-1));
+ v3.push_back(btVector3(2,3,1));
+ v3.push_back(btVector3(0,3,4));
+ v3.push_back(btVector3(0,4,0));
+ btReducedVector rv1(sz, id1, v1);
+ btReducedVector rv2(sz, id2, v2);
+ btReducedVector ans(sz, id3, v3);
+ bool ret = (ans == rv2-rv1);
+ if (!ret)
+ printf("btReducedVector testMinus failed\n");
+ return ret;
+}
+
+bool btReducedVector::testDot() const
+{
+ int sz = 5;
+ btAlignedObjectArray<int> id1;
+ id1.push_back(1);
+ id1.push_back(3);
+ btAlignedObjectArray<btVector3> v1;
+ v1.push_back(btVector3(1,0,1));
+ v1.push_back(btVector3(3,1,5));
+ btAlignedObjectArray<int> id2;
+ id2.push_back(2);
+ id2.push_back(3);
+ id2.push_back(5);
+ btAlignedObjectArray<btVector3> v2;
+ v2.push_back(btVector3(2,3,1));
+ v2.push_back(btVector3(3,4,9));
+ v2.push_back(btVector3(0,4,0));
+ btReducedVector rv1(sz, id1, v1);
+ btReducedVector rv2(sz, id2, v2);
+ btScalar ans = 58;
+ bool ret = (ans == rv2.dot(rv1) && ans == rv1.dot(rv2));
+ ans = 14+16+9+16+81;
+ ret &= (ans==rv2.dot(rv2));
+
+ if (!ret)
+ printf("btReducedVector testDot failed\n");
+ return ret;
+}
+
+bool btReducedVector::testMultiply() const
+{
+ int sz = 5;
+ btAlignedObjectArray<int> id1;
+ id1.push_back(1);
+ id1.push_back(3);
+ btAlignedObjectArray<btVector3> v1;
+ v1.push_back(btVector3(1,0,1));
+ v1.push_back(btVector3(3,1,5));
+ btScalar s = 2;
+ btReducedVector rv1(sz, id1, v1);
+ btAlignedObjectArray<int> id2;
+ id2.push_back(1);
+ id2.push_back(3);
+ btAlignedObjectArray<btVector3> v2;
+ v2.push_back(btVector3(2,0,2));
+ v2.push_back(btVector3(6,2,10));
+ btReducedVector ans(sz, id2, v2);
+ bool ret = (ans == rv1*s);
+ if (!ret)
+ printf("btReducedVector testMultiply failed\n");
+ return ret;
+}
+
+void btReducedVector::test() const
+{
+ bool ans = testAdd() && testMinus() && testDot() && testMultiply();
+ if (ans)
+ {
+ printf("All tests passed\n");
+ }
+ else
+ {
+ printf("Tests failed\n");
+ }
+}
diff --git a/thirdparty/bullet/LinearMath/btReducedVector.h b/thirdparty/bullet/LinearMath/btReducedVector.h
new file mode 100644
index 0000000000..83b5e581e5
--- /dev/null
+++ b/thirdparty/bullet/LinearMath/btReducedVector.h
@@ -0,0 +1,320 @@
+//
+// btReducedVectors.h
+// BulletLinearMath
+//
+// Created by Xuchen Han on 4/4/20.
+//
+#ifndef btReducedVectors_h
+#define btReducedVectors_h
+#include "btVector3.h"
+#include "btMatrix3x3.h"
+#include "btAlignedObjectArray.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+struct TwoInts
+{
+ int a,b;
+};
+inline bool operator<(const TwoInts& A, const TwoInts& B)
+{
+ return A.b < B.b;
+}
+
+
+// A helper vector type used for CG projections
+class btReducedVector
+{
+public:
+ btAlignedObjectArray<int> m_indices;
+ btAlignedObjectArray<btVector3> m_vecs;
+ int m_sz; // all m_indices value < m_sz
+public:
+ btReducedVector():m_sz(0)
+ {
+ m_indices.resize(0);
+ m_vecs.resize(0);
+ m_indices.clear();
+ m_vecs.clear();
+ }
+
+ btReducedVector(int sz): m_sz(sz)
+ {
+ m_indices.resize(0);
+ m_vecs.resize(0);
+ m_indices.clear();
+ m_vecs.clear();
+ }
+
+ btReducedVector(int sz, const btAlignedObjectArray<int>& indices, const btAlignedObjectArray<btVector3>& vecs): m_sz(sz), m_indices(indices), m_vecs(vecs)
+ {
+ }
+
+ void simplify()
+ {
+ btAlignedObjectArray<int> old_indices(m_indices);
+ btAlignedObjectArray<btVector3> old_vecs(m_vecs);
+ m_indices.resize(0);
+ m_vecs.resize(0);
+ m_indices.clear();
+ m_vecs.clear();
+ for (int i = 0; i < old_indices.size(); ++i)
+ {
+ if (old_vecs[i].length2() > SIMD_EPSILON)
+ {
+ m_indices.push_back(old_indices[i]);
+ m_vecs.push_back(old_vecs[i]);
+ }
+ }
+ }
+
+ btReducedVector operator+(const btReducedVector& other)
+ {
+ btReducedVector ret(m_sz);
+ int i=0, j=0;
+ while (i < m_indices.size() && j < other.m_indices.size())
+ {
+ if (m_indices[i] < other.m_indices[j])
+ {
+ ret.m_indices.push_back(m_indices[i]);
+ ret.m_vecs.push_back(m_vecs[i]);
+ ++i;
+ }
+ else if (m_indices[i] > other.m_indices[j])
+ {
+ ret.m_indices.push_back(other.m_indices[j]);
+ ret.m_vecs.push_back(other.m_vecs[j]);
+ ++j;
+ }
+ else
+ {
+ ret.m_indices.push_back(other.m_indices[j]);
+ ret.m_vecs.push_back(m_vecs[i] + other.m_vecs[j]);
+ ++i; ++j;
+ }
+ }
+ while (i < m_indices.size())
+ {
+ ret.m_indices.push_back(m_indices[i]);
+ ret.m_vecs.push_back(m_vecs[i]);
+ ++i;
+ }
+ while (j < other.m_indices.size())
+ {
+ ret.m_indices.push_back(other.m_indices[j]);
+ ret.m_vecs.push_back(other.m_vecs[j]);
+ ++j;
+ }
+ ret.simplify();
+ return ret;
+ }
+
+ btReducedVector operator-()
+ {
+ btReducedVector ret(m_sz);
+ for (int i = 0; i < m_indices.size(); ++i)
+ {
+ ret.m_indices.push_back(m_indices[i]);
+ ret.m_vecs.push_back(-m_vecs[i]);
+ }
+ ret.simplify();
+ return ret;
+ }
+
+ btReducedVector operator-(const btReducedVector& other)
+ {
+ btReducedVector ret(m_sz);
+ int i=0, j=0;
+ while (i < m_indices.size() && j < other.m_indices.size())
+ {
+ if (m_indices[i] < other.m_indices[j])
+ {
+ ret.m_indices.push_back(m_indices[i]);
+ ret.m_vecs.push_back(m_vecs[i]);
+ ++i;
+ }
+ else if (m_indices[i] > other.m_indices[j])
+ {
+ ret.m_indices.push_back(other.m_indices[j]);
+ ret.m_vecs.push_back(-other.m_vecs[j]);
+ ++j;
+ }
+ else
+ {
+ ret.m_indices.push_back(other.m_indices[j]);
+ ret.m_vecs.push_back(m_vecs[i] - other.m_vecs[j]);
+ ++i; ++j;
+ }
+ }
+ while (i < m_indices.size())
+ {
+ ret.m_indices.push_back(m_indices[i]);
+ ret.m_vecs.push_back(m_vecs[i]);
+ ++i;
+ }
+ while (j < other.m_indices.size())
+ {
+ ret.m_indices.push_back(other.m_indices[j]);
+ ret.m_vecs.push_back(-other.m_vecs[j]);
+ ++j;
+ }
+ ret.simplify();
+ return ret;
+ }
+
+ bool operator==(const btReducedVector& other) const
+ {
+ if (m_sz != other.m_sz)
+ return false;
+ if (m_indices.size() != other.m_indices.size())
+ return false;
+ for (int i = 0; i < m_indices.size(); ++i)
+ {
+ if (m_indices[i] != other.m_indices[i] || m_vecs[i] != other.m_vecs[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ bool operator!=(const btReducedVector& other) const
+ {
+ return !(*this == other);
+ }
+
+ btReducedVector& operator=(const btReducedVector& other)
+ {
+ if (this == &other)
+ {
+ return *this;
+ }
+ m_sz = other.m_sz;
+ m_indices.copyFromArray(other.m_indices);
+ m_vecs.copyFromArray(other.m_vecs);
+ return *this;
+ }
+
+ btScalar dot(const btReducedVector& other) const
+ {
+ btScalar ret = 0;
+ int j = 0;
+ for (int i = 0; i < m_indices.size(); ++i)
+ {
+ while (j < other.m_indices.size() && other.m_indices[j] < m_indices[i])
+ {
+ ++j;
+ }
+ if (j < other.m_indices.size() && other.m_indices[j] == m_indices[i])
+ {
+ ret += m_vecs[i].dot(other.m_vecs[j]);
+// ++j;
+ }
+ }
+ return ret;
+ }
+
+ btScalar dot(const btAlignedObjectArray<btVector3>& other) const
+ {
+ btScalar ret = 0;
+ for (int i = 0; i < m_indices.size(); ++i)
+ {
+ ret += m_vecs[i].dot(other[m_indices[i]]);
+ }
+ return ret;
+ }
+
+ btScalar length2() const
+ {
+ return this->dot(*this);
+ }
+
+ void normalize();
+
+ // returns the projection of this onto other
+ btReducedVector proj(const btReducedVector& other) const;
+
+ bool testAdd() const;
+
+ bool testMinus() const;
+
+ bool testDot() const;
+
+ bool testMultiply() const;
+
+ void test() const;
+
+ void print() const
+ {
+ for (int i = 0; i < m_indices.size(); ++i)
+ {
+ printf("%d: (%f, %f, %f)/", m_indices[i], m_vecs[i][0],m_vecs[i][1],m_vecs[i][2]);
+ }
+ printf("\n");
+ }
+
+
+ void sort()
+ {
+ std::vector<TwoInts> tuples;
+ for (int i = 0; i < m_indices.size(); ++i)
+ {
+ TwoInts ti;
+ ti.a = i;
+ ti.b = m_indices[i];
+ tuples.push_back(ti);
+ }
+ std::sort(tuples.begin(), tuples.end());
+ btAlignedObjectArray<int> new_indices;
+ btAlignedObjectArray<btVector3> new_vecs;
+ for (int i = 0; i < tuples.size(); ++i)
+ {
+ new_indices.push_back(tuples[i].b);
+ new_vecs.push_back(m_vecs[tuples[i].a]);
+ }
+ m_indices = new_indices;
+ m_vecs = new_vecs;
+ }
+};
+
+SIMD_FORCE_INLINE btReducedVector operator*(const btReducedVector& v, btScalar s)
+{
+ btReducedVector ret(v.m_sz);
+ for (int i = 0; i < v.m_indices.size(); ++i)
+ {
+ ret.m_indices.push_back(v.m_indices[i]);
+ ret.m_vecs.push_back(s*v.m_vecs[i]);
+ }
+ ret.simplify();
+ return ret;
+}
+
+SIMD_FORCE_INLINE btReducedVector operator*(btScalar s, const btReducedVector& v)
+{
+ return v*s;
+}
+
+SIMD_FORCE_INLINE btReducedVector operator/(const btReducedVector& v, btScalar s)
+{
+ return v * (1.0/s);
+}
+
+SIMD_FORCE_INLINE btReducedVector& operator/=(btReducedVector& v, btScalar s)
+{
+ v = v/s;
+ return v;
+}
+
+SIMD_FORCE_INLINE btReducedVector& operator+=(btReducedVector& v1, const btReducedVector& v2)
+{
+ v1 = v1+v2;
+ return v1;
+}
+
+SIMD_FORCE_INLINE btReducedVector& operator-=(btReducedVector& v1, const btReducedVector& v2)
+{
+ v1 = v1-v2;
+ return v1;
+}
+
+#endif /* btReducedVectors_h */
diff --git a/thirdparty/bullet/btBulletCollisionAll.cpp b/thirdparty/bullet/btBulletCollisionAll.cpp
index 2851fb3b73..4a3ec8dd6f 100644
--- a/thirdparty/bullet/btBulletCollisionAll.cpp
+++ b/thirdparty/bullet/btBulletCollisionAll.cpp
@@ -23,6 +23,7 @@
#include "BulletCollision/CollisionDispatch/btConvexConvexAlgorithm.cpp"
#include "BulletCollision/CollisionDispatch/btSphereBoxCollisionAlgorithm.cpp"
#include "BulletCollision/CollisionDispatch/btCollisionDispatcher.cpp"
+#include "BulletCollision/CollisionDispatch/btCollisionDispatcherMt.cpp"
#include "BulletCollision/CollisionDispatch/btConvexPlaneCollisionAlgorithm.cpp"
#include "BulletCollision/CollisionDispatch/btSphereSphereCollisionAlgorithm.cpp"
#include "BulletCollision/CollisionDispatch/btCollisionObject.cpp"
diff --git a/thirdparty/bullet/btLinearMathAll.cpp b/thirdparty/bullet/btLinearMathAll.cpp
index 808f412803..d05a19e630 100644
--- a/thirdparty/bullet/btLinearMathAll.cpp
+++ b/thirdparty/bullet/btLinearMathAll.cpp
@@ -8,6 +8,7 @@
#include "LinearMath/btConvexHullComputer.cpp"
#include "LinearMath/btQuickprof.cpp"
#include "LinearMath/btThreads.cpp"
+#include "LinearMath/btReducedVector.cpp"
#include "LinearMath/TaskScheduler/btTaskScheduler.cpp"
#include "LinearMath/TaskScheduler/btThreadSupportPosix.cpp"
#include "LinearMath/TaskScheduler/btThreadSupportWin32.cpp"
diff --git a/thirdparty/jpeg-compressor/jpgd.cpp b/thirdparty/jpeg-compressor/jpgd.cpp
index 62fbd1b72d..257d0b7574 100644
--- a/thirdparty/jpeg-compressor/jpgd.cpp
+++ b/thirdparty/jpeg-compressor/jpgd.cpp
@@ -1,27 +1,55 @@
-// jpgd.cpp - C++ class for JPEG decompression.
-// Public domain, Rich Geldreich <richgel99@gmail.com>
+// jpgd.cpp - C++ class for JPEG decompression. Written by Richard Geldreich <richgel99@gmail.com> between 1994-2020.
+// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
+// Supports box and linear chroma upsampling.
+//
+// Released under two licenses. You are free to choose which license you want:
+// License 1:
+// Public Domain
+//
+// License 2:
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
// Alex Evans: Linear memory allocator (taken from jpge.h).
-// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
+// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings
+// v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings.
//
-// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
+// Important:
+// #define JPGD_USE_SSE2 to 0 to completely disable SSE2 usage.
//
-// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
-// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
-// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
-
#include "jpgd.h"
#include <string.h>
-
+#include <algorithm>
#include <assert.h>
-#define JPGD_ASSERT(x) assert(x)
#ifdef _MSC_VER
#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
#endif
-// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
-// This is slower, but results in higher quality on images with highly saturated colors.
-#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
+#ifndef JPGD_USE_SSE2
+
+ #if defined(__GNUC__)
+
+ #if (defined(__x86_64__) || defined(_M_X64))
+ #if defined(__SSE2__)
+ #define JPGD_USE_SSE2 (1)
+ #endif
+ #endif
+
+ #else
+ #define JPGD_USE_SSE2 (1)
+ #endif
+
+#endif
#define JPGD_TRUE (1)
#define JPGD_FALSE (0)
@@ -29,28 +57,28 @@
#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
-// TODO: Move to header and use these constants when declaring the arrays.
-#define JPGD_HUFF_TREE_MAX_LENGTH 512
-#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256
-
namespace jpgd {
-static inline void *jpgd_malloc(size_t nSize) { return malloc(nSize); }
-static inline void jpgd_free(void *p) { free(p); }
+ static inline void* jpgd_malloc(size_t nSize) { return malloc(nSize); }
+ static inline void jpgd_free(void* p) { free(p); }
-// DCT coefficients are stored in this sequence.
-static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
+ // DCT coefficients are stored in this sequence.
+ static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
-enum JPEG_MARKER
-{
- M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
- M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
- M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
- M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
- M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
-};
+ enum JPEG_MARKER
+ {
+ M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8,
+ M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC,
+ M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7,
+ M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF,
+ M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0
+ };
-enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
+ enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
+
+#if JPGD_USE_SSE2
+#include "jpgd_idct.h"
+#endif
#define CONST_BITS 13
#define PASS1_BITS 2
@@ -76,3130 +104,3182 @@ enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2,
#define CLAMP(i) ((static_cast<uint>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
-// Compiler creates a fast path 1D IDCT for X non-zero columns
-template <int NONZERO_COLS>
-struct Row
-{
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
- {
- // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
- #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
-
- const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
-
- const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
- const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
- const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-
- const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
- const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
-
- const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
-
- const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
-
- const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
- const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
-
- const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
- const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
- const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
- const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
-
- const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
- const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
- const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
- const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
-
- pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
- pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
- pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
- pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
- pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
- pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
- pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
- pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
- }
-};
-
-template <>
-struct Row<0>
-{
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
- {
-#ifdef _MSC_VER
- pTemp; pSrc;
+ static inline int left_shifti(int val, uint32_t bits)
+ {
+ return static_cast<int>(static_cast<uint32_t>(val) << bits);
+ }
+
+ // Compiler creates a fast path 1D IDCT for X non-zero columns
+ template <int NONZERO_COLS>
+ struct Row
+ {
+ static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
+ {
+ // ACCESS_COL() will be optimized at compile time to either an array access, or 0. Good compilers will then optimize out muls against 0.
+#define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
+
+ const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
+
+ const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
+ const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ const int tmp0 = left_shifti(ACCESS_COL(0) + ACCESS_COL(4), CONST_BITS);
+ const int tmp1 = left_shifti(ACCESS_COL(0) - ACCESS_COL(4), CONST_BITS);
+
+ const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
+
+ const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
+
+ const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
+ const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
+
+ const int az1 = MULTIPLY(bz1, -FIX_0_899976223);
+ const int az2 = MULTIPLY(bz2, -FIX_2_562915447);
+ const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5;
+ const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5;
+
+ const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
+ const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
+ const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
+ const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
+
+ pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS - PASS1_BITS);
+ pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS - PASS1_BITS);
+ pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS - PASS1_BITS);
+ pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS - PASS1_BITS);
+ pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS - PASS1_BITS);
+ pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS - PASS1_BITS);
+ pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS - PASS1_BITS);
+ pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS - PASS1_BITS);
+ }
+ };
+
+ template <>
+ struct Row<0>
+ {
+ static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
+ {
+ (void)pTemp;
+ (void)pSrc;
+ }
+ };
+
+ template <>
+ struct Row<1>
+ {
+ static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
+ {
+ const int dcval = left_shifti(pSrc[0], PASS1_BITS);
+
+ pTemp[0] = dcval;
+ pTemp[1] = dcval;
+ pTemp[2] = dcval;
+ pTemp[3] = dcval;
+ pTemp[4] = dcval;
+ pTemp[5] = dcval;
+ pTemp[6] = dcval;
+ pTemp[7] = dcval;
+ }
+ };
+
+ // Compiler creates a fast path 1D IDCT for X non-zero rows
+ template <int NONZERO_ROWS>
+ struct Col
+ {
+ static void idct(uint8* pDst_ptr, const int* pTemp)
+ {
+ // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
+#define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
+
+ const int z2 = ACCESS_ROW(2);
+ const int z3 = ACCESS_ROW(6);
+
+ const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+ const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
+ const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+ const int tmp0 = left_shifti(ACCESS_ROW(0) + ACCESS_ROW(4), CONST_BITS);
+ const int tmp1 = left_shifti(ACCESS_ROW(0) - ACCESS_ROW(4), CONST_BITS);
+
+ const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
+
+ const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
+
+ const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
+ const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
+
+ const int az1 = MULTIPLY(bz1, -FIX_0_899976223);
+ const int az2 = MULTIPLY(bz2, -FIX_2_562915447);
+ const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5;
+ const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5;
+
+ const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
+ const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
+ const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
+ const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
+
+ int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 0] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 7] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 1] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 6] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 2] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 5] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 3] = (uint8)CLAMP(i);
+
+ i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS + PASS1_BITS + 3);
+ pDst_ptr[8 * 4] = (uint8)CLAMP(i);
+ }
+ };
+
+ template <>
+ struct Col<1>
+ {
+ static void idct(uint8* pDst_ptr, const int* pTemp)
+ {
+ int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3);
+ const uint8 dcval_clamped = (uint8)CLAMP(dcval);
+ pDst_ptr[0 * 8] = dcval_clamped;
+ pDst_ptr[1 * 8] = dcval_clamped;
+ pDst_ptr[2 * 8] = dcval_clamped;
+ pDst_ptr[3 * 8] = dcval_clamped;
+ pDst_ptr[4 * 8] = dcval_clamped;
+ pDst_ptr[5 * 8] = dcval_clamped;
+ pDst_ptr[6 * 8] = dcval_clamped;
+ pDst_ptr[7 * 8] = dcval_clamped;
+ }
+ };
+
+ static const uint8 s_idct_row_table[] =
+ {
+ 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
+ 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
+ 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
+ 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
+ 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
+ 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
+ 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
+ 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
+ };
+
+ static const uint8 s_idct_col_table[] =
+ {
+ 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+ 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
+ };
+
+ // Scalar "fast pathing" IDCT.
+ static void idct(const jpgd_block_coeff_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag, bool use_simd)
+ {
+ (void)use_simd;
+
+ assert(block_max_zag >= 1);
+ assert(block_max_zag <= 64);
+
+ if (block_max_zag <= 1)
+ {
+ int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
+ k = CLAMP(k);
+ k = k | (k << 8);
+ k = k | (k << 16);
+
+ for (int i = 8; i > 0; i--)
+ {
+ *(int*)&pDst_ptr[0] = k;
+ *(int*)&pDst_ptr[4] = k;
+ pDst_ptr += 8;
+ }
+ return;
+ }
+
+#if JPGD_USE_SSE2
+ if (use_simd)
+ {
+ assert((((uintptr_t)pSrc_ptr) & 15) == 0);
+ assert((((uintptr_t)pDst_ptr) & 15) == 0);
+ idctSSEShortU8(pSrc_ptr, pDst_ptr);
+ return;
+ }
#endif
- }
-};
-
-template <>
-struct Row<1>
-{
- static void idct(int* pTemp, const jpgd_block_t* pSrc)
- {
- const int dcval = (pSrc[0] << PASS1_BITS);
-
- pTemp[0] = dcval;
- pTemp[1] = dcval;
- pTemp[2] = dcval;
- pTemp[3] = dcval;
- pTemp[4] = dcval;
- pTemp[5] = dcval;
- pTemp[6] = dcval;
- pTemp[7] = dcval;
- }
-};
-
-// Compiler creates a fast path 1D IDCT for X non-zero rows
-template <int NONZERO_ROWS>
-struct Col
-{
- static void idct(uint8* pDst_ptr, const int* pTemp)
- {
- // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
- #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
-
- const int z2 = ACCESS_ROW(2);
- const int z3 = ACCESS_ROW(6);
-
- const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
- const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
- const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-
- const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
- const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
-
- const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
-
- const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
-
- const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
- const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
-
- const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
- const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
- const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
- const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
-
- const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
- const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
- const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
- const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
-
- int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*0] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*7] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*1] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*6] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*2] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*5] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*3] = (uint8)CLAMP(i);
-
- i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
- pDst_ptr[8*4] = (uint8)CLAMP(i);
- }
-};
-
-template <>
-struct Col<1>
-{
- static void idct(uint8* pDst_ptr, const int* pTemp)
- {
- int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
- const uint8 dcval_clamped = (uint8)CLAMP(dcval);
- pDst_ptr[0*8] = dcval_clamped;
- pDst_ptr[1*8] = dcval_clamped;
- pDst_ptr[2*8] = dcval_clamped;
- pDst_ptr[3*8] = dcval_clamped;
- pDst_ptr[4*8] = dcval_clamped;
- pDst_ptr[5*8] = dcval_clamped;
- pDst_ptr[6*8] = dcval_clamped;
- pDst_ptr[7*8] = dcval_clamped;
- }
-};
-
-static const uint8 s_idct_row_table[] =
-{
- 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
- 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
- 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
- 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
- 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
- 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
- 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
- 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
-};
-
-static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
-
-void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
-{
- JPGD_ASSERT(block_max_zag >= 1);
- JPGD_ASSERT(block_max_zag <= 64);
-
- if (block_max_zag <= 1)
- {
- int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
- k = CLAMP(k);
- k = k | (k<<8);
- k = k | (k<<16);
-
- for (int i = 8; i > 0; i--)
- {
- *(int*)&pDst_ptr[0] = k;
- *(int*)&pDst_ptr[4] = k;
- pDst_ptr += 8;
- }
- return;
- }
-
- int temp[64];
-
- const jpgd_block_t* pSrc = pSrc_ptr;
- int* pTemp = temp;
-
- const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
- int i;
- for (i = 8; i > 0; i--, pRow_tab++)
- {
- switch (*pRow_tab)
- {
- case 0: Row<0>::idct(pTemp, pSrc); break;
- case 1: Row<1>::idct(pTemp, pSrc); break;
- case 2: Row<2>::idct(pTemp, pSrc); break;
- case 3: Row<3>::idct(pTemp, pSrc); break;
- case 4: Row<4>::idct(pTemp, pSrc); break;
- case 5: Row<5>::idct(pTemp, pSrc); break;
- case 6: Row<6>::idct(pTemp, pSrc); break;
- case 7: Row<7>::idct(pTemp, pSrc); break;
- case 8: Row<8>::idct(pTemp, pSrc); break;
- }
-
- pSrc += 8;
- pTemp += 8;
- }
-
- pTemp = temp;
-
- const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
- for (i = 8; i > 0; i--)
- {
- switch (nonzero_rows)
- {
- case 1: Col<1>::idct(pDst_ptr, pTemp); break;
- case 2: Col<2>::idct(pDst_ptr, pTemp); break;
- case 3: Col<3>::idct(pDst_ptr, pTemp); break;
- case 4: Col<4>::idct(pDst_ptr, pTemp); break;
- case 5: Col<5>::idct(pDst_ptr, pTemp); break;
- case 6: Col<6>::idct(pDst_ptr, pTemp); break;
- case 7: Col<7>::idct(pDst_ptr, pTemp); break;
- case 8: Col<8>::idct(pDst_ptr, pTemp); break;
- }
-
- pTemp++;
- pDst_ptr++;
- }
-}
-
-void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr)
-{
- int temp[64];
- int* pTemp = temp;
- const jpgd_block_t* pSrc = pSrc_ptr;
-
- for (int i = 4; i > 0; i--)
- {
- Row<4>::idct(pTemp, pSrc);
- pSrc += 8;
- pTemp += 8;
- }
-
- pTemp = temp;
- for (int i = 8; i > 0; i--)
- {
- Col<4>::idct(pDst_ptr, pTemp);
- pTemp++;
- pDst_ptr++;
- }
-}
-
-// Retrieve one character from the input stream.
-inline uint jpeg_decoder::get_char()
-{
- // Any bytes remaining in buffer?
- if (!m_in_buf_left)
- {
- // Try to get more bytes.
- prep_in_buffer();
- // Still nothing to get?
- if (!m_in_buf_left)
- {
- // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
- int t = m_tem_flag;
- m_tem_flag ^= 1;
- if (t)
- return 0xD9;
- else
- return 0xFF;
- }
- }
-
- uint c = *m_pIn_buf_ofs++;
- m_in_buf_left--;
-
- return c;
-}
-
-// Same as previous method, except can indicate if the character is a pad character or not.
-inline uint jpeg_decoder::get_char(bool *pPadding_flag)
-{
- if (!m_in_buf_left)
- {
- prep_in_buffer();
- if (!m_in_buf_left)
- {
- *pPadding_flag = true;
- int t = m_tem_flag;
- m_tem_flag ^= 1;
- if (t)
- return 0xD9;
- else
- return 0xFF;
- }
- }
-
- *pPadding_flag = false;
-
- uint c = *m_pIn_buf_ofs++;
- m_in_buf_left--;
-
- return c;
-}
-
-// Inserts a previously retrieved character back into the input buffer.
-inline void jpeg_decoder::stuff_char(uint8 q)
-{
- *(--m_pIn_buf_ofs) = q;
- m_in_buf_left++;
-}
-
-// Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
-inline uint8 jpeg_decoder::get_octet()
-{
- bool padding_flag;
- int c = get_char(&padding_flag);
-
- if (c == 0xFF)
- {
- if (padding_flag)
- return 0xFF;
-
- c = get_char(&padding_flag);
- if (padding_flag)
- {
- stuff_char(0xFF);
- return 0xFF;
- }
-
- if (c == 0x00)
- return 0xFF;
- else
- {
- stuff_char(static_cast<uint8>(c));
- stuff_char(0xFF);
- return 0xFF;
- }
- }
-
- return static_cast<uint8>(c);
-}
-
-// Retrieves a variable number of bits from the input stream. Does not recognize markers.
-inline uint jpeg_decoder::get_bits(int num_bits)
-{
- if (!num_bits)
- return 0;
-
- uint i = m_bit_buf >> (32 - num_bits);
-
- if ((m_bits_left -= num_bits) <= 0)
- {
- m_bit_buf <<= (num_bits += m_bits_left);
-
- uint c1 = get_char();
- uint c2 = get_char();
- m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
-
- m_bit_buf <<= -m_bits_left;
-
- m_bits_left += 16;
-
- JPGD_ASSERT(m_bits_left >= 0);
- }
- else
- m_bit_buf <<= num_bits;
-
- return i;
-}
-
-// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
-inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
-{
- if (!num_bits)
- return 0;
-
- uint i = m_bit_buf >> (32 - num_bits);
-
- if ((m_bits_left -= num_bits) <= 0)
- {
- m_bit_buf <<= (num_bits += m_bits_left);
-
- if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
- {
- uint c1 = get_octet();
- uint c2 = get_octet();
- m_bit_buf |= (c1 << 8) | c2;
- }
- else
- {
- m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
- m_in_buf_left -= 2;
- m_pIn_buf_ofs += 2;
- }
-
- m_bit_buf <<= -m_bits_left;
-
- m_bits_left += 16;
-
- JPGD_ASSERT(m_bits_left >= 0);
- }
- else
- m_bit_buf <<= num_bits;
-
- return i;
-}
-
-// Decodes a Huffman encoded symbol.
-inline int jpeg_decoder::huff_decode(huff_tables *pH)
-{
- JPGD_ASSERT(pH);
-
- int symbol;
- // Check first 8-bits: do we have a complete symbol?
- if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
- {
- // Decode more bits, use a tree traversal to find symbol.
- int ofs = 23;
- do
- {
- unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1));
- JPGD_ASSERT(idx < JPGD_HUFF_TREE_MAX_LENGTH);
- symbol = pH->tree[idx];
- ofs--;
- } while (symbol < 0);
-
- get_bits_no_markers(8 + (23 - ofs));
- }
- else
- {
- JPGD_ASSERT(symbol < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
- get_bits_no_markers(pH->code_size[symbol]);
- }
-
- return symbol;
-}
-
-// Decodes a Huffman encoded symbol.
-inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
-{
- int symbol;
-
- JPGD_ASSERT(pH);
-
- // Check first 8-bits: do we have a complete symbol?
- if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
- {
- // Use a tree traversal to find symbol.
- int ofs = 23;
- do
- {
- unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1));
- JPGD_ASSERT(idx < JPGD_HUFF_TREE_MAX_LENGTH);
- symbol = pH->tree[idx];
- ofs--;
- } while (symbol < 0);
-
- get_bits_no_markers(8 + (23 - ofs));
-
- extra_bits = get_bits_no_markers(symbol & 0xF);
- }
- else
- {
- JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
-
- if (symbol & 0x8000)
- {
- get_bits_no_markers((symbol >> 8) & 31);
- extra_bits = symbol >> 16;
- }
- else
- {
- int code_size = (symbol >> 8) & 31;
- int num_extra_bits = symbol & 0xF;
- int bits = code_size + num_extra_bits;
- if (bits <= (m_bits_left + 16))
- extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
- else
- {
- get_bits_no_markers(code_size);
- extra_bits = get_bits_no_markers(num_extra_bits);
- }
- }
-
- symbol &= 0xFF;
- }
-
- return symbol;
-}
-
-// Tables and macro used to fully decode the DPCM differences.
-static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
-static const int s_extend_offset[16] = { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };
-static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
-// The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
+
+ int temp[64];
+
+ const jpgd_block_coeff_t* pSrc = pSrc_ptr;
+ int* pTemp = temp;
+
+ const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
+ int i;
+ for (i = 8; i > 0; i--, pRow_tab++)
+ {
+ switch (*pRow_tab)
+ {
+ case 0: Row<0>::idct(pTemp, pSrc); break;
+ case 1: Row<1>::idct(pTemp, pSrc); break;
+ case 2: Row<2>::idct(pTemp, pSrc); break;
+ case 3: Row<3>::idct(pTemp, pSrc); break;
+ case 4: Row<4>::idct(pTemp, pSrc); break;
+ case 5: Row<5>::idct(pTemp, pSrc); break;
+ case 6: Row<6>::idct(pTemp, pSrc); break;
+ case 7: Row<7>::idct(pTemp, pSrc); break;
+ case 8: Row<8>::idct(pTemp, pSrc); break;
+ }
+
+ pSrc += 8;
+ pTemp += 8;
+ }
+
+ pTemp = temp;
+
+ const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
+ for (i = 8; i > 0; i--)
+ {
+ switch (nonzero_rows)
+ {
+ case 1: Col<1>::idct(pDst_ptr, pTemp); break;
+ case 2: Col<2>::idct(pDst_ptr, pTemp); break;
+ case 3: Col<3>::idct(pDst_ptr, pTemp); break;
+ case 4: Col<4>::idct(pDst_ptr, pTemp); break;
+ case 5: Col<5>::idct(pDst_ptr, pTemp); break;
+ case 6: Col<6>::idct(pDst_ptr, pTemp); break;
+ case 7: Col<7>::idct(pDst_ptr, pTemp); break;
+ case 8: Col<8>::idct(pDst_ptr, pTemp); break;
+ }
+
+ pTemp++;
+ pDst_ptr++;
+ }
+ }
+
+ // Retrieve one character from the input stream.
+ inline uint jpeg_decoder::get_char()
+ {
+ // Any bytes remaining in buffer?
+ if (!m_in_buf_left)
+ {
+ // Try to get more bytes.
+ prep_in_buffer();
+ // Still nothing to get?
+ if (!m_in_buf_left)
+ {
+ // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
+ int t = m_tem_flag;
+ m_tem_flag ^= 1;
+ if (t)
+ return 0xD9;
+ else
+ return 0xFF;
+ }
+ }
+
+ uint c = *m_pIn_buf_ofs++;
+ m_in_buf_left--;
+
+ return c;
+ }
+
+ // Same as previous method, except can indicate if the character is a pad character or not.
+ inline uint jpeg_decoder::get_char(bool* pPadding_flag)
+ {
+ if (!m_in_buf_left)
+ {
+ prep_in_buffer();
+ if (!m_in_buf_left)
+ {
+ *pPadding_flag = true;
+ int t = m_tem_flag;
+ m_tem_flag ^= 1;
+ if (t)
+ return 0xD9;
+ else
+ return 0xFF;
+ }
+ }
+
+ *pPadding_flag = false;
+
+ uint c = *m_pIn_buf_ofs++;
+ m_in_buf_left--;
+
+ return c;
+ }
+
+ // Inserts a previously retrieved character back into the input buffer.
+ inline void jpeg_decoder::stuff_char(uint8 q)
+ {
+ // This could write before the input buffer, but we've placed another array there.
+ *(--m_pIn_buf_ofs) = q;
+ m_in_buf_left++;
+ }
+
+ // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
+ inline uint8 jpeg_decoder::get_octet()
+ {
+ bool padding_flag;
+ int c = get_char(&padding_flag);
+
+ if (c == 0xFF)
+ {
+ if (padding_flag)
+ return 0xFF;
+
+ c = get_char(&padding_flag);
+ if (padding_flag)
+ {
+ stuff_char(0xFF);
+ return 0xFF;
+ }
+
+ if (c == 0x00)
+ return 0xFF;
+ else
+ {
+ stuff_char(static_cast<uint8>(c));
+ stuff_char(0xFF);
+ return 0xFF;
+ }
+ }
+
+ return static_cast<uint8>(c);
+ }
+
+ // Retrieves a variable number of bits from the input stream. Does not recognize markers.
+ inline uint jpeg_decoder::get_bits(int num_bits)
+ {
+ if (!num_bits)
+ return 0;
+
+ uint i = m_bit_buf >> (32 - num_bits);
+
+ if ((m_bits_left -= num_bits) <= 0)
+ {
+ m_bit_buf <<= (num_bits += m_bits_left);
+
+ uint c1 = get_char();
+ uint c2 = get_char();
+ m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
+
+ m_bit_buf <<= -m_bits_left;
+
+ m_bits_left += 16;
+
+ assert(m_bits_left >= 0);
+ }
+ else
+ m_bit_buf <<= num_bits;
+
+ return i;
+ }
+
+ // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
+ inline uint jpeg_decoder::get_bits_no_markers(int num_bits)
+ {
+ if (!num_bits)
+ return 0;
+
+ assert(num_bits <= 16);
+
+ uint i = m_bit_buf >> (32 - num_bits);
+
+ if ((m_bits_left -= num_bits) <= 0)
+ {
+ m_bit_buf <<= (num_bits += m_bits_left);
+
+ if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF))
+ {
+ uint c1 = get_octet();
+ uint c2 = get_octet();
+ m_bit_buf |= (c1 << 8) | c2;
+ }
+ else
+ {
+ m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
+ m_in_buf_left -= 2;
+ m_pIn_buf_ofs += 2;
+ }
+
+ m_bit_buf <<= -m_bits_left;
+
+ m_bits_left += 16;
+
+ assert(m_bits_left >= 0);
+ }
+ else
+ m_bit_buf <<= num_bits;
+
+ return i;
+ }
+
+ // Decodes a Huffman encoded symbol.
+ inline int jpeg_decoder::huff_decode(huff_tables* pH)
+ {
+ if (!pH)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ int symbol;
+ // Check first 8-bits: do we have a complete symbol?
+ if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0)
+ {
+ // Decode more bits, use a tree traversal to find symbol.
+ int ofs = 23;
+ do
+ {
+ unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1));
+
+ // This should never happen, but to be safe I'm turning these asserts into a run-time check.
+ if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0))
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ symbol = pH->tree[idx];
+ ofs--;
+ } while (symbol < 0);
+
+ get_bits_no_markers(8 + (23 - ofs));
+ }
+ else
+ {
+ assert(symbol < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
+ get_bits_no_markers(pH->code_size[symbol]);
+ }
+
+ return symbol;
+ }
+
+ // Decodes a Huffman encoded symbol.
+ inline int jpeg_decoder::huff_decode(huff_tables* pH, int& extra_bits)
+ {
+ int symbol;
+
+ if (!pH)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ // Check first 8-bits: do we have a complete symbol?
+ if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0)
+ {
+ // Use a tree traversal to find symbol.
+ int ofs = 23;
+ do
+ {
+ unsigned int idx = -(int)(symbol + ((m_bit_buf >> ofs) & 1));
+
+ // This should never happen, but to be safe I'm turning these asserts into a run-time check.
+ if ((idx >= JPGD_HUFF_TREE_MAX_LENGTH) || (ofs < 0))
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ symbol = pH->tree[idx];
+ ofs--;
+ } while (symbol < 0);
+
+ get_bits_no_markers(8 + (23 - ofs));
+
+ extra_bits = get_bits_no_markers(symbol & 0xF);
+ }
+ else
+ {
+ if (symbol & 0x8000)
+ {
+ //get_bits_no_markers((symbol >> 8) & 31);
+ assert(((symbol >> 8) & 31) <= 15);
+ get_bits_no_markers((symbol >> 8) & 15);
+ extra_bits = symbol >> 16;
+ }
+ else
+ {
+ int code_size = (symbol >> 8) & 31;
+ int num_extra_bits = symbol & 0xF;
+ int bits = code_size + num_extra_bits;
+
+ if (bits <= 16)
+ extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
+ else
+ {
+ get_bits_no_markers(code_size);
+ extra_bits = get_bits_no_markers(num_extra_bits);
+ }
+ }
+
+ symbol &= 0xFF;
+ }
+
+ return symbol;
+ }
+
+ // Tables and macro used to fully decode the DPCM differences.
+ static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+ static const int s_extend_offset[16] = { 0, -1, -3, -7, -15, -31, -63, -127, -255, -511, -1023, -2047, -4095, -8191, -16383, -32767 };
+ //static const int s_extend_mask[] = { 0, (1 << 0), (1 << 1), (1 << 2), (1 << 3), (1 << 4), (1 << 5), (1 << 6), (1 << 7), (1 << 8), (1 << 9), (1 << 10), (1 << 11), (1 << 12), (1 << 13), (1 << 14), (1 << 15), (1 << 16) };
+
#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
-// Clamps a value between 0-255.
-inline uint8 jpeg_decoder::clamp(int i)
-{
- if (static_cast<uint>(i) > 255)
- i = (((~i) >> 31) & 0xFF);
-
- return static_cast<uint8>(i);
-}
-
-namespace DCT_Upsample
-{
- struct Matrix44
- {
- typedef int Element_Type;
- enum { NUM_ROWS = 4, NUM_COLS = 4 };
-
- Element_Type v[NUM_ROWS][NUM_COLS];
-
- inline int rows() const { return NUM_ROWS; }
- inline int cols() const { return NUM_COLS; }
-
- inline const Element_Type & at(int r, int c) const { return v[r][c]; }
- inline Element_Type & at(int r, int c) { return v[r][c]; }
-
- inline Matrix44() { }
-
- inline Matrix44& operator += (const Matrix44& a)
- {
- for (int r = 0; r < NUM_ROWS; r++)
- {
- at(r, 0) += a.at(r, 0);
- at(r, 1) += a.at(r, 1);
- at(r, 2) += a.at(r, 2);
- at(r, 3) += a.at(r, 3);
- }
- return *this;
- }
-
- inline Matrix44& operator -= (const Matrix44& a)
- {
- for (int r = 0; r < NUM_ROWS; r++)
- {
- at(r, 0) -= a.at(r, 0);
- at(r, 1) -= a.at(r, 1);
- at(r, 2) -= a.at(r, 2);
- at(r, 3) -= a.at(r, 3);
- }
- return *this;
- }
-
- friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
- {
- Matrix44 ret;
- for (int r = 0; r < NUM_ROWS; r++)
- {
- ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
- ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
- ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
- ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
- }
- return ret;
- }
-
- friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
- {
- Matrix44 ret;
- for (int r = 0; r < NUM_ROWS; r++)
- {
- ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
- ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
- ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
- ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
- }
- return ret;
- }
-
- static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
- {
- for (int r = 0; r < 4; r++)
- {
- pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
- pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
- pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
- pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
- }
- }
-
- static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
- {
- for (int r = 0; r < 4; r++)
- {
- pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
- pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
- pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
- pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
- }
- }
- };
-
- const int FRACT_BITS = 10;
- const int SCALE = 1 << FRACT_BITS;
-
- typedef int Temp_Type;
- #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
- #define F(i) ((int)((i) * SCALE + .5f))
-
- // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
- #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
-
- // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
- template<int NUM_ROWS, int NUM_COLS>
- struct P_Q
- {
- static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
- {
- // 4x8 = 4x8 times 8x8, matrix 0 is constant
- const Temp_Type X000 = AT(0, 0);
- const Temp_Type X001 = AT(0, 1);
- const Temp_Type X002 = AT(0, 2);
- const Temp_Type X003 = AT(0, 3);
- const Temp_Type X004 = AT(0, 4);
- const Temp_Type X005 = AT(0, 5);
- const Temp_Type X006 = AT(0, 6);
- const Temp_Type X007 = AT(0, 7);
- const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
- const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
- const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
- const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
- const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
- const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
- const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
- const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
- const Temp_Type X020 = AT(4, 0);
- const Temp_Type X021 = AT(4, 1);
- const Temp_Type X022 = AT(4, 2);
- const Temp_Type X023 = AT(4, 3);
- const Temp_Type X024 = AT(4, 4);
- const Temp_Type X025 = AT(4, 5);
- const Temp_Type X026 = AT(4, 6);
- const Temp_Type X027 = AT(4, 7);
- const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
- const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
- const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
- const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
- const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
- const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
- const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
- const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
-
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
- P.at(0, 0) = X000;
- P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
- P.at(0, 2) = X004;
- P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
- P.at(1, 0) = X010;
- P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
- P.at(1, 2) = X014;
- P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
- P.at(2, 0) = X020;
- P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
- P.at(2, 2) = X024;
- P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
- P.at(3, 0) = X030;
- P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
- P.at(3, 2) = X034;
- P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
- // 40 muls 24 adds
-
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
- Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
- Q.at(0, 1) = X002;
- Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
- Q.at(0, 3) = X006;
- Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
- Q.at(1, 1) = X012;
- Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
- Q.at(1, 3) = X016;
- Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
- Q.at(2, 1) = X022;
- Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
- Q.at(2, 3) = X026;
- Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
- Q.at(3, 1) = X032;
- Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
- Q.at(3, 3) = X036;
- // 40 muls 24 adds
- }
- };
-
- template<int NUM_ROWS, int NUM_COLS>
- struct R_S
- {
- static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
- {
- // 4x8 = 4x8 times 8x8, matrix 0 is constant
- const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
- const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
- const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
- const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
- const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
- const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
- const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
- const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
- const Temp_Type X110 = AT(2, 0);
- const Temp_Type X111 = AT(2, 1);
- const Temp_Type X112 = AT(2, 2);
- const Temp_Type X113 = AT(2, 3);
- const Temp_Type X114 = AT(2, 4);
- const Temp_Type X115 = AT(2, 5);
- const Temp_Type X116 = AT(2, 6);
- const Temp_Type X117 = AT(2, 7);
- const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
- const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
- const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
- const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
- const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
- const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
- const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
- const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
- const Temp_Type X130 = AT(6, 0);
- const Temp_Type X131 = AT(6, 1);
- const Temp_Type X132 = AT(6, 2);
- const Temp_Type X133 = AT(6, 3);
- const Temp_Type X134 = AT(6, 4);
- const Temp_Type X135 = AT(6, 5);
- const Temp_Type X136 = AT(6, 6);
- const Temp_Type X137 = AT(6, 7);
- // 80 muls 48 adds
-
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
- R.at(0, 0) = X100;
- R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
- R.at(0, 2) = X104;
- R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
- R.at(1, 0) = X110;
- R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
- R.at(1, 2) = X114;
- R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
- R.at(2, 0) = X120;
- R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
- R.at(2, 2) = X124;
- R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
- R.at(3, 0) = X130;
- R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
- R.at(3, 2) = X134;
- R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
- // 40 muls 24 adds
- // 4x4 = 4x8 times 8x4, matrix 1 is constant
- S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
- S.at(0, 1) = X102;
- S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
- S.at(0, 3) = X106;
- S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
- S.at(1, 1) = X112;
- S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
- S.at(1, 3) = X116;
- S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
- S.at(2, 1) = X122;
- S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
- S.at(2, 3) = X126;
- S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
- S.at(3, 1) = X132;
- S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
- S.at(3, 3) = X136;
- // 40 muls 24 adds
- }
- };
-} // end namespace DCT_Upsample
-
-// Unconditionally frees all allocated m_blocks.
-void jpeg_decoder::free_all_blocks()
-{
- m_pStream = NULL;
- for (mem_block *b = m_pMem_blocks; b; )
- {
- mem_block *n = b->m_pNext;
- jpgd_free(b);
- b = n;
- }
- m_pMem_blocks = NULL;
-}
-
-// This method handles all errors. It will never return.
-// It could easily be changed to use C++ exceptions.
-JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
-{
- m_error_code = status;
- free_all_blocks();
- longjmp(m_jmp_state, status);
-}
-
-void *jpeg_decoder::alloc(size_t nSize, bool zero)
-{
- nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
- char *rv = NULL;
- for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
- {
- if ((b->m_used_count + nSize) <= b->m_size)
- {
- rv = b->m_data + b->m_used_count;
- b->m_used_count += nSize;
- break;
- }
- }
- if (!rv)
- {
- int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
- mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
- if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
- b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
- b->m_used_count = nSize;
- b->m_size = capacity;
- rv = b->m_data;
- }
- if (zero) memset(rv, 0, nSize);
- return rv;
-}
-
-void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
-{
- uint8 *pD = (uint8*)p;
- const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
- while (n)
- {
- pD[0] = l; pD[1] = h; pD += 2;
- n--;
- }
-}
-
-// Refill the input buffer.
-// This method will sit in a loop until (A) the buffer is full or (B)
-// the stream's read() method reports and end of file condition.
-void jpeg_decoder::prep_in_buffer()
-{
- m_in_buf_left = 0;
- m_pIn_buf_ofs = m_in_buf;
-
- if (m_eof_flag)
- return;
-
- do
- {
- int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
- if (bytes_read == -1)
- stop_decoding(JPGD_STREAM_READ);
-
- m_in_buf_left += bytes_read;
- } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
-
- m_total_bytes_read += m_in_buf_left;
-
- // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
- // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
- word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
-}
-
-// Read a Huffman code table.
-void jpeg_decoder::read_dht_marker()
-{
- int i, index, count;
- uint8 huff_num[17];
- uint8 huff_val[256];
-
- uint num_left = get_bits(16);
-
- if (num_left < 2)
- stop_decoding(JPGD_BAD_DHT_MARKER);
-
- num_left -= 2;
-
- while (num_left)
- {
- index = get_bits(8);
-
- huff_num[0] = 0;
-
- count = 0;
-
- for (i = 1; i <= 16; i++)
- {
- huff_num[i] = static_cast<uint8>(get_bits(8));
- count += huff_num[i];
- }
-
- if (count > 255)
- stop_decoding(JPGD_BAD_DHT_COUNTS);
-
- for (i = 0; i < count; i++)
- huff_val[i] = static_cast<uint8>(get_bits(8));
+ // Unconditionally frees all allocated m_blocks.
+ void jpeg_decoder::free_all_blocks()
+ {
+ m_pStream = nullptr;
+ for (mem_block* b = m_pMem_blocks; b; )
+ {
+ mem_block* n = b->m_pNext;
+ jpgd_free(b);
+ b = n;
+ }
+ m_pMem_blocks = nullptr;
+ }
+
+ // This method handles all errors. It will never return.
+ // It could easily be changed to use C++ exceptions.
+ JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
+ {
+ m_error_code = status;
+ free_all_blocks();
+ longjmp(m_jmp_state, status);
+ }
+
+ void* jpeg_decoder::alloc(size_t nSize, bool zero)
+ {
+ nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
+ char* rv = nullptr;
+ for (mem_block* b = m_pMem_blocks; b; b = b->m_pNext)
+ {
+ if ((b->m_used_count + nSize) <= b->m_size)
+ {
+ rv = b->m_data + b->m_used_count;
+ b->m_used_count += nSize;
+ break;
+ }
+ }
+ if (!rv)
+ {
+ int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
+ mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
+ if (!b)
+ {
+ stop_decoding(JPGD_NOTENOUGHMEM);
+ }
+
+ b->m_pNext = m_pMem_blocks;
+ m_pMem_blocks = b;
+ b->m_used_count = nSize;
+ b->m_size = capacity;
+ rv = b->m_data;
+ }
+ if (zero) memset(rv, 0, nSize);
+ return rv;
+ }
+
+ void* jpeg_decoder::alloc_aligned(size_t nSize, uint32_t align, bool zero)
+ {
+ assert((align >= 1U) && ((align & (align - 1U)) == 0U));
+ void *p = alloc(nSize + align - 1U, zero);
+ p = (void *)( ((uintptr_t)p + (align - 1U)) & ~((uintptr_t)(align - 1U)) );
+ return p;
+ }
+
+ void jpeg_decoder::word_clear(void* p, uint16 c, uint n)
+ {
+ uint8* pD = (uint8*)p;
+ const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
+ while (n)
+ {
+ pD[0] = l;
+ pD[1] = h;
+ pD += 2;
+ n--;
+ }
+ }
+
+ // Refill the input buffer.
+ // This method will sit in a loop until (A) the buffer is full or (B)
+ // the stream's read() method reports and end of file condition.
+ void jpeg_decoder::prep_in_buffer()
+ {
+ m_in_buf_left = 0;
+ m_pIn_buf_ofs = m_in_buf;
+
+ if (m_eof_flag)
+ return;
+
+ do
+ {
+ int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
+ if (bytes_read == -1)
+ stop_decoding(JPGD_STREAM_READ);
+
+ m_in_buf_left += bytes_read;
+ } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
+
+ m_total_bytes_read += m_in_buf_left;
+
+ // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
+ // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
+ word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
+ }
+
+ // Read a Huffman code table.
+ void jpeg_decoder::read_dht_marker()
+ {
+ int i, index, count;
+ uint8 huff_num[17];
+ uint8 huff_val[256];
+
+ uint num_left = get_bits(16);
+
+ if (num_left < 2)
+ stop_decoding(JPGD_BAD_DHT_MARKER);
+
+ num_left -= 2;
+
+ while (num_left)
+ {
+ index = get_bits(8);
+
+ huff_num[0] = 0;
+
+ count = 0;
+
+ for (i = 1; i <= 16; i++)
+ {
+ huff_num[i] = static_cast<uint8>(get_bits(8));
+ count += huff_num[i];
+ }
+
+ if (count > 255)
+ stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+ bool symbol_present[256];
+ memset(symbol_present, 0, sizeof(symbol_present));
- i = 1 + 16 + count;
+ for (i = 0; i < count; i++)
+ {
+ const int s = get_bits(8);
+
+ // Check for obviously bogus tables.
+ if (symbol_present[s])
+ stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+ huff_val[i] = static_cast<uint8_t>(s);
+ symbol_present[s] = true;
+ }
- if (num_left < (uint)i)
- stop_decoding(JPGD_BAD_DHT_MARKER);
+ i = 1 + 16 + count;
- num_left -= i;
+ if (num_left < (uint)i)
+ stop_decoding(JPGD_BAD_DHT_MARKER);
- if ((index & 0x10) > 0x10)
- stop_decoding(JPGD_BAD_DHT_INDEX);
+ num_left -= i;
- index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
+ if ((index & 0x10) > 0x10)
+ stop_decoding(JPGD_BAD_DHT_INDEX);
- if (index >= JPGD_MAX_HUFF_TABLES)
- stop_decoding(JPGD_BAD_DHT_INDEX);
+ index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
- if (!m_huff_num[index])
- m_huff_num[index] = (uint8 *)alloc(17);
+ if (index >= JPGD_MAX_HUFF_TABLES)
+ stop_decoding(JPGD_BAD_DHT_INDEX);
- if (!m_huff_val[index])
- m_huff_val[index] = (uint8 *)alloc(256);
+ if (!m_huff_num[index])
+ m_huff_num[index] = (uint8*)alloc(17);
- m_huff_ac[index] = (index & 0x10) != 0;
- memcpy(m_huff_num[index], huff_num, 17);
- memcpy(m_huff_val[index], huff_val, 256);
- }
-}
+ if (!m_huff_val[index])
+ m_huff_val[index] = (uint8*)alloc(256);
-// Read a quantization table.
-void jpeg_decoder::read_dqt_marker()
-{
- int n, i, prec;
- uint num_left;
- uint temp;
+ m_huff_ac[index] = (index & 0x10) != 0;
+ memcpy(m_huff_num[index], huff_num, 17);
+ memcpy(m_huff_val[index], huff_val, 256);
+ }
+ }
- num_left = get_bits(16);
+ // Read a quantization table.
+ void jpeg_decoder::read_dqt_marker()
+ {
+ int n, i, prec;
+ uint num_left;
+ uint temp;
- if (num_left < 2)
- stop_decoding(JPGD_BAD_DQT_MARKER);
+ num_left = get_bits(16);
- num_left -= 2;
+ if (num_left < 2)
+ stop_decoding(JPGD_BAD_DQT_MARKER);
- while (num_left)
- {
- n = get_bits(8);
- prec = n >> 4;
- n &= 0x0F;
+ num_left -= 2;
- if (n >= JPGD_MAX_QUANT_TABLES)
- stop_decoding(JPGD_BAD_DQT_TABLE);
+ while (num_left)
+ {
+ n = get_bits(8);
+ prec = n >> 4;
+ n &= 0x0F;
- if (!m_quant[n])
- m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
+ if (n >= JPGD_MAX_QUANT_TABLES)
+ stop_decoding(JPGD_BAD_DQT_TABLE);
- // read quantization entries, in zag order
- for (i = 0; i < 64; i++)
- {
- temp = get_bits(8);
+ if (!m_quant[n])
+ m_quant[n] = (jpgd_quant_t*)alloc(64 * sizeof(jpgd_quant_t));
- if (prec)
- temp = (temp << 8) + get_bits(8);
+ // read quantization entries, in zag order
+ for (i = 0; i < 64; i++)
+ {
+ temp = get_bits(8);
- m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
- }
+ if (prec)
+ temp = (temp << 8) + get_bits(8);
- i = 64 + 1;
+ m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
+ }
- if (prec)
- i += 64;
+ i = 64 + 1;
- if (num_left < (uint)i)
- stop_decoding(JPGD_BAD_DQT_LENGTH);
+ if (prec)
+ i += 64;
- num_left -= i;
- }
-}
+ if (num_left < (uint)i)
+ stop_decoding(JPGD_BAD_DQT_LENGTH);
-// Read the start of frame (SOF) marker.
-void jpeg_decoder::read_sof_marker()
-{
- int i;
- uint num_left;
+ num_left -= i;
+ }
+ }
- num_left = get_bits(16);
+ // Read the start of frame (SOF) marker.
+ void jpeg_decoder::read_sof_marker()
+ {
+ int i;
+ uint num_left;
- if (get_bits(8) != 8) /* precision: sorry, only 8-bit precision is supported right now */
- stop_decoding(JPGD_BAD_PRECISION);
+ num_left = get_bits(16);
- m_image_y_size = get_bits(16);
+ /* precision: sorry, only 8-bit precision is supported */
+ if (get_bits(8) != 8)
+ stop_decoding(JPGD_BAD_PRECISION);
- if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
- stop_decoding(JPGD_BAD_HEIGHT);
+ m_image_y_size = get_bits(16);
- m_image_x_size = get_bits(16);
+ if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
+ stop_decoding(JPGD_BAD_HEIGHT);
- if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
- stop_decoding(JPGD_BAD_WIDTH);
+ m_image_x_size = get_bits(16);
- m_comps_in_frame = get_bits(8);
+ if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
+ stop_decoding(JPGD_BAD_WIDTH);
- if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
- stop_decoding(JPGD_TOO_MANY_COMPONENTS);
+ m_comps_in_frame = get_bits(8);
- if (num_left != (uint)(m_comps_in_frame * 3 + 8))
- stop_decoding(JPGD_BAD_SOF_LENGTH);
+ if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
+ stop_decoding(JPGD_TOO_MANY_COMPONENTS);
- for (i = 0; i < m_comps_in_frame; i++)
- {
- m_comp_ident[i] = get_bits(8);
- m_comp_h_samp[i] = get_bits(4);
- m_comp_v_samp[i] = get_bits(4);
- m_comp_quant[i] = get_bits(8);
- }
-}
+ if (num_left != (uint)(m_comps_in_frame * 3 + 8))
+ stop_decoding(JPGD_BAD_SOF_LENGTH);
-// Used to skip unrecognized markers.
-void jpeg_decoder::skip_variable_marker()
-{
- uint num_left;
+ for (i = 0; i < m_comps_in_frame; i++)
+ {
+ m_comp_ident[i] = get_bits(8);
+ m_comp_h_samp[i] = get_bits(4);
+ m_comp_v_samp[i] = get_bits(4);
- num_left = get_bits(16);
+ if (!m_comp_h_samp[i] || !m_comp_v_samp[i] || (m_comp_h_samp[i] > 2) || (m_comp_v_samp[i] > 2))
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
- if (num_left < 2)
- stop_decoding(JPGD_BAD_VARIABLE_MARKER);
+ m_comp_quant[i] = get_bits(8);
+ if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES)
+ stop_decoding(JPGD_DECODE_ERROR);
+ }
+ }
+
+ // Used to skip unrecognized markers.
+ void jpeg_decoder::skip_variable_marker()
+ {
+ uint num_left;
+
+ num_left = get_bits(16);
+
+ if (num_left < 2)
+ stop_decoding(JPGD_BAD_VARIABLE_MARKER);
+
+ num_left -= 2;
+
+ while (num_left)
+ {
+ get_bits(8);
+ num_left--;
+ }
+ }
+
+ // Read a define restart interval (DRI) marker.
+ void jpeg_decoder::read_dri_marker()
+ {
+ if (get_bits(16) != 4)
+ stop_decoding(JPGD_BAD_DRI_LENGTH);
+
+ m_restart_interval = get_bits(16);
+ }
+
+ // Read a start of scan (SOS) marker.
+ void jpeg_decoder::read_sos_marker()
+ {
+ uint num_left;
+ int i, ci, n, c, cc;
+
+ num_left = get_bits(16);
+
+ n = get_bits(8);
+
+ m_comps_in_scan = n;
+
+ num_left -= 3;
+
+ if ((num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN))
+ stop_decoding(JPGD_BAD_SOS_LENGTH);
+
+ for (i = 0; i < n; i++)
+ {
+ cc = get_bits(8);
+ c = get_bits(8);
+ num_left -= 2;
+
+ for (ci = 0; ci < m_comps_in_frame; ci++)
+ if (cc == m_comp_ident[ci])
+ break;
+
+ if (ci >= m_comps_in_frame)
+ stop_decoding(JPGD_BAD_SOS_COMP_ID);
+
+ if (ci >= JPGD_MAX_COMPONENTS)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ m_comp_list[i] = ci;
+
+ m_comp_dc_tab[ci] = (c >> 4) & 15;
+ m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
+
+ if (m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ if (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES)
+ stop_decoding(JPGD_DECODE_ERROR);
+ }
+
+ m_spectral_start = get_bits(8);
+ m_spectral_end = get_bits(8);
+ m_successive_high = get_bits(4);
+ m_successive_low = get_bits(4);
+
+ if (!m_progressive_flag)
+ {
+ m_spectral_start = 0;
+ m_spectral_end = 63;
+ }
+
+ num_left -= 3;
+
+ /* read past whatever is num_left */
+ while (num_left)
+ {
+ get_bits(8);
+ num_left--;
+ }
+ }
+
+ // Finds the next marker.
+ int jpeg_decoder::next_marker()
+ {
+ uint c, bytes;
+
+ bytes = 0;
+
+ do
+ {
+ do
+ {
+ bytes++;
+ c = get_bits(8);
+ } while (c != 0xFF);
- num_left -= 2;
+ do
+ {
+ c = get_bits(8);
+ } while (c == 0xFF);
+
+ } while (c == 0);
+
+ // If bytes > 0 here, there where extra bytes before the marker (not good).
+
+ return c;
+ }
+
+ // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
+ // encountered.
+ int jpeg_decoder::process_markers()
+ {
+ int c;
+
+ for (; ; )
+ {
+ c = next_marker();
+
+ switch (c)
+ {
+ case M_SOF0:
+ case M_SOF1:
+ case M_SOF2:
+ case M_SOF3:
+ case M_SOF5:
+ case M_SOF6:
+ case M_SOF7:
+ // case M_JPG:
+ case M_SOF9:
+ case M_SOF10:
+ case M_SOF11:
+ case M_SOF13:
+ case M_SOF14:
+ case M_SOF15:
+ case M_SOI:
+ case M_EOI:
+ case M_SOS:
+ {
+ return c;
+ }
+ case M_DHT:
+ {
+ read_dht_marker();
+ break;
+ }
+ // No arithmitic support - dumb patents!
+ case M_DAC:
+ {
+ stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
+ break;
+ }
+ case M_DQT:
+ {
+ read_dqt_marker();
+ break;
+ }
+ case M_DRI:
+ {
+ read_dri_marker();
+ break;
+ }
+ //case M_APP0: /* no need to read the JFIF marker */
+ case M_JPG:
+ case M_RST0: /* no parameters */
+ case M_RST1:
+ case M_RST2:
+ case M_RST3:
+ case M_RST4:
+ case M_RST5:
+ case M_RST6:
+ case M_RST7:
+ case M_TEM:
+ {
+ stop_decoding(JPGD_UNEXPECTED_MARKER);
+ break;
+ }
+ default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
+ {
+ skip_variable_marker();
+ break;
+ }
+ }
+ }
+ }
+
+ // Finds the start of image (SOI) marker.
+ void jpeg_decoder::locate_soi_marker()
+ {
+ uint lastchar, thischar;
+ uint bytesleft;
+
+ lastchar = get_bits(8);
+
+ thischar = get_bits(8);
+
+ /* ok if it's a normal JPEG file without a special header */
+
+ if ((lastchar == 0xFF) && (thischar == M_SOI))
+ return;
+
+ bytesleft = 4096;
+
+ for (; ; )
+ {
+ if (--bytesleft == 0)
+ stop_decoding(JPGD_NOT_JPEG);
+
+ lastchar = thischar;
+
+ thischar = get_bits(8);
+
+ if (lastchar == 0xFF)
+ {
+ if (thischar == M_SOI)
+ break;
+ else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
+ stop_decoding(JPGD_NOT_JPEG);
+ }
+ }
+
+ // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
+ thischar = (m_bit_buf >> 24) & 0xFF;
+
+ if (thischar != 0xFF)
+ stop_decoding(JPGD_NOT_JPEG);
+ }
+
+ // Find a start of frame (SOF) marker.
+ void jpeg_decoder::locate_sof_marker()
+ {
+ locate_soi_marker();
+
+ int c = process_markers();
+
+ switch (c)
+ {
+ case M_SOF2:
+ {
+ m_progressive_flag = JPGD_TRUE;
+ read_sof_marker();
+ break;
+ }
+ case M_SOF0: /* baseline DCT */
+ case M_SOF1: /* extended sequential DCT */
+ {
+ read_sof_marker();
+ break;
+ }
+ case M_SOF9: /* Arithmitic coding */
+ {
+ stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
+ break;
+ }
+ default:
+ {
+ stop_decoding(JPGD_UNSUPPORTED_MARKER);
+ break;
+ }
+ }
+ }
- while (num_left)
- {
- get_bits(8);
- num_left--;
- }
-}
+ // Find a start of scan (SOS) marker.
+ int jpeg_decoder::locate_sos_marker()
+ {
+ int c;
-// Read a define restart interval (DRI) marker.
-void jpeg_decoder::read_dri_marker()
-{
- if (get_bits(16) != 4)
- stop_decoding(JPGD_BAD_DRI_LENGTH);
+ c = process_markers();
- m_restart_interval = get_bits(16);
-}
+ if (c == M_EOI)
+ return JPGD_FALSE;
+ else if (c != M_SOS)
+ stop_decoding(JPGD_UNEXPECTED_MARKER);
-// Read a start of scan (SOS) marker.
-void jpeg_decoder::read_sos_marker()
-{
- uint num_left;
- int i, ci, n, c, cc;
+ read_sos_marker();
- num_left = get_bits(16);
+ return JPGD_TRUE;
+ }
- n = get_bits(8);
-
- m_comps_in_scan = n;
-
- num_left -= 3;
-
- if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
- stop_decoding(JPGD_BAD_SOS_LENGTH);
-
- for (i = 0; i < n; i++)
- {
- cc = get_bits(8);
- c = get_bits(8);
- num_left -= 2;
-
- for (ci = 0; ci < m_comps_in_frame; ci++)
- if (cc == m_comp_ident[ci])
- break;
-
- if (ci >= m_comps_in_frame)
- stop_decoding(JPGD_BAD_SOS_COMP_ID);
-
- m_comp_list[i] = ci;
- m_comp_dc_tab[ci] = (c >> 4) & 15;
- m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
- }
-
- m_spectral_start = get_bits(8);
- m_spectral_end = get_bits(8);
- m_successive_high = get_bits(4);
- m_successive_low = get_bits(4);
-
- if (!m_progressive_flag)
- {
- m_spectral_start = 0;
- m_spectral_end = 63;
- }
-
- num_left -= 3;
-
- while (num_left) /* read past whatever is num_left */
- {
- get_bits(8);
- num_left--;
- }
-}
-
-// Finds the next marker.
-int jpeg_decoder::next_marker()
-{
- uint c, bytes;
-
- bytes = 0;
-
- do
- {
- do
- {
- bytes++;
- c = get_bits(8);
- } while (c != 0xFF);
-
- do
- {
- c = get_bits(8);
- } while (c == 0xFF);
-
- } while (c == 0);
-
- // If bytes > 0 here, there where extra bytes before the marker (not good).
-
- return c;
-}
-
-// Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
-// encountered.
-int jpeg_decoder::process_markers()
-{
- int c;
-
- for ( ; ; )
- {
- c = next_marker();
-
- switch (c)
- {
- case M_SOF0:
- case M_SOF1:
- case M_SOF2:
- case M_SOF3:
- case M_SOF5:
- case M_SOF6:
- case M_SOF7:
-// case M_JPG:
- case M_SOF9:
- case M_SOF10:
- case M_SOF11:
- case M_SOF13:
- case M_SOF14:
- case M_SOF15:
- case M_SOI:
- case M_EOI:
- case M_SOS:
- {
- return c;
- }
- case M_DHT:
- {
- read_dht_marker();
- break;
- }
- // No arithmitic support - dumb patents!
- case M_DAC:
- {
- stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
- break;
- }
- case M_DQT:
- {
- read_dqt_marker();
- break;
- }
- case M_DRI:
- {
- read_dri_marker();
- break;
- }
- //case M_APP0: /* no need to read the JFIF marker */
-
- case M_JPG:
- case M_RST0: /* no parameters */
- case M_RST1:
- case M_RST2:
- case M_RST3:
- case M_RST4:
- case M_RST5:
- case M_RST6:
- case M_RST7:
- case M_TEM:
- {
- stop_decoding(JPGD_UNEXPECTED_MARKER);
- break;
- }
- default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
- {
- skip_variable_marker();
- break;
- }
- }
- }
-}
-
-// Finds the start of image (SOI) marker.
-// This code is rather defensive: it only checks the first 512 bytes to avoid
-// false positives.
-void jpeg_decoder::locate_soi_marker()
-{
- uint lastchar, thischar;
- uint bytesleft;
-
- lastchar = get_bits(8);
-
- thischar = get_bits(8);
-
- /* ok if it's a normal JPEG file without a special header */
-
- if ((lastchar == 0xFF) && (thischar == M_SOI))
- return;
-
- bytesleft = 4096; //512;
-
- for ( ; ; )
- {
- if (--bytesleft == 0)
- stop_decoding(JPGD_NOT_JPEG);
-
- lastchar = thischar;
-
- thischar = get_bits(8);
-
- if (lastchar == 0xFF)
- {
- if (thischar == M_SOI)
- break;
- else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
- stop_decoding(JPGD_NOT_JPEG);
- }
- }
-
- // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
- thischar = (m_bit_buf >> 24) & 0xFF;
-
- if (thischar != 0xFF)
- stop_decoding(JPGD_NOT_JPEG);
-}
-
-// Find a start of frame (SOF) marker.
-void jpeg_decoder::locate_sof_marker()
-{
- locate_soi_marker();
-
- int c = process_markers();
-
- switch (c)
- {
- case M_SOF2:
- m_progressive_flag = JPGD_TRUE;
- case M_SOF0: /* baseline DCT */
- case M_SOF1: /* extended sequential DCT */
- {
- read_sof_marker();
- break;
- }
- case M_SOF9: /* Arithmitic coding */
- {
- stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
- break;
- }
- default:
- {
- stop_decoding(JPGD_UNSUPPORTED_MARKER);
- break;
- }
- }
-}
-
-// Find a start of scan (SOS) marker.
-int jpeg_decoder::locate_sos_marker()
-{
- int c;
-
- c = process_markers();
-
- if (c == M_EOI)
- return JPGD_FALSE;
- else if (c != M_SOS)
- stop_decoding(JPGD_UNEXPECTED_MARKER);
-
- read_sos_marker();
-
- return JPGD_TRUE;
-}
-
-// Reset everything to default/uninitialized state.
-void jpeg_decoder::init(jpeg_decoder_stream *pStream)
-{
- m_pMem_blocks = NULL;
- m_error_code = JPGD_SUCCESS;
- m_ready_flag = false;
- m_image_x_size = m_image_y_size = 0;
- m_pStream = pStream;
- m_progressive_flag = JPGD_FALSE;
-
- memset(m_huff_ac, 0, sizeof(m_huff_ac));
- memset(m_huff_num, 0, sizeof(m_huff_num));
- memset(m_huff_val, 0, sizeof(m_huff_val));
- memset(m_quant, 0, sizeof(m_quant));
-
- m_scan_type = 0;
- m_comps_in_frame = 0;
-
- memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
- memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
- memset(m_comp_quant, 0, sizeof(m_comp_quant));
- memset(m_comp_ident, 0, sizeof(m_comp_ident));
- memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
- memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
-
- m_comps_in_scan = 0;
- memset(m_comp_list, 0, sizeof(m_comp_list));
- memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
- memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
-
- m_spectral_start = 0;
- m_spectral_end = 0;
- m_successive_low = 0;
- m_successive_high = 0;
- m_max_mcu_x_size = 0;
- m_max_mcu_y_size = 0;
- m_blocks_per_mcu = 0;
- m_max_blocks_per_row = 0;
- m_mcus_per_row = 0;
- m_mcus_per_col = 0;
- m_expanded_blocks_per_component = 0;
- m_expanded_blocks_per_mcu = 0;
- m_expanded_blocks_per_row = 0;
- m_freq_domain_chroma_upsample = false;
-
- memset(m_mcu_org, 0, sizeof(m_mcu_org));
-
- m_total_lines_left = 0;
- m_mcu_lines_left = 0;
- m_real_dest_bytes_per_scan_line = 0;
- m_dest_bytes_per_scan_line = 0;
- m_dest_bytes_per_pixel = 0;
-
- memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
-
- memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
- memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
-
- m_eob_run = 0;
-
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
-
- m_pIn_buf_ofs = m_in_buf;
- m_in_buf_left = 0;
- m_eof_flag = false;
- m_tem_flag = 0;
-
- memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
- memset(m_in_buf, 0, sizeof(m_in_buf));
- memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
-
- m_restart_interval = 0;
- m_restarts_left = 0;
- m_next_restart_num = 0;
-
- m_max_mcus_per_row = 0;
- m_max_blocks_per_mcu = 0;
- m_max_mcus_per_col = 0;
-
- memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
- m_pMCU_coefficients = NULL;
- m_pSample_buf = NULL;
-
- m_total_bytes_read = 0;
-
- m_pScan_line_0 = NULL;
- m_pScan_line_1 = NULL;
-
- // Ready the input buffer.
- prep_in_buffer();
-
- // Prime the bit buffer.
- m_bits_left = 16;
- m_bit_buf = 0;
-
- get_bits(16);
- get_bits(16);
-
- for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
- m_mcu_block_max_zag[i] = 64;
-}
+ // Reset everything to default/uninitialized state.
+ void jpeg_decoder::init(jpeg_decoder_stream* pStream, uint32_t flags)
+ {
+ m_flags = flags;
+ m_pMem_blocks = nullptr;
+ m_error_code = JPGD_SUCCESS;
+ m_ready_flag = false;
+ m_image_x_size = m_image_y_size = 0;
+ m_pStream = pStream;
+ m_progressive_flag = JPGD_FALSE;
+
+ memset(m_huff_ac, 0, sizeof(m_huff_ac));
+ memset(m_huff_num, 0, sizeof(m_huff_num));
+ memset(m_huff_val, 0, sizeof(m_huff_val));
+ memset(m_quant, 0, sizeof(m_quant));
+
+ m_scan_type = 0;
+ m_comps_in_frame = 0;
+
+ memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
+ memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
+ memset(m_comp_quant, 0, sizeof(m_comp_quant));
+ memset(m_comp_ident, 0, sizeof(m_comp_ident));
+ memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
+ memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
+
+ m_comps_in_scan = 0;
+ memset(m_comp_list, 0, sizeof(m_comp_list));
+ memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
+ memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
+
+ m_spectral_start = 0;
+ m_spectral_end = 0;
+ m_successive_low = 0;
+ m_successive_high = 0;
+ m_max_mcu_x_size = 0;
+ m_max_mcu_y_size = 0;
+ m_blocks_per_mcu = 0;
+ m_max_blocks_per_row = 0;
+ m_mcus_per_row = 0;
+ m_mcus_per_col = 0;
+
+ memset(m_mcu_org, 0, sizeof(m_mcu_org));
+
+ m_total_lines_left = 0;
+ m_mcu_lines_left = 0;
+ m_num_buffered_scanlines = 0;
+ m_real_dest_bytes_per_scan_line = 0;
+ m_dest_bytes_per_scan_line = 0;
+ m_dest_bytes_per_pixel = 0;
+
+ memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
+
+ memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
+ memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
+ memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
+
+ m_eob_run = 0;
+
+ m_pIn_buf_ofs = m_in_buf;
+ m_in_buf_left = 0;
+ m_eof_flag = false;
+ m_tem_flag = 0;
+
+ memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
+ memset(m_in_buf, 0, sizeof(m_in_buf));
+ memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
+
+ m_restart_interval = 0;
+ m_restarts_left = 0;
+ m_next_restart_num = 0;
+
+ m_max_mcus_per_row = 0;
+ m_max_blocks_per_mcu = 0;
+ m_max_mcus_per_col = 0;
+
+ memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
+ m_pMCU_coefficients = nullptr;
+ m_pSample_buf = nullptr;
+ m_pSample_buf_prev = nullptr;
+ m_sample_buf_prev_valid = false;
+
+ m_total_bytes_read = 0;
+
+ m_pScan_line_0 = nullptr;
+ m_pScan_line_1 = nullptr;
+
+ // Ready the input buffer.
+ prep_in_buffer();
+
+ // Prime the bit buffer.
+ m_bits_left = 16;
+ m_bit_buf = 0;
+
+ get_bits(16);
+ get_bits(16);
+
+ for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
+ m_mcu_block_max_zag[i] = 64;
+
+ m_has_sse2 = false;
+
+#if JPGD_USE_SSE2
+#ifdef _MSC_VER
+ int cpu_info[4];
+ __cpuid(cpu_info, 1);
+ const int cpu_info3 = cpu_info[3];
+ m_has_sse2 = ((cpu_info3 >> 26U) & 1U) != 0U;
+#else
+ m_has_sse2 = true;
+#endif
+#endif
+ }
#define SCALEBITS 16
#define ONE_HALF ((int) 1 << (SCALEBITS-1))
#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
-// Create a few tables that allow us to quickly convert YCbCr to RGB.
-void jpeg_decoder::create_look_ups()
-{
- for (int i = 0; i <= 255; i++)
- {
- int k = i - 128;
- m_crr[i] = ( FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
- m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
- m_crg[i] = (-FIX(0.71414f)) * k;
- m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
- }
-}
-
-// This method throws back into the stream any bytes that where read
-// into the bit buffer during initial marker scanning.
-void jpeg_decoder::fix_in_buffer()
-{
- // In case any 0xFF's where pulled into the buffer during marker scanning.
- JPGD_ASSERT((m_bits_left & 7) == 0);
-
- if (m_bits_left == 16)
- stuff_char( (uint8)(m_bit_buf & 0xFF));
-
- if (m_bits_left >= 8)
- stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));
-
- stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
- stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
-
- m_bits_left = 16;
- get_bits_no_markers(16);
- get_bits_no_markers(16);
-}
-
-void jpeg_decoder::transform_mcu(int mcu_row)
-{
- jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
- if (m_freq_domain_chroma_upsample) {
- JPGD_ASSERT(mcu_row * m_blocks_per_mcu < m_expanded_blocks_per_row);
- }
- else {
- JPGD_ASSERT(mcu_row * m_blocks_per_mcu < m_max_blocks_per_row);
- }
- uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
-
- for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
- {
- idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
- pSrc_ptr += 64;
- pDst_ptr += 64;
- }
-}
-
-static const uint8 s_max_rc[64] =
-{
- 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
- 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
- 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
- 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
-};
-
-void jpeg_decoder::transform_mcu_expand(int mcu_row)
-{
- jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
- uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
-
- // Y IDCT
- int mcu_block;
- for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
- {
- idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
- pSrc_ptr += 64;
- pDst_ptr += 64;
- }
-
- // Chroma IDCT, with upsampling
- jpgd_block_t temp_block[64];
-
- for (int i = 0; i < 2; i++)
- {
- DCT_Upsample::Matrix44 P, Q, R, S;
-
- JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
- JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
-
- int max_zag = m_mcu_block_max_zag[mcu_block++] - 1;
- if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
- switch (s_max_rc[max_zag])
- {
- case 1*16+1:
- DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
- break;
- case 1*16+2:
- DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
- break;
- case 2*16+2:
- DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
- break;
- case 3*16+2:
- DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
- break;
- case 3*16+3:
- DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
- break;
- case 3*16+4:
- DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
- break;
- case 4*16+4:
- DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
- break;
- case 5*16+4:
- DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
- break;
- case 5*16+5:
- DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
- break;
- case 5*16+6:
- DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
- break;
- case 6*16+6:
- DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
- break;
- case 7*16+6:
- DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
- break;
- case 7*16+7:
- DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
- break;
- case 7*16+8:
- DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
- break;
- case 8*16+8:
- DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
- DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
- break;
- default:
- JPGD_ASSERT(false);
- }
-
- DCT_Upsample::Matrix44 a(P + Q); P -= Q;
- DCT_Upsample::Matrix44& b = P;
- DCT_Upsample::Matrix44 c(R + S); R -= S;
- DCT_Upsample::Matrix44& d = R;
-
- DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
- idct_4x4(temp_block, pDst_ptr);
- pDst_ptr += 64;
-
- DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
- idct_4x4(temp_block, pDst_ptr);
- pDst_ptr += 64;
-
- DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
- idct_4x4(temp_block, pDst_ptr);
- pDst_ptr += 64;
-
- DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
- idct_4x4(temp_block, pDst_ptr);
- pDst_ptr += 64;
-
- pSrc_ptr += 64;
- }
-}
-
-// Loads and dequantizes the next row of (already decoded) coefficients.
-// Progressive images only.
-void jpeg_decoder::load_next_row()
-{
- int i;
- jpgd_block_t *p;
- jpgd_quant_t *q;
- int mcu_row, mcu_block, row_block = 0;
- int component_num, component_id;
- int block_x_mcu[JPGD_MAX_COMPONENTS];
-
- memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
-
- for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
- {
- int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
-
- for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
- {
- component_id = m_mcu_org[mcu_block];
- JPGD_ASSERT(m_comp_quant[component_id] < JPGD_MAX_QUANT_TABLES);
- q = m_quant[m_comp_quant[component_id]];
-
- p = m_pMCU_coefficients + 64 * mcu_block;
-
- jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
- jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
- p[0] = pDC[0];
- memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
-
- for (i = 63; i > 0; i--)
- if (p[g_ZAG[i]])
- break;
-
- m_mcu_block_max_zag[mcu_block] = i + 1;
-
- for ( ; i >= 0; i--)
- if (p[g_ZAG[i]])
- p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
-
- row_block++;
-
- if (m_comps_in_scan == 1)
- block_x_mcu[component_id]++;
- else
- {
- if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
- {
- block_x_mcu_ofs = 0;
-
- if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
- {
- block_y_mcu_ofs = 0;
-
- block_x_mcu[component_id] += m_comp_h_samp[component_id];
- }
- }
- }
- }
-
- if (m_freq_domain_chroma_upsample)
- transform_mcu_expand(mcu_row);
- else
- transform_mcu(mcu_row);
- }
-
- if (m_comps_in_scan == 1)
- m_block_y_mcu[m_comp_list[0]]++;
- else
- {
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
- {
- component_id = m_comp_list[component_num];
-
- m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
- }
- }
-}
-
-// Restart interval processing.
-void jpeg_decoder::process_restart()
-{
- int i;
- int c = 0;
-
- // Align to a byte boundry
- // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
- //get_bits_no_markers(m_bits_left & 7);
-
- // Let's scan a little bit to find the marker, but not _too_ far.
- // 1536 is a "fudge factor" that determines how much to scan.
- for (i = 1536; i > 0; i--)
- if (get_char() == 0xFF)
- break;
-
- if (i == 0)
- stop_decoding(JPGD_BAD_RESTART_MARKER);
-
- for ( ; i > 0; i--)
- if ((c = get_char()) != 0xFF)
- break;
-
- if (i == 0)
- stop_decoding(JPGD_BAD_RESTART_MARKER);
-
- // Is it the expected marker? If not, something bad happened.
- if (c != (m_next_restart_num + M_RST0))
- stop_decoding(JPGD_BAD_RESTART_MARKER);
-
- // Reset each component's DC prediction values.
- memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
-
- m_eob_run = 0;
-
- m_restarts_left = m_restart_interval;
-
- m_next_restart_num = (m_next_restart_num + 1) & 7;
-
- // Get the bit buffer going again...
-
- m_bits_left = 16;
- get_bits_no_markers(16);
- get_bits_no_markers(16);
-}
-
-static inline int dequantize_ac(int c, int q) { c *= q; return c; }
-
-// Decodes and dequantizes the next row of coefficients.
-void jpeg_decoder::decode_next_row()
-{
- int row_block = 0;
-
- for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
- {
- if ((m_restart_interval) && (m_restarts_left == 0))
- process_restart();
-
- jpgd_block_t* p = m_pMCU_coefficients;
- for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
- {
- int component_id = m_mcu_org[mcu_block];
- JPGD_ASSERT(m_comp_quant[component_id] < JPGD_MAX_QUANT_TABLES);
- jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
-
- int r, s;
- s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
- s = JPGD_HUFF_EXTEND(r, s);
-
- m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
-
- p[0] = static_cast<jpgd_block_t>(s * q[0]);
-
- int prev_num_set = m_mcu_block_max_zag[mcu_block];
-
- huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
-
- int k;
- for (k = 1; k < 64; k++)
- {
- int extra_bits;
- s = huff_decode(pH, extra_bits);
-
- r = s >> 4;
- s &= 15;
-
- if (s)
- {
- if (r)
- {
- if ((k + r) > 63)
- stop_decoding(JPGD_DECODE_ERROR);
-
- if (k < prev_num_set)
- {
- int n = JPGD_MIN(r, prev_num_set - k);
- int kt = k;
- while (n--)
- p[g_ZAG[kt++]] = 0;
- }
-
- k += r;
- }
-
- s = JPGD_HUFF_EXTEND(extra_bits, s);
-
- JPGD_ASSERT(k < 64);
-
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
- }
- else
- {
- if (r == 15)
- {
- if ((k + 16) > 64)
- stop_decoding(JPGD_DECODE_ERROR);
-
- if (k < prev_num_set)
- {
- int n = JPGD_MIN(16, prev_num_set - k);
- int kt = k;
- while (n--)
- {
- JPGD_ASSERT(kt <= 63);
- p[g_ZAG[kt++]] = 0;
- }
- }
-
- k += 16 - 1; // - 1 because the loop counter is k
- JPGD_ASSERT(p[g_ZAG[k]] == 0);
- }
- else
- break;
- }
- }
-
- if (k < prev_num_set)
- {
- int kt = k;
- while (kt < prev_num_set)
- p[g_ZAG[kt++]] = 0;
- }
-
- m_mcu_block_max_zag[mcu_block] = k;
-
- row_block++;
- }
-
- if (m_freq_domain_chroma_upsample)
- transform_mcu_expand(mcu_row);
- else
- transform_mcu(mcu_row);
-
- m_restarts_left--;
- }
-}
-
-// YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
-void jpeg_decoder::H1V1Convert()
-{
- int row = m_max_mcu_y_size - m_mcu_lines_left;
- uint8 *d = m_pScan_line_0;
- uint8 *s = m_pSample_buf + row * 8;
-
- for (int i = m_max_mcus_per_row; i > 0; i--)
- {
- for (int j = 0; j < 8; j++)
- {
- int y = s[j];
- int cb = s[64+j];
- int cr = s[128+j];
-
- d[0] = clamp(y + m_crr[cr]);
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
- d[2] = clamp(y + m_cbb[cb]);
- d[3] = 255;
-
- d += 4;
- }
-
- s += 64*3;
- }
-}
-
-// YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
-void jpeg_decoder::H2V1Convert()
-{
- int row = m_max_mcu_y_size - m_mcu_lines_left;
- uint8 *d0 = m_pScan_line_0;
- uint8 *y = m_pSample_buf + row * 8;
- uint8 *c = m_pSample_buf + 2*64 + row * 8;
-
- for (int i = m_max_mcus_per_row; i > 0; i--)
- {
- for (int l = 0; l < 2; l++)
- {
- for (int j = 0; j < 4; j++)
- {
- int cb = c[0];
- int cr = c[64];
-
- int rc = m_crr[cr];
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
- int bc = m_cbb[cb];
-
- int yy = y[j<<1];
- d0[0] = clamp(yy+rc);
- d0[1] = clamp(yy+gc);
- d0[2] = clamp(yy+bc);
- d0[3] = 255;
-
- yy = y[(j<<1)+1];
- d0[4] = clamp(yy+rc);
- d0[5] = clamp(yy+gc);
- d0[6] = clamp(yy+bc);
- d0[7] = 255;
-
- d0 += 8;
-
- c++;
- }
- y += 64;
- }
-
- y += 64*4 - 64*2;
- c += 64*4 - 8;
- }
-}
-
-// YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
-void jpeg_decoder::H1V2Convert()
-{
- int row = m_max_mcu_y_size - m_mcu_lines_left;
- uint8 *d0 = m_pScan_line_0;
- uint8 *d1 = m_pScan_line_1;
- uint8 *y;
- uint8 *c;
-
- if (row < 8)
- y = m_pSample_buf + row * 8;
- else
- y = m_pSample_buf + 64*1 + (row & 7) * 8;
-
- c = m_pSample_buf + 64*2 + (row >> 1) * 8;
-
- for (int i = m_max_mcus_per_row; i > 0; i--)
- {
- for (int j = 0; j < 8; j++)
- {
- int cb = c[0+j];
- int cr = c[64+j];
-
- int rc = m_crr[cr];
- int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
- int bc = m_cbb[cb];
-
- int yy = y[j];
- d0[0] = clamp(yy+rc);
- d0[1] = clamp(yy+gc);
- d0[2] = clamp(yy+bc);
- d0[3] = 255;
-
- yy = y[8+j];
- d1[0] = clamp(yy+rc);
- d1[1] = clamp(yy+gc);
- d1[2] = clamp(yy+bc);
- d1[3] = 255;
-
- d0 += 4;
- d1 += 4;
- }
-
- y += 64*4;
- c += 64*4;
- }
-}
-
-// YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
-void jpeg_decoder::H2V2Convert()
-{
- int row = m_max_mcu_y_size - m_mcu_lines_left;
- uint8 *d0 = m_pScan_line_0;
- uint8 *d1 = m_pScan_line_1;
- uint8 *y;
- uint8 *c;
-
- if (row < 8)
- y = m_pSample_buf + row * 8;
- else
- y = m_pSample_buf + 64*2 + (row & 7) * 8;
-
- c = m_pSample_buf + 64*4 + (row >> 1) * 8;
-
- for (int i = m_max_mcus_per_row; i > 0; i--)
- {
- for (int l = 0; l < 2; l++)
- {
- for (int j = 0; j < 8; j += 2)
- {
- int cb = c[0];
- int cr = c[64];
+ // Create a few tables that allow us to quickly convert YCbCr to RGB.
+ void jpeg_decoder::create_look_ups()
+ {
+ for (int i = 0; i <= 255; i++)
+ {
+ int k = i - 128;
+ m_crr[i] = (FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS;
+ m_cbb[i] = (FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS;
+ m_crg[i] = (-FIX(0.71414f)) * k;
+ m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
+ }
+ }
+
+ // This method throws back into the stream any bytes that where read
+ // into the bit buffer during initial marker scanning.
+ void jpeg_decoder::fix_in_buffer()
+ {
+ // In case any 0xFF's where pulled into the buffer during marker scanning.
+ assert((m_bits_left & 7) == 0);
+
+ if (m_bits_left == 16)
+ stuff_char((uint8)(m_bit_buf & 0xFF));
+
+ if (m_bits_left >= 8)
+ stuff_char((uint8)((m_bit_buf >> 8) & 0xFF));
+
+ stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
+ stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
+
+ m_bits_left = 16;
+ get_bits_no_markers(16);
+ get_bits_no_markers(16);
+ }
+
+ void jpeg_decoder::transform_mcu(int mcu_row)
+ {
+ jpgd_block_coeff_t* pSrc_ptr = m_pMCU_coefficients;
+ if (mcu_row * m_blocks_per_mcu >= m_max_blocks_per_row)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
+
+ for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+ {
+ idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block], ((m_flags & cFlagDisableSIMD) == 0) && m_has_sse2);
+ pSrc_ptr += 64;
+ pDst_ptr += 64;
+ }
+ }
+
+ // Loads and dequantizes the next row of (already decoded) coefficients.
+ // Progressive images only.
+ void jpeg_decoder::load_next_row()
+ {
+ int i;
+ jpgd_block_coeff_t* p;
+ jpgd_quant_t* q;
+ int mcu_row, mcu_block, row_block = 0;
+ int component_num, component_id;
+ int block_x_mcu[JPGD_MAX_COMPONENTS];
+
+ memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
+
+ for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+ {
+ int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+ for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+ {
+ component_id = m_mcu_org[mcu_block];
+ if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ q = m_quant[m_comp_quant[component_id]];
+
+ p = m_pMCU_coefficients + 64 * mcu_block;
+
+ jpgd_block_coeff_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+ jpgd_block_coeff_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+ p[0] = pDC[0];
+ memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_coeff_t));
+
+ for (i = 63; i > 0; i--)
+ if (p[g_ZAG[i]])
+ break;
+
+ m_mcu_block_max_zag[mcu_block] = i + 1;
+
+ for (; i >= 0; i--)
+ if (p[g_ZAG[i]])
+ p[g_ZAG[i]] = static_cast<jpgd_block_coeff_t>(p[g_ZAG[i]] * q[i]);
+
+ row_block++;
+
+ if (m_comps_in_scan == 1)
+ block_x_mcu[component_id]++;
+ else
+ {
+ if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+ {
+ block_x_mcu_ofs = 0;
+
+ if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+ {
+ block_y_mcu_ofs = 0;
+
+ block_x_mcu[component_id] += m_comp_h_samp[component_id];
+ }
+ }
+ }
+ }
+
+ transform_mcu(mcu_row);
+ }
+
+ if (m_comps_in_scan == 1)
+ m_block_y_mcu[m_comp_list[0]]++;
+ else
+ {
+ for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+ {
+ component_id = m_comp_list[component_num];
+
+ m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+ }
+ }
+ }
+
+ // Restart interval processing.
+ void jpeg_decoder::process_restart()
+ {
+ int i;
+ int c = 0;
+
+ // Align to a byte boundry
+ // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
+ //get_bits_no_markers(m_bits_left & 7);
+
+ // Let's scan a little bit to find the marker, but not _too_ far.
+ // 1536 is a "fudge factor" that determines how much to scan.
+ for (i = 1536; i > 0; i--)
+ if (get_char() == 0xFF)
+ break;
+
+ if (i == 0)
+ stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+ for (; i > 0; i--)
+ if ((c = get_char()) != 0xFF)
+ break;
+
+ if (i == 0)
+ stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+ // Is it the expected marker? If not, something bad happened.
+ if (c != (m_next_restart_num + M_RST0))
+ stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+ // Reset each component's DC prediction values.
+ memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
+
+ m_eob_run = 0;
+
+ m_restarts_left = m_restart_interval;
+
+ m_next_restart_num = (m_next_restart_num + 1) & 7;
+
+ // Get the bit buffer going again...
+
+ m_bits_left = 16;
+ get_bits_no_markers(16);
+ get_bits_no_markers(16);
+ }
+
+ static inline int dequantize_ac(int c, int q) { c *= q; return c; }
+
+ // Decodes and dequantizes the next row of coefficients.
+ void jpeg_decoder::decode_next_row()
+ {
+ int row_block = 0;
+
+ for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+ {
+ if ((m_restart_interval) && (m_restarts_left == 0))
+ process_restart();
+
+ jpgd_block_coeff_t* p = m_pMCU_coefficients;
+ for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
+ {
+ int component_id = m_mcu_org[mcu_block];
+ if (m_comp_quant[component_id] >= JPGD_MAX_QUANT_TABLES)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
+
+ int r, s;
+ s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
+ if (s >= 16)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ s = JPGD_HUFF_EXTEND(r, s);
+
+ m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
+
+ p[0] = static_cast<jpgd_block_coeff_t>(s * q[0]);
+
+ int prev_num_set = m_mcu_block_max_zag[mcu_block];
+
+ huff_tables* pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
+
+ int k;
+ for (k = 1; k < 64; k++)
+ {
+ int extra_bits;
+ s = huff_decode(pH, extra_bits);
+
+ r = s >> 4;
+ s &= 15;
+
+ if (s)
+ {
+ if (r)
+ {
+ if ((k + r) > 63)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ if (k < prev_num_set)
+ {
+ int n = JPGD_MIN(r, prev_num_set - k);
+ int kt = k;
+ while (n--)
+ p[g_ZAG[kt++]] = 0;
+ }
+
+ k += r;
+ }
+
+ s = JPGD_HUFF_EXTEND(extra_bits, s);
+
+ if (k >= 64)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(dequantize_ac(s, q[k])); //s * q[k];
+ }
+ else
+ {
+ if (r == 15)
+ {
+ if ((k + 16) > 64)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ if (k < prev_num_set)
+ {
+ int n = JPGD_MIN(16, prev_num_set - k);
+ int kt = k;
+ while (n--)
+ {
+ if (kt > 63)
+ stop_decoding(JPGD_DECODE_ERROR);
+ p[g_ZAG[kt++]] = 0;
+ }
+ }
+
+ k += 16 - 1; // - 1 because the loop counter is k
+
+ if (p[g_ZAG[k & 63]] != 0)
+ stop_decoding(JPGD_DECODE_ERROR);
+ }
+ else
+ break;
+ }
+ }
+
+ if (k < prev_num_set)
+ {
+ int kt = k;
+ while (kt < prev_num_set)
+ p[g_ZAG[kt++]] = 0;
+ }
+
+ m_mcu_block_max_zag[mcu_block] = k;
+
+ row_block++;
+ }
+
+ transform_mcu(mcu_row);
+
+ m_restarts_left--;
+ }
+ }
+
+ // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
+ void jpeg_decoder::H1V1Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left;
+ uint8* d = m_pScan_line_0;
+ uint8* s = m_pSample_buf + row * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ int y = s[j];
+ int cb = s[64 + j];
+ int cr = s[128 + j];
+
+ d[0] = clamp(y + m_crr[cr]);
+ d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
+ d[2] = clamp(y + m_cbb[cb]);
+ d[3] = 255;
+
+ d += 4;
+ }
+
+ s += 64 * 3;
+ }
+ }
+
+ // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
+ void jpeg_decoder::H2V1Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left;
+ uint8* d0 = m_pScan_line_0;
+ uint8* y = m_pSample_buf + row * 8;
+ uint8* c = m_pSample_buf + 2 * 64 + row * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int l = 0; l < 2; l++)
+ {
+ for (int j = 0; j < 4; j++)
+ {
+ int cb = c[0];
+ int cr = c[64];
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ int yy = y[j << 1];
+ d0[0] = clamp(yy + rc);
+ d0[1] = clamp(yy + gc);
+ d0[2] = clamp(yy + bc);
+ d0[3] = 255;
+
+ yy = y[(j << 1) + 1];
+ d0[4] = clamp(yy + rc);
+ d0[5] = clamp(yy + gc);
+ d0[6] = clamp(yy + bc);
+ d0[7] = 255;
+
+ d0 += 8;
+
+ c++;
+ }
+ y += 64;
+ }
+
+ y += 64 * 4 - 64 * 2;
+ c += 64 * 4 - 8;
+ }
+ }
+
+ // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
+ void jpeg_decoder::H2V1ConvertFiltered()
+ {
+ const uint BLOCKS_PER_MCU = 4;
+ int row = m_max_mcu_y_size - m_mcu_lines_left;
+ uint8* d0 = m_pScan_line_0;
+
+ const int half_image_x_size = (m_image_x_size >> 1) - 1;
+ const int row_x8 = row * 8;
+
+ for (int x = 0; x < m_image_x_size; x++)
+ {
+ int y = m_pSample_buf[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + row_x8)];
+
+ int c_x0 = (x - 1) >> 1;
+ int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size);
+ c_x0 = JPGD_MAX(c_x0, 0);
+
+ int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7) + row_x8 + 128;
+ int cb0 = m_pSample_buf[check_sample_buf_ofs(a)];
+ int cr0 = m_pSample_buf[check_sample_buf_ofs(a + 64)];
+
+ int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7) + row_x8 + 128;
+ int cb1 = m_pSample_buf[check_sample_buf_ofs(b)];
+ int cr1 = m_pSample_buf[check_sample_buf_ofs(b + 64)];
+
+ int w0 = (x & 1) ? 3 : 1;
+ int w1 = (x & 1) ? 1 : 3;
+
+ int cb = (cb0 * w0 + cb1 * w1 + 2) >> 2;
+ int cr = (cr0 * w0 + cr1 * w1 + 2) >> 2;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d0[0] = clamp(y + rc);
+ d0[1] = clamp(y + gc);
+ d0[2] = clamp(y + bc);
+ d0[3] = 255;
+
+ d0 += 4;
+ }
+ }
+
+ // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
+ void jpeg_decoder::H1V2Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left;
+ uint8* d0 = m_pScan_line_0;
+ uint8* d1 = m_pScan_line_1;
+ uint8* y;
+ uint8* c;
+
+ if (row < 8)
+ y = m_pSample_buf + row * 8;
+ else
+ y = m_pSample_buf + 64 * 1 + (row & 7) * 8;
+
+ c = m_pSample_buf + 64 * 2 + (row >> 1) * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ int cb = c[0 + j];
+ int cr = c[64 + j];
int rc = m_crr[cr];
int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
int bc = m_cbb[cb];
int yy = y[j];
- d0[0] = clamp(yy+rc);
- d0[1] = clamp(yy+gc);
- d0[2] = clamp(yy+bc);
+ d0[0] = clamp(yy + rc);
+ d0[1] = clamp(yy + gc);
+ d0[2] = clamp(yy + bc);
d0[3] = 255;
- yy = y[j+1];
- d0[4] = clamp(yy+rc);
- d0[5] = clamp(yy+gc);
- d0[6] = clamp(yy+bc);
- d0[7] = 255;
-
- yy = y[j+8];
- d1[0] = clamp(yy+rc);
- d1[1] = clamp(yy+gc);
- d1[2] = clamp(yy+bc);
+ yy = y[8 + j];
+ d1[0] = clamp(yy + rc);
+ d1[1] = clamp(yy + gc);
+ d1[2] = clamp(yy + bc);
d1[3] = 255;
- yy = y[j+8+1];
- d1[4] = clamp(yy+rc);
- d1[5] = clamp(yy+gc);
- d1[6] = clamp(yy+bc);
- d1[7] = 255;
+ d0 += 4;
+ d1 += 4;
+ }
+
+ y += 64 * 4;
+ c += 64 * 4;
+ }
+ }
+
+ // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
+ void jpeg_decoder::H1V2ConvertFiltered()
+ {
+ const uint BLOCKS_PER_MCU = 4;
+ int y = m_image_y_size - m_total_lines_left;
+ int row = y & 15;
+
+ const int half_image_y_size = (m_image_y_size >> 1) - 1;
+
+ uint8* d0 = m_pScan_line_0;
+
+ const int w0 = (row & 1) ? 3 : 1;
+ const int w1 = (row & 1) ? 1 : 3;
+
+ int c_y0 = (y - 1) >> 1;
+ int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size);
+
+ const uint8_t* p_YSamples = m_pSample_buf;
+ const uint8_t* p_C0Samples = m_pSample_buf;
+ if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1))
+ {
+ assert(y > 0);
+ assert(m_sample_buf_prev_valid);
+
+ if ((row & 15) == 15)
+ p_YSamples = m_pSample_buf_prev;
- d0 += 8;
- d1 += 8;
+ p_C0Samples = m_pSample_buf_prev;
+ }
+
+ const int y_sample_base_ofs = ((row & 8) ? 64 : 0) + (row & 7) * 8;
+ const int y0_base = (c_y0 & 7) * 8 + 128;
+ const int y1_base = (c_y1 & 7) * 8 + 128;
+
+ for (int x = 0; x < m_image_x_size; x++)
+ {
+ const int base_ofs = (x >> 3) * BLOCKS_PER_MCU * 64 + (x & 7);
+
+ int y_sample = p_YSamples[check_sample_buf_ofs(base_ofs + y_sample_base_ofs)];
+
+ int a = base_ofs + y0_base;
+ int cb0_sample = p_C0Samples[check_sample_buf_ofs(a)];
+ int cr0_sample = p_C0Samples[check_sample_buf_ofs(a + 64)];
+
+ int b = base_ofs + y1_base;
+ int cb1_sample = m_pSample_buf[check_sample_buf_ofs(b)];
+ int cr1_sample = m_pSample_buf[check_sample_buf_ofs(b + 64)];
+
+ int cb = (cb0_sample * w0 + cb1_sample * w1 + 2) >> 2;
+ int cr = (cr0_sample * w0 + cr1_sample * w1 + 2) >> 2;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d0[0] = clamp(y_sample + rc);
+ d0[1] = clamp(y_sample + gc);
+ d0[2] = clamp(y_sample + bc);
+ d0[3] = 255;
+
+ d0 += 4;
+ }
+ }
+
+ // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
+ void jpeg_decoder::H2V2Convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left;
+ uint8* d0 = m_pScan_line_0;
+ uint8* d1 = m_pScan_line_1;
+ uint8* y;
+ uint8* c;
- c++;
+ if (row < 8)
+ y = m_pSample_buf + row * 8;
+ else
+ y = m_pSample_buf + 64 * 2 + (row & 7) * 8;
+
+ c = m_pSample_buf + 64 * 4 + (row >> 1) * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ for (int l = 0; l < 2; l++)
+ {
+ for (int j = 0; j < 8; j += 2)
+ {
+ int cb = c[0];
+ int cr = c[64];
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ int yy = y[j];
+ d0[0] = clamp(yy + rc);
+ d0[1] = clamp(yy + gc);
+ d0[2] = clamp(yy + bc);
+ d0[3] = 255;
+
+ yy = y[j + 1];
+ d0[4] = clamp(yy + rc);
+ d0[5] = clamp(yy + gc);
+ d0[6] = clamp(yy + bc);
+ d0[7] = 255;
+
+ yy = y[j + 8];
+ d1[0] = clamp(yy + rc);
+ d1[1] = clamp(yy + gc);
+ d1[2] = clamp(yy + bc);
+ d1[3] = 255;
+
+ yy = y[j + 8 + 1];
+ d1[4] = clamp(yy + rc);
+ d1[5] = clamp(yy + gc);
+ d1[6] = clamp(yy + bc);
+ d1[7] = 255;
+
+ d0 += 8;
+ d1 += 8;
+
+ c++;
+ }
+ y += 64;
}
- y += 64;
- }
-
- y += 64*6 - 64*2;
- c += 64*6 - 8;
- }
-}
-
-// Y (1 block per MCU) to 8-bit grayscale
-void jpeg_decoder::gray_convert()
-{
- int row = m_max_mcu_y_size - m_mcu_lines_left;
- uint8 *d = m_pScan_line_0;
- uint8 *s = m_pSample_buf + row * 8;
-
- for (int i = m_max_mcus_per_row; i > 0; i--)
- {
- *(uint *)d = *(uint *)s;
- *(uint *)(&d[4]) = *(uint *)(&s[4]);
-
- s += 64;
- d += 8;
- }
-}
-
-void jpeg_decoder::expanded_convert()
-{
- int row = m_max_mcu_y_size - m_mcu_lines_left;
-
- uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
-
- uint8* d = m_pScan_line_0;
-
- for (int i = m_max_mcus_per_row; i > 0; i--)
- {
- for (int k = 0; k < m_max_mcu_x_size; k += 8)
- {
- const int Y_ofs = k * 8;
- const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
- const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
- for (int j = 0; j < 8; j++)
- {
- int y = Py[Y_ofs + j];
- int cb = Py[Cb_ofs + j];
- int cr = Py[Cr_ofs + j];
-
- d[0] = clamp(y + m_crr[cr]);
- d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
- d[2] = clamp(y + m_cbb[cb]);
- d[3] = 255;
-
- d += 4;
- }
- }
-
- Py += 64 * m_expanded_blocks_per_mcu;
- }
-}
-
-// Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
-void jpeg_decoder::find_eoi()
-{
- if (!m_progressive_flag)
- {
- // Attempt to read the EOI marker.
- //get_bits_no_markers(m_bits_left & 7);
-
- // Prime the bit buffer
- m_bits_left = 16;
- get_bits(16);
- get_bits(16);
-
- // The next marker _should_ be EOI
- process_markers();
- }
-
- m_total_bytes_read -= m_in_buf_left;
-}
-
-int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
-{
- if ((m_error_code) || (!m_ready_flag))
- return JPGD_FAILED;
-
- if (m_total_lines_left == 0)
- return JPGD_DONE;
-
- if (m_mcu_lines_left == 0)
- {
- if (setjmp(m_jmp_state))
- return JPGD_FAILED;
-
- if (m_progressive_flag)
- load_next_row();
- else
- decode_next_row();
-
- // Find the EOI marker if that was the last row.
- if (m_total_lines_left <= m_max_mcu_y_size)
- find_eoi();
-
- m_mcu_lines_left = m_max_mcu_y_size;
- }
-
- if (m_freq_domain_chroma_upsample)
- {
- expanded_convert();
- *pScan_line = m_pScan_line_0;
- }
- else
- {
- switch (m_scan_type)
- {
- case JPGD_YH2V2:
- {
- if ((m_mcu_lines_left & 1) == 0)
- {
- H2V2Convert();
- *pScan_line = m_pScan_line_0;
- }
- else
- *pScan_line = m_pScan_line_1;
-
- break;
- }
- case JPGD_YH2V1:
- {
- H2V1Convert();
- *pScan_line = m_pScan_line_0;
- break;
- }
- case JPGD_YH1V2:
- {
- if ((m_mcu_lines_left & 1) == 0)
- {
- H1V2Convert();
- *pScan_line = m_pScan_line_0;
- }
- else
- *pScan_line = m_pScan_line_1;
-
- break;
- }
- case JPGD_YH1V1:
- {
- H1V1Convert();
- *pScan_line = m_pScan_line_0;
- break;
- }
- case JPGD_GRAYSCALE:
- {
- gray_convert();
- *pScan_line = m_pScan_line_0;
-
- break;
- }
- }
- }
-
- *pScan_line_len = m_real_dest_bytes_per_scan_line;
-
- m_mcu_lines_left--;
- m_total_lines_left--;
-
- return JPGD_SUCCESS;
-}
-
-// Creates the tables needed for efficient Huffman decoding.
-void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
-{
- int p, i, l, si;
- uint8 huffsize[257];
- uint huffcode[257];
- uint code;
- uint subtree;
- int code_size;
- int lastp;
- int nextfreeentry;
- int currententry;
-
- pH->ac_table = m_huff_ac[index] != 0;
-
- p = 0;
-
- for (l = 1; l <= 16; l++)
- {
- for (i = 1; i <= m_huff_num[index][l]; i++)
- {
- JPGD_ASSERT(p < 257);
- huffsize[p++] = static_cast<uint8>(l);
- }
- }
-
- huffsize[p] = 0;
-
- lastp = p;
-
- code = 0;
- si = huffsize[0];
- p = 0;
-
- while (huffsize[p])
- {
- while (huffsize[p] == si)
- {
- JPGD_ASSERT(p < 257);
- huffcode[p++] = code;
- code++;
- }
-
- code <<= 1;
- si++;
- }
-
- memset(pH->look_up, 0, sizeof(pH->look_up));
- memset(pH->look_up2, 0, sizeof(pH->look_up2));
- memset(pH->tree, 0, sizeof(pH->tree));
- memset(pH->code_size, 0, sizeof(pH->code_size));
-
- nextfreeentry = -1;
-
- p = 0;
-
- while (p < lastp)
- {
- i = m_huff_val[index][p];
- code = huffcode[p];
- code_size = huffsize[p];
-
- pH->code_size[i] = static_cast<uint8>(code_size);
-
- if (code_size <= 8)
- {
- code <<= (8 - code_size);
-
- for (l = 1 << (8 - code_size); l > 0; l--)
- {
- JPGD_ASSERT(i < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
- JPGD_ASSERT(code < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
-
- pH->look_up[code] = i;
-
- bool has_extrabits = false;
- int extra_bits = 0;
- int num_extra_bits = i & 15;
-
- int bits_to_fetch = code_size;
- if (num_extra_bits)
- {
- int total_codesize = code_size + num_extra_bits;
- if (total_codesize <= 8)
- {
- has_extrabits = true;
- extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
- JPGD_ASSERT(extra_bits <= 0x7FFF);
- bits_to_fetch += num_extra_bits;
- }
- }
-
- if (!has_extrabits)
- pH->look_up2[code] = i | (bits_to_fetch << 8);
- else
- pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
-
- code++;
- }
- }
- else
- {
- subtree = (code >> (code_size - 8)) & 0xFF;
-
- currententry = pH->look_up[subtree];
-
- if (currententry == 0)
- {
- pH->look_up[subtree] = currententry = nextfreeentry;
- pH->look_up2[subtree] = currententry = nextfreeentry;
-
- nextfreeentry -= 2;
- }
-
- code <<= (16 - (code_size - 8));
-
- for (l = code_size; l > 9; l--)
- {
- if ((code & 0x8000) == 0)
- currententry--;
-
- unsigned int idx = -currententry - 1;
- JPGD_ASSERT(idx < JPGD_HUFF_TREE_MAX_LENGTH);
- if (pH->tree[idx] == 0)
- {
- pH->tree[idx] = nextfreeentry;
-
- currententry = nextfreeentry;
-
- nextfreeentry -= 2;
- }
- else {
- currententry = pH->tree[idx];
- }
-
- code <<= 1;
- }
-
- if ((code & 0x8000) == 0)
- currententry--;
-
- pH->tree[-currententry - 1] = i;
- }
-
- p++;
- }
-}
-
-// Verifies the quantization tables needed for this scan are available.
-void jpeg_decoder::check_quant_tables()
-{
- for (int i = 0; i < m_comps_in_scan; i++)
- if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL)
- stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
-}
-
-// Verifies that all the Huffman tables needed for this scan are available.
-void jpeg_decoder::check_huff_tables()
-{
- for (int i = 0; i < m_comps_in_scan; i++)
- {
- if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL))
- stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
-
- if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL))
- stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
- }
-
- for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
- if (m_huff_num[i])
- {
- if (!m_pHuff_tabs[i])
- m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
-
- make_huff_table(i, m_pHuff_tabs[i]);
- }
-}
-
-// Determines the component order inside each MCU.
-// Also calcs how many MCU's are on each row, etc.
-void jpeg_decoder::calc_mcu_block_order()
-{
- int component_num, component_id;
- int max_h_samp = 0, max_v_samp = 0;
-
- for (component_id = 0; component_id < m_comps_in_frame; component_id++)
- {
- if (m_comp_h_samp[component_id] > max_h_samp)
- max_h_samp = m_comp_h_samp[component_id];
-
- if (m_comp_v_samp[component_id] > max_v_samp)
- max_v_samp = m_comp_v_samp[component_id];
- }
-
- for (component_id = 0; component_id < m_comps_in_frame; component_id++)
- {
- m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
- m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
- }
-
- if (m_comps_in_scan == 1)
- {
- m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
- m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
- }
- else
- {
- m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
- m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
- }
-
- if (m_comps_in_scan == 1)
- {
- m_mcu_org[0] = m_comp_list[0];
-
- m_blocks_per_mcu = 1;
- }
- else
- {
- m_blocks_per_mcu = 0;
-
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
- {
- int num_blocks;
-
- component_id = m_comp_list[component_num];
-
- num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
-
- while (num_blocks--)
- m_mcu_org[m_blocks_per_mcu++] = component_id;
- }
- }
-}
-
-// Starts a new scan.
-int jpeg_decoder::init_scan()
-{
- if (!locate_sos_marker())
- return JPGD_FALSE;
-
- calc_mcu_block_order();
-
- check_huff_tables();
-
- check_quant_tables();
-
- memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
-
- m_eob_run = 0;
-
- if (m_restart_interval)
- {
- m_restarts_left = m_restart_interval;
- m_next_restart_num = 0;
- }
-
- fix_in_buffer();
-
- return JPGD_TRUE;
-}
-
-// Starts a frame. Determines if the number of components or sampling factors
-// are supported.
-void jpeg_decoder::init_frame()
-{
- int i;
-
- if (m_comps_in_frame == 1)
- {
- if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
-
- m_scan_type = JPGD_GRAYSCALE;
- m_max_blocks_per_mcu = 1;
- m_max_mcu_x_size = 8;
- m_max_mcu_y_size = 8;
- }
- else if (m_comps_in_frame == 3)
- {
- if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
- ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) )
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
-
- if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
- {
- m_scan_type = JPGD_YH1V1;
-
- m_max_blocks_per_mcu = 3;
- m_max_mcu_x_size = 8;
- m_max_mcu_y_size = 8;
- }
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
- {
- m_scan_type = JPGD_YH2V1;
- m_max_blocks_per_mcu = 4;
- m_max_mcu_x_size = 16;
- m_max_mcu_y_size = 8;
- }
- else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
- {
- m_scan_type = JPGD_YH1V2;
- m_max_blocks_per_mcu = 4;
- m_max_mcu_x_size = 8;
- m_max_mcu_y_size = 16;
- }
- else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
- {
- m_scan_type = JPGD_YH2V2;
- m_max_blocks_per_mcu = 6;
- m_max_mcu_x_size = 16;
- m_max_mcu_y_size = 16;
- }
- else
- stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
- }
- else
- stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
-
- m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
- m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
-
- // These values are for the *destination* pixels: after conversion.
- if (m_scan_type == JPGD_GRAYSCALE)
- m_dest_bytes_per_pixel = 1;
- else
- m_dest_bytes_per_pixel = 4;
-
- m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
-
- m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
-
- // Initialize two scan line buffers.
- m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
- if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
- m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true);
-
- m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
-
- // Should never happen
- if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
- stop_decoding(JPGD_ASSERTION_ERROR);
-
- // Allocate the coefficient buffer, enough for one MCU
- m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
-
- for (i = 0; i < m_max_blocks_per_mcu; i++)
- m_mcu_block_max_zag[i] = 64;
-
- m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
- m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
- m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
- // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
- m_freq_domain_chroma_upsample = false;
-#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
- m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
-#endif
- if (m_freq_domain_chroma_upsample)
- m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64);
- else
- m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64);
-
- m_total_lines_left = m_image_y_size;
-
- m_mcu_lines_left = 0;
-
- create_look_ups();
-}
-
-// The coeff_buf series of methods originally stored the coefficients
-// into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
-// was used to make this process more efficient. Now, we can store the entire
-// thing in RAM.
-jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
-{
- coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
-
- cb->block_num_x = block_num_x;
- cb->block_num_y = block_num_y;
- cb->block_len_x = block_len_x;
- cb->block_len_y = block_len_y;
- cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
- cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
- return cb;
-}
-
-inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
-{
- JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
- return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
-}
-
-// The following methods decode the various types of m_blocks encountered
-// in progressively encoded images.
-void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
- int s, r;
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
-
- if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
- {
- r = pD->get_bits_no_markers(s);
- s = JPGD_HUFF_EXTEND(r, s);
- }
-
- pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
-
- p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
-}
-
-void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
- if (pD->get_bits_no_markers(1))
- {
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
-
- p[0] |= (1 << pD->m_successive_low);
- }
-}
-
-void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
- int k, s, r;
-
- if (pD->m_eob_run)
- {
- pD->m_eob_run--;
- return;
- }
-
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
-
- for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
- {
- unsigned int idx = pD->m_comp_ac_tab[component_id];
- JPGD_ASSERT(idx < JPGD_MAX_HUFF_TABLES);
- s = pD->huff_decode(pD->m_pHuff_tabs[idx]);
-
- r = s >> 4;
- s &= 15;
-
- if (s)
- {
- if ((k += r) > 63)
- pD->stop_decoding(JPGD_DECODE_ERROR);
-
- r = pD->get_bits_no_markers(s);
- s = JPGD_HUFF_EXTEND(r, s);
-
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
- }
- else
- {
- if (r == 15)
- {
- if ((k += 15) > 63)
- pD->stop_decoding(JPGD_DECODE_ERROR);
- }
- else
- {
- pD->m_eob_run = 1 << r;
-
- if (r)
- pD->m_eob_run += pD->get_bits_no_markers(r);
-
- pD->m_eob_run--;
-
- break;
- }
- }
- }
-}
-
-void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
- int s, k, r;
- int p1 = 1 << pD->m_successive_low;
- int m1 = (-1) << pD->m_successive_low;
- jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
- JPGD_ASSERT(pD->m_spectral_end <= 63);
-
- k = pD->m_spectral_start;
-
- if (pD->m_eob_run == 0)
- {
- for ( ; k <= pD->m_spectral_end; k++)
- {
- unsigned int idx = pD->m_comp_ac_tab[component_id];
- JPGD_ASSERT(idx < JPGD_MAX_HUFF_TABLES);
- s = pD->huff_decode(pD->m_pHuff_tabs[idx]);
-
- r = s >> 4;
- s &= 15;
-
- if (s)
- {
- if (s != 1)
- pD->stop_decoding(JPGD_DECODE_ERROR);
-
- if (pD->get_bits_no_markers(1))
- s = p1;
- else
- s = m1;
- }
- else
- {
- if (r != 15)
- {
- pD->m_eob_run = 1 << r;
-
- if (r)
- pD->m_eob_run += pD->get_bits_no_markers(r);
-
- break;
- }
- }
-
- do
- {
- jpgd_block_t *this_coef = p + g_ZAG[k & 63];
-
- if (*this_coef != 0)
- {
- if (pD->get_bits_no_markers(1))
- {
- if ((*this_coef & p1) == 0)
- {
- if (*this_coef >= 0)
- *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
- else
- *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
- }
- }
- }
- else
- {
- if (--r < 0)
- break;
- }
-
- k++;
-
- } while (k <= pD->m_spectral_end);
-
- if ((s) && (k < 64))
- {
- p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
- }
- }
- }
-
- if (pD->m_eob_run > 0)
- {
- for ( ; k <= pD->m_spectral_end; k++)
- {
- jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
-
- if (*this_coef != 0)
- {
- if (pD->get_bits_no_markers(1))
- {
- if ((*this_coef & p1) == 0)
- {
- if (*this_coef >= 0)
- *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
- else
- *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
- }
- }
- }
- }
-
- pD->m_eob_run--;
- }
-}
-
-// Decode a scan in a progressively encoded image.
-void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
-{
- int mcu_row, mcu_col, mcu_block;
- int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
-
- memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
-
- for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
- {
- int component_num, component_id;
-
- memset(block_x_mcu, 0, sizeof(block_x_mcu));
-
- for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
- {
- int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
-
- if ((m_restart_interval) && (m_restarts_left == 0))
- process_restart();
-
- for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
- {
- component_id = m_mcu_org[mcu_block];
-
- decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
-
- if (m_comps_in_scan == 1)
- block_x_mcu[component_id]++;
- else
- {
- if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
- {
- block_x_mcu_ofs = 0;
-
- if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
- {
- block_y_mcu_ofs = 0;
- block_x_mcu[component_id] += m_comp_h_samp[component_id];
- }
- }
- }
- }
-
- m_restarts_left--;
- }
-
- if (m_comps_in_scan == 1)
- m_block_y_mcu[m_comp_list[0]]++;
- else
- {
- for (component_num = 0; component_num < m_comps_in_scan; component_num++)
- {
- component_id = m_comp_list[component_num];
- m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
- }
- }
- }
-}
-
-// Decode a progressively encoded image.
-void jpeg_decoder::init_progressive()
-{
- int i;
-
- if (m_comps_in_frame == 4)
- stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
-
- // Allocate the coefficient buffers.
- for (i = 0; i < m_comps_in_frame; i++)
- {
- m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
- m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
- }
-
- for ( ; ; )
- {
- int dc_only_scan, refinement_scan;
- pDecode_block_func decode_block_func;
-
- if (!init_scan())
- break;
-
- dc_only_scan = (m_spectral_start == 0);
- refinement_scan = (m_successive_high != 0);
-
- if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
-
- if (dc_only_scan)
- {
- if (m_spectral_end)
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
- }
- else if (m_comps_in_scan != 1) /* AC scans can only contain one component */
- stop_decoding(JPGD_BAD_SOS_SPECTRAL);
-
- if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
- stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
-
- if (dc_only_scan)
- {
- if (refinement_scan)
- decode_block_func = decode_block_dc_refine;
- else
- decode_block_func = decode_block_dc_first;
- }
- else
- {
- if (refinement_scan)
- decode_block_func = decode_block_ac_refine;
- else
- decode_block_func = decode_block_ac_first;
- }
-
- decode_scan(decode_block_func);
-
- m_bits_left = 16;
- get_bits(16);
- get_bits(16);
- }
-
- m_comps_in_scan = m_comps_in_frame;
-
- for (i = 0; i < m_comps_in_frame; i++)
- m_comp_list[i] = i;
-
- calc_mcu_block_order();
-}
-
-void jpeg_decoder::init_sequential()
-{
- if (!init_scan())
- stop_decoding(JPGD_UNEXPECTED_MARKER);
-}
-
-void jpeg_decoder::decode_start()
-{
- init_frame();
-
- if (m_progressive_flag)
- init_progressive();
- else
- init_sequential();
-}
-
-void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
-{
- init(pStream);
- locate_sof_marker();
-}
-
-jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
-{
- if (setjmp(m_jmp_state))
- return;
- decode_init(pStream);
-}
-
-int jpeg_decoder::begin_decoding()
-{
- if (m_ready_flag)
- return JPGD_SUCCESS;
-
- if (m_error_code)
- return JPGD_FAILED;
-
- if (setjmp(m_jmp_state))
- return JPGD_FAILED;
-
- decode_start();
-
- m_ready_flag = true;
-
- return JPGD_SUCCESS;
-}
-
-jpeg_decoder::~jpeg_decoder()
-{
- free_all_blocks();
-}
-
-jpeg_decoder_file_stream::jpeg_decoder_file_stream()
-{
- m_pFile = NULL;
- m_eof_flag = false;
- m_error_flag = false;
-}
-
-void jpeg_decoder_file_stream::close()
-{
- if (m_pFile)
- {
- fclose(m_pFile);
- m_pFile = NULL;
- }
-
- m_eof_flag = false;
- m_error_flag = false;
-}
-
-jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
-{
- close();
-}
-
-bool jpeg_decoder_file_stream::open(const char *Pfilename)
-{
- close();
-
- m_eof_flag = false;
- m_error_flag = false;
+ y += 64 * 6 - 64 * 2;
+ c += 64 * 6 - 8;
+ }
+ }
+
+ uint32_t jpeg_decoder::H2V2ConvertFiltered()
+ {
+ const uint BLOCKS_PER_MCU = 6;
+ int y = m_image_y_size - m_total_lines_left;
+ int row = y & 15;
+
+ const int half_image_y_size = (m_image_y_size >> 1) - 1;
+
+ uint8* d0 = m_pScan_line_0;
+
+ int c_y0 = (y - 1) >> 1;
+ int c_y1 = JPGD_MIN(c_y0 + 1, half_image_y_size);
+
+ const uint8_t* p_YSamples = m_pSample_buf;
+ const uint8_t* p_C0Samples = m_pSample_buf;
+ if ((c_y0 >= 0) && (((row & 15) == 0) || ((row & 15) == 15)) && (m_total_lines_left > 1))
+ {
+ assert(y > 0);
+ assert(m_sample_buf_prev_valid);
+
+ if ((row & 15) == 15)
+ p_YSamples = m_pSample_buf_prev;
+
+ p_C0Samples = m_pSample_buf_prev;
+ }
+
+ const int y_sample_base_ofs = ((row & 8) ? 128 : 0) + (row & 7) * 8;
+ const int y0_base = (c_y0 & 7) * 8 + 256;
+ const int y1_base = (c_y1 & 7) * 8 + 256;
+
+ const int half_image_x_size = (m_image_x_size >> 1) - 1;
+
+ static const uint8_t s_muls[2][2][4] =
+ {
+ { { 1, 3, 3, 9 }, { 3, 9, 1, 3 }, },
+ { { 3, 1, 9, 3 }, { 9, 3, 3, 1 } }
+ };
+
+ if (((row & 15) >= 1) && ((row & 15) <= 14))
+ {
+ assert((row & 1) == 1);
+ assert(((y + 1 - 1) >> 1) == c_y0);
+
+ assert(p_YSamples == m_pSample_buf);
+ assert(p_C0Samples == m_pSample_buf);
+
+ uint8* d1 = m_pScan_line_1;
+ const int y_sample_base_ofs1 = (((row + 1) & 8) ? 128 : 0) + ((row + 1) & 7) * 8;
+
+ for (int x = 0; x < m_image_x_size; x++)
+ {
+ int k = (x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7);
+ int y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)];
+ int y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)];
+
+ int c_x0 = (x - 1) >> 1;
+ int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size);
+ c_x0 = JPGD_MAX(c_x0, 0);
+
+ int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7);
+ int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)];
+ int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)];
+
+ int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)];
+ int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)];
+
+ int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7);
+ int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)];
+ int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)];
+
+ int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)];
+ int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)];
+
+ {
+ const uint8_t* pMuls = &s_muls[row & 1][x & 1][0];
+ int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
+ int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d0[0] = clamp(y_sample0 + rc);
+ d0[1] = clamp(y_sample0 + gc);
+ d0[2] = clamp(y_sample0 + bc);
+ d0[3] = 255;
+
+ d0 += 4;
+ }
+
+ {
+ const uint8_t* pMuls = &s_muls[(row + 1) & 1][x & 1][0];
+ int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
+ int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d1[0] = clamp(y_sample1 + rc);
+ d1[1] = clamp(y_sample1 + gc);
+ d1[2] = clamp(y_sample1 + bc);
+ d1[3] = 255;
+
+ d1 += 4;
+ }
+
+ if (((x & 1) == 1) && (x < m_image_x_size - 1))
+ {
+ const int nx = x + 1;
+ assert(c_x0 == (nx - 1) >> 1);
+
+ k = (nx >> 4) * BLOCKS_PER_MCU * 64 + ((nx & 8) ? 64 : 0) + (nx & 7);
+ y_sample0 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs)];
+ y_sample1 = p_YSamples[check_sample_buf_ofs(k + y_sample_base_ofs1)];
+
+ {
+ const uint8_t* pMuls = &s_muls[row & 1][nx & 1][0];
+ int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
+ int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d0[0] = clamp(y_sample0 + rc);
+ d0[1] = clamp(y_sample0 + gc);
+ d0[2] = clamp(y_sample0 + bc);
+ d0[3] = 255;
+
+ d0 += 4;
+ }
+
+ {
+ const uint8_t* pMuls = &s_muls[(row + 1) & 1][nx & 1][0];
+ int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
+ int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d1[0] = clamp(y_sample1 + rc);
+ d1[1] = clamp(y_sample1 + gc);
+ d1[2] = clamp(y_sample1 + bc);
+ d1[3] = 255;
+
+ d1 += 4;
+ }
+
+ ++x;
+ }
+ }
+
+ return 2;
+ }
+ else
+ {
+ for (int x = 0; x < m_image_x_size; x++)
+ {
+ int y_sample = p_YSamples[check_sample_buf_ofs((x >> 4) * BLOCKS_PER_MCU * 64 + ((x & 8) ? 64 : 0) + (x & 7) + y_sample_base_ofs)];
+
+ int c_x0 = (x - 1) >> 1;
+ int c_x1 = JPGD_MIN(c_x0 + 1, half_image_x_size);
+ c_x0 = JPGD_MAX(c_x0, 0);
+
+ int a = (c_x0 >> 3) * BLOCKS_PER_MCU * 64 + (c_x0 & 7);
+ int cb00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base)];
+ int cr00_sample = p_C0Samples[check_sample_buf_ofs(a + y0_base + 64)];
+
+ int cb01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base)];
+ int cr01_sample = m_pSample_buf[check_sample_buf_ofs(a + y1_base + 64)];
+
+ int b = (c_x1 >> 3) * BLOCKS_PER_MCU * 64 + (c_x1 & 7);
+ int cb10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base)];
+ int cr10_sample = p_C0Samples[check_sample_buf_ofs(b + y0_base + 64)];
+
+ int cb11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base)];
+ int cr11_sample = m_pSample_buf[check_sample_buf_ofs(b + y1_base + 64)];
+
+ const uint8_t* pMuls = &s_muls[row & 1][x & 1][0];
+ int cb = (cb00_sample * pMuls[0] + cb01_sample * pMuls[1] + cb10_sample * pMuls[2] + cb11_sample * pMuls[3] + 8) >> 4;
+ int cr = (cr00_sample * pMuls[0] + cr01_sample * pMuls[1] + cr10_sample * pMuls[2] + cr11_sample * pMuls[3] + 8) >> 4;
+
+ int rc = m_crr[cr];
+ int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
+ int bc = m_cbb[cb];
+
+ d0[0] = clamp(y_sample + rc);
+ d0[1] = clamp(y_sample + gc);
+ d0[2] = clamp(y_sample + bc);
+ d0[3] = 255;
+
+ d0 += 4;
+ }
+
+ return 1;
+ }
+ }
+
+ // Y (1 block per MCU) to 8-bit grayscale
+ void jpeg_decoder::gray_convert()
+ {
+ int row = m_max_mcu_y_size - m_mcu_lines_left;
+ uint8* d = m_pScan_line_0;
+ uint8* s = m_pSample_buf + row * 8;
+
+ for (int i = m_max_mcus_per_row; i > 0; i--)
+ {
+ *(uint*)d = *(uint*)s;
+ *(uint*)(&d[4]) = *(uint*)(&s[4]);
+
+ s += 64;
+ d += 8;
+ }
+ }
+
+ // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
+ void jpeg_decoder::find_eoi()
+ {
+ if (!m_progressive_flag)
+ {
+ // Attempt to read the EOI marker.
+ //get_bits_no_markers(m_bits_left & 7);
+
+ // Prime the bit buffer
+ m_bits_left = 16;
+ get_bits(16);
+ get_bits(16);
+
+ // The next marker _should_ be EOI
+ process_markers();
+ }
+
+ m_total_bytes_read -= m_in_buf_left;
+ }
+
+ int jpeg_decoder::decode_next_mcu_row()
+ {
+ if (::setjmp(m_jmp_state))
+ return JPGD_FAILED;
+
+ const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));
+ if (chroma_y_filtering)
+ {
+ std::swap(m_pSample_buf, m_pSample_buf_prev);
+
+ m_sample_buf_prev_valid = true;
+ }
+
+ if (m_progressive_flag)
+ load_next_row();
+ else
+ decode_next_row();
+
+ // Find the EOI marker if that was the last row.
+ if (m_total_lines_left <= m_max_mcu_y_size)
+ find_eoi();
+
+ m_mcu_lines_left = m_max_mcu_y_size;
+ return 0;
+ }
+
+ int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len)
+ {
+ if ((m_error_code) || (!m_ready_flag))
+ return JPGD_FAILED;
+
+ if (m_total_lines_left == 0)
+ return JPGD_DONE;
+
+ const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));
+
+ bool get_another_mcu_row = false;
+ bool got_mcu_early = false;
+ if (chroma_y_filtering)
+ {
+ if (m_total_lines_left == m_image_y_size)
+ get_another_mcu_row = true;
+ else if ((m_mcu_lines_left == 1) && (m_total_lines_left > 1))
+ {
+ get_another_mcu_row = true;
+ got_mcu_early = true;
+ }
+ }
+ else
+ {
+ get_another_mcu_row = (m_mcu_lines_left == 0);
+ }
+
+ if (get_another_mcu_row)
+ {
+ int status = decode_next_mcu_row();
+ if (status != 0)
+ return status;
+ }
+
+ switch (m_scan_type)
+ {
+ case JPGD_YH2V2:
+ {
+ if ((m_flags & cFlagBoxChromaFiltering) == 0)
+ {
+ if (m_num_buffered_scanlines == 1)
+ {
+ *pScan_line = m_pScan_line_1;
+ }
+ else if (m_num_buffered_scanlines == 0)
+ {
+ m_num_buffered_scanlines = H2V2ConvertFiltered();
+ *pScan_line = m_pScan_line_0;
+ }
+
+ m_num_buffered_scanlines--;
+ }
+ else
+ {
+ if ((m_mcu_lines_left & 1) == 0)
+ {
+ H2V2Convert();
+ *pScan_line = m_pScan_line_0;
+ }
+ else
+ *pScan_line = m_pScan_line_1;
+ }
+
+ break;
+ }
+ case JPGD_YH2V1:
+ {
+ if ((m_flags & cFlagBoxChromaFiltering) == 0)
+ H2V1ConvertFiltered();
+ else
+ H2V1Convert();
+ *pScan_line = m_pScan_line_0;
+ break;
+ }
+ case JPGD_YH1V2:
+ {
+ if (chroma_y_filtering)
+ {
+ H1V2ConvertFiltered();
+ *pScan_line = m_pScan_line_0;
+ }
+ else
+ {
+ if ((m_mcu_lines_left & 1) == 0)
+ {
+ H1V2Convert();
+ *pScan_line = m_pScan_line_0;
+ }
+ else
+ *pScan_line = m_pScan_line_1;
+ }
+
+ break;
+ }
+ case JPGD_YH1V1:
+ {
+ H1V1Convert();
+ *pScan_line = m_pScan_line_0;
+ break;
+ }
+ case JPGD_GRAYSCALE:
+ {
+ gray_convert();
+ *pScan_line = m_pScan_line_0;
+
+ break;
+ }
+ }
+
+ *pScan_line_len = m_real_dest_bytes_per_scan_line;
+
+ if (!got_mcu_early)
+ {
+ m_mcu_lines_left--;
+ }
+
+ m_total_lines_left--;
+
+ return JPGD_SUCCESS;
+ }
+
+ // Creates the tables needed for efficient Huffman decoding.
+ void jpeg_decoder::make_huff_table(int index, huff_tables* pH)
+ {
+ int p, i, l, si;
+ uint8 huffsize[258];
+ uint huffcode[258];
+ uint code;
+ uint subtree;
+ int code_size;
+ int lastp;
+ int nextfreeentry;
+ int currententry;
+
+ pH->ac_table = m_huff_ac[index] != 0;
+
+ p = 0;
+
+ for (l = 1; l <= 16; l++)
+ {
+ for (i = 1; i <= m_huff_num[index][l]; i++)
+ {
+ if (p >= 257)
+ stop_decoding(JPGD_DECODE_ERROR);
+ huffsize[p++] = static_cast<uint8>(l);
+ }
+ }
+
+ assert(p < 258);
+ huffsize[p] = 0;
+
+ lastp = p;
+
+ code = 0;
+ si = huffsize[0];
+ p = 0;
+
+ while (huffsize[p])
+ {
+ while (huffsize[p] == si)
+ {
+ if (p >= 257)
+ stop_decoding(JPGD_DECODE_ERROR);
+ huffcode[p++] = code;
+ code++;
+ }
+
+ code <<= 1;
+ si++;
+ }
+
+ memset(pH->look_up, 0, sizeof(pH->look_up));
+ memset(pH->look_up2, 0, sizeof(pH->look_up2));
+ memset(pH->tree, 0, sizeof(pH->tree));
+ memset(pH->code_size, 0, sizeof(pH->code_size));
+
+ nextfreeentry = -1;
+
+ p = 0;
+
+ while (p < lastp)
+ {
+ i = m_huff_val[index][p];
+
+ code = huffcode[p];
+ code_size = huffsize[p];
+
+ assert(i < JPGD_HUFF_CODE_SIZE_MAX_LENGTH);
+ pH->code_size[i] = static_cast<uint8>(code_size);
+
+ if (code_size <= 8)
+ {
+ code <<= (8 - code_size);
+
+ for (l = 1 << (8 - code_size); l > 0; l--)
+ {
+ if (code >= 256)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ pH->look_up[code] = i;
+
+ bool has_extrabits = false;
+ int extra_bits = 0;
+ int num_extra_bits = i & 15;
+
+ int bits_to_fetch = code_size;
+ if (num_extra_bits)
+ {
+ int total_codesize = code_size + num_extra_bits;
+ if (total_codesize <= 8)
+ {
+ has_extrabits = true;
+ extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
+
+ if (extra_bits > 0x7FFF)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ bits_to_fetch += num_extra_bits;
+ }
+ }
+
+ if (!has_extrabits)
+ pH->look_up2[code] = i | (bits_to_fetch << 8);
+ else
+ pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
+
+ code++;
+ }
+ }
+ else
+ {
+ subtree = (code >> (code_size - 8)) & 0xFF;
+
+ currententry = pH->look_up[subtree];
+
+ if (currententry == 0)
+ {
+ pH->look_up[subtree] = currententry = nextfreeentry;
+ pH->look_up2[subtree] = currententry = nextfreeentry;
+
+ nextfreeentry -= 2;
+ }
+
+ code <<= (16 - (code_size - 8));
+
+ for (l = code_size; l > 9; l--)
+ {
+ if ((code & 0x8000) == 0)
+ currententry--;
+
+ unsigned int idx = -currententry - 1;
+
+ if (idx >= JPGD_HUFF_TREE_MAX_LENGTH)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ if (pH->tree[idx] == 0)
+ {
+ pH->tree[idx] = nextfreeentry;
+
+ currententry = nextfreeentry;
+
+ nextfreeentry -= 2;
+ }
+ else
+ {
+ currententry = pH->tree[idx];
+ }
+
+ code <<= 1;
+ }
+
+ if ((code & 0x8000) == 0)
+ currententry--;
+
+ if ((-currententry - 1) >= JPGD_HUFF_TREE_MAX_LENGTH)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ pH->tree[-currententry - 1] = i;
+ }
+
+ p++;
+ }
+ }
+
+ // Verifies the quantization tables needed for this scan are available.
+ void jpeg_decoder::check_quant_tables()
+ {
+ for (int i = 0; i < m_comps_in_scan; i++)
+ if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr)
+ stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
+ }
+
+ // Verifies that all the Huffman tables needed for this scan are available.
+ void jpeg_decoder::check_huff_tables()
+ {
+ for (int i = 0; i < m_comps_in_scan; i++)
+ {
+ if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr))
+ stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+
+ if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr))
+ stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+ }
+
+ for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
+ if (m_huff_num[i])
+ {
+ if (!m_pHuff_tabs[i])
+ m_pHuff_tabs[i] = (huff_tables*)alloc(sizeof(huff_tables));
+
+ make_huff_table(i, m_pHuff_tabs[i]);
+ }
+ }
+
+ // Determines the component order inside each MCU.
+ // Also calcs how many MCU's are on each row, etc.
+ bool jpeg_decoder::calc_mcu_block_order()
+ {
+ int component_num, component_id;
+ int max_h_samp = 0, max_v_samp = 0;
+
+ for (component_id = 0; component_id < m_comps_in_frame; component_id++)
+ {
+ if (m_comp_h_samp[component_id] > max_h_samp)
+ max_h_samp = m_comp_h_samp[component_id];
+
+ if (m_comp_v_samp[component_id] > max_v_samp)
+ max_v_samp = m_comp_v_samp[component_id];
+ }
+
+ for (component_id = 0; component_id < m_comps_in_frame; component_id++)
+ {
+ m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
+ m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
+ }
+
+ if (m_comps_in_scan == 1)
+ {
+ m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
+ m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
+ }
+ else
+ {
+ m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
+ m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
+ }
+
+ if (m_comps_in_scan == 1)
+ {
+ m_mcu_org[0] = m_comp_list[0];
+
+ m_blocks_per_mcu = 1;
+ }
+ else
+ {
+ m_blocks_per_mcu = 0;
+
+ for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+ {
+ int num_blocks;
+
+ component_id = m_comp_list[component_num];
+
+ num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
+
+ while (num_blocks--)
+ m_mcu_org[m_blocks_per_mcu++] = component_id;
+ }
+ }
+
+ if (m_blocks_per_mcu > m_max_blocks_per_mcu)
+ return false;
+
+ for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+ {
+ int comp_id = m_mcu_org[mcu_block];
+ if (comp_id >= JPGD_MAX_QUANT_TABLES)
+ return false;
+ }
+
+ return true;
+ }
+
+ // Starts a new scan.
+ int jpeg_decoder::init_scan()
+ {
+ if (!locate_sos_marker())
+ return JPGD_FALSE;
+
+ if (!calc_mcu_block_order())
+ return JPGD_FALSE;
+
+ check_huff_tables();
+
+ check_quant_tables();
+
+ memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
+
+ m_eob_run = 0;
+
+ if (m_restart_interval)
+ {
+ m_restarts_left = m_restart_interval;
+ m_next_restart_num = 0;
+ }
+
+ fix_in_buffer();
+
+ return JPGD_TRUE;
+ }
+
+ // Starts a frame. Determines if the number of components or sampling factors
+ // are supported.
+ void jpeg_decoder::init_frame()
+ {
+ int i;
+
+ if (m_comps_in_frame == 1)
+ {
+ if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1))
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+ m_scan_type = JPGD_GRAYSCALE;
+ m_max_blocks_per_mcu = 1;
+ m_max_mcu_x_size = 8;
+ m_max_mcu_y_size = 8;
+ }
+ else if (m_comps_in_frame == 3)
+ {
+ if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) ||
+ ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)))
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+
+ if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
+ {
+ m_scan_type = JPGD_YH1V1;
+
+ m_max_blocks_per_mcu = 3;
+ m_max_mcu_x_size = 8;
+ m_max_mcu_y_size = 8;
+ }
+ else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
+ {
+ m_scan_type = JPGD_YH2V1;
+ m_max_blocks_per_mcu = 4;
+ m_max_mcu_x_size = 16;
+ m_max_mcu_y_size = 8;
+ }
+ else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2))
+ {
+ m_scan_type = JPGD_YH1V2;
+ m_max_blocks_per_mcu = 4;
+ m_max_mcu_x_size = 8;
+ m_max_mcu_y_size = 16;
+ }
+ else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
+ {
+ m_scan_type = JPGD_YH2V2;
+ m_max_blocks_per_mcu = 6;
+ m_max_mcu_x_size = 16;
+ m_max_mcu_y_size = 16;
+ }
+ else
+ stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
+ }
+ else
+ stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+ m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
+ m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
+
+ // These values are for the *destination* pixels: after conversion.
+ if (m_scan_type == JPGD_GRAYSCALE)
+ m_dest_bytes_per_pixel = 1;
+ else
+ m_dest_bytes_per_pixel = 4;
+
+ m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
+
+ m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
+
+ // Initialize two scan line buffers.
+ m_pScan_line_0 = (uint8*)alloc_aligned(m_dest_bytes_per_scan_line, true);
+ if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
+ m_pScan_line_1 = (uint8*)alloc_aligned(m_dest_bytes_per_scan_line, true);
+
+ m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
+
+ // Should never happen
+ if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ // Allocate the coefficient buffer, enough for one MCU
+ m_pMCU_coefficients = (jpgd_block_coeff_t *)alloc_aligned(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_coeff_t));
+
+ for (i = 0; i < m_max_blocks_per_mcu; i++)
+ m_mcu_block_max_zag[i] = 64;
+
+ m_pSample_buf = (uint8*)alloc_aligned(m_max_blocks_per_row * 64);
+ m_pSample_buf_prev = (uint8*)alloc_aligned(m_max_blocks_per_row * 64);
+
+ m_total_lines_left = m_image_y_size;
+
+ m_mcu_lines_left = 0;
+
+ create_look_ups();
+ }
+
+ // The coeff_buf series of methods originally stored the coefficients
+ // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
+ // was used to make this process more efficient. Now, we can store the entire
+ // thing in RAM.
+ jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
+ {
+ coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
+
+ cb->block_num_x = block_num_x;
+ cb->block_num_y = block_num_y;
+ cb->block_len_x = block_len_x;
+ cb->block_len_y = block_len_y;
+ cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_coeff_t);
+ cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true);
+ return cb;
+ }
+
+ inline jpgd_block_coeff_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y)
+ {
+ if ((block_x >= cb->block_num_x) || (block_y >= cb->block_num_y))
+ stop_decoding(JPGD_DECODE_ERROR);
+
+ return (jpgd_block_coeff_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
+ }
+
+ // The following methods decode the various types of m_blocks encountered
+ // in progressively encoded images.
+ void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y)
+ {
+ int s, r;
+ jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+ if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
+ {
+ if (s >= 16)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ r = pD->get_bits_no_markers(s);
+ s = JPGD_HUFF_EXTEND(r, s);
+ }
+
+ pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
+
+ p[0] = static_cast<jpgd_block_coeff_t>(s << pD->m_successive_low);
+ }
+
+ void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y)
+ {
+ if (pD->get_bits_no_markers(1))
+ {
+ jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+ p[0] |= (1 << pD->m_successive_low);
+ }
+ }
+
+ void jpeg_decoder::decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y)
+ {
+ int k, s, r;
+
+ if (pD->m_eob_run)
+ {
+ pD->m_eob_run--;
+ return;
+ }
+
+ jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+ for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
+ {
+ unsigned int idx = pD->m_comp_ac_tab[component_id];
+ if (idx >= JPGD_MAX_HUFF_TABLES)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ s = pD->huff_decode(pD->m_pHuff_tabs[idx]);
+
+ r = s >> 4;
+ s &= 15;
+
+ if (s)
+ {
+ if ((k += r) > 63)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ r = pD->get_bits_no_markers(s);
+ s = JPGD_HUFF_EXTEND(r, s);
+
+ p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(s << pD->m_successive_low);
+ }
+ else
+ {
+ if (r == 15)
+ {
+ if ((k += 15) > 63)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+ }
+ else
+ {
+ pD->m_eob_run = 1 << r;
+
+ if (r)
+ pD->m_eob_run += pD->get_bits_no_markers(r);
+
+ pD->m_eob_run--;
+
+ break;
+ }
+ }
+ }
+ }
+
+ void jpeg_decoder::decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y)
+ {
+ int s, k, r;
+
+ int p1 = 1 << pD->m_successive_low;
+
+ //int m1 = (-1) << pD->m_successive_low;
+ int m1 = static_cast<int>((UINT32_MAX << pD->m_successive_low));
+
+ jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+ if (pD->m_spectral_end > 63)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ k = pD->m_spectral_start;
+
+ if (pD->m_eob_run == 0)
+ {
+ for (; k <= pD->m_spectral_end; k++)
+ {
+ unsigned int idx = pD->m_comp_ac_tab[component_id];
+ if (idx >= JPGD_MAX_HUFF_TABLES)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ s = pD->huff_decode(pD->m_pHuff_tabs[idx]);
+
+ r = s >> 4;
+ s &= 15;
+
+ if (s)
+ {
+ if (s != 1)
+ pD->stop_decoding(JPGD_DECODE_ERROR);
+
+ if (pD->get_bits_no_markers(1))
+ s = p1;
+ else
+ s = m1;
+ }
+ else
+ {
+ if (r != 15)
+ {
+ pD->m_eob_run = 1 << r;
+
+ if (r)
+ pD->m_eob_run += pD->get_bits_no_markers(r);
+
+ break;
+ }
+ }
+
+ do
+ {
+ jpgd_block_coeff_t* this_coef = p + g_ZAG[k & 63];
+
+ if (*this_coef != 0)
+ {
+ if (pD->get_bits_no_markers(1))
+ {
+ if ((*this_coef & p1) == 0)
+ {
+ if (*this_coef >= 0)
+ *this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + p1);
+ else
+ *this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + m1);
+ }
+ }
+ }
+ else
+ {
+ if (--r < 0)
+ break;
+ }
+
+ k++;
+
+ } while (k <= pD->m_spectral_end);
+
+ if ((s) && (k < 64))
+ {
+ p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(s);
+ }
+ }
+ }
+
+ if (pD->m_eob_run > 0)
+ {
+ for (; k <= pD->m_spectral_end; k++)
+ {
+ jpgd_block_coeff_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
+
+ if (*this_coef != 0)
+ {
+ if (pD->get_bits_no_markers(1))
+ {
+ if ((*this_coef & p1) == 0)
+ {
+ if (*this_coef >= 0)
+ *this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + p1);
+ else
+ *this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + m1);
+ }
+ }
+ }
+ }
+
+ pD->m_eob_run--;
+ }
+ }
+
+ // Decode a scan in a progressively encoded image.
+ void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
+ {
+ int mcu_row, mcu_col, mcu_block;
+ int block_x_mcu[JPGD_MAX_COMPONENTS], block_y_mcu[JPGD_MAX_COMPONENTS];
+
+ memset(block_y_mcu, 0, sizeof(block_y_mcu));
+
+ for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
+ {
+ int component_num, component_id;
+
+ memset(block_x_mcu, 0, sizeof(block_x_mcu));
+
+ for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+ {
+ int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+ if ((m_restart_interval) && (m_restarts_left == 0))
+ process_restart();
+
+ for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+ {
+ component_id = m_mcu_org[mcu_block];
+
+ decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, block_y_mcu[component_id] + block_y_mcu_ofs);
+
+ if (m_comps_in_scan == 1)
+ block_x_mcu[component_id]++;
+ else
+ {
+ if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+ {
+ block_x_mcu_ofs = 0;
+
+ if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+ {
+ block_y_mcu_ofs = 0;
+ block_x_mcu[component_id] += m_comp_h_samp[component_id];
+ }
+ }
+ }
+ }
+
+ m_restarts_left--;
+ }
+
+ if (m_comps_in_scan == 1)
+ block_y_mcu[m_comp_list[0]]++;
+ else
+ {
+ for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+ {
+ component_id = m_comp_list[component_num];
+ block_y_mcu[component_id] += m_comp_v_samp[component_id];
+ }
+ }
+ }
+ }
+
+ // Decode a progressively encoded image.
+ void jpeg_decoder::init_progressive()
+ {
+ int i;
+
+ if (m_comps_in_frame == 4)
+ stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+ // Allocate the coefficient buffers.
+ for (i = 0; i < m_comps_in_frame; i++)
+ {
+ m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
+ m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
+ }
+
+ // See https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf
+ uint32_t total_scans = 0;
+ const uint32_t MAX_SCANS_TO_PROCESS = 1000;
+
+ for (; ; )
+ {
+ int dc_only_scan, refinement_scan;
+ pDecode_block_func decode_block_func;
+
+ if (!init_scan())
+ break;
+
+ dc_only_scan = (m_spectral_start == 0);
+ refinement_scan = (m_successive_high != 0);
+
+ if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
+ stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+ if (dc_only_scan)
+ {
+ if (m_spectral_end)
+ stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+ }
+ else if (m_comps_in_scan != 1) /* AC scans can only contain one component */
+ stop_decoding(JPGD_BAD_SOS_SPECTRAL);
+
+ if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
+ stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
+
+ if (dc_only_scan)
+ {
+ if (refinement_scan)
+ decode_block_func = decode_block_dc_refine;
+ else
+ decode_block_func = decode_block_dc_first;
+ }
+ else
+ {
+ if (refinement_scan)
+ decode_block_func = decode_block_ac_refine;
+ else
+ decode_block_func = decode_block_ac_first;
+ }
+
+ decode_scan(decode_block_func);
+
+ m_bits_left = 16;
+ get_bits(16);
+ get_bits(16);
+
+ total_scans++;
+ if (total_scans > MAX_SCANS_TO_PROCESS)
+ stop_decoding(JPGD_TOO_MANY_SCANS);
+ }
+
+ m_comps_in_scan = m_comps_in_frame;
+
+ for (i = 0; i < m_comps_in_frame; i++)
+ m_comp_list[i] = i;
+
+ if (!calc_mcu_block_order())
+ stop_decoding(JPGD_DECODE_ERROR);
+ }
+
+ void jpeg_decoder::init_sequential()
+ {
+ if (!init_scan())
+ stop_decoding(JPGD_UNEXPECTED_MARKER);
+ }
+
+ void jpeg_decoder::decode_start()
+ {
+ init_frame();
+
+ if (m_progressive_flag)
+ init_progressive();
+ else
+ init_sequential();
+ }
+
+ void jpeg_decoder::decode_init(jpeg_decoder_stream* pStream, uint32_t flags)
+ {
+ init(pStream, flags);
+ locate_sof_marker();
+ }
+
+ jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags)
+ {
+ if (::setjmp(m_jmp_state))
+ return;
+ decode_init(pStream, flags);
+ }
+
+ int jpeg_decoder::begin_decoding()
+ {
+ if (m_ready_flag)
+ return JPGD_SUCCESS;
+
+ if (m_error_code)
+ return JPGD_FAILED;
+
+ if (::setjmp(m_jmp_state))
+ return JPGD_FAILED;
+
+ decode_start();
+
+ m_ready_flag = true;
+
+ return JPGD_SUCCESS;
+ }
+
+ jpeg_decoder::~jpeg_decoder()
+ {
+ free_all_blocks();
+ }
+
+ jpeg_decoder_file_stream::jpeg_decoder_file_stream()
+ {
+ m_pFile = nullptr;
+ m_eof_flag = false;
+ m_error_flag = false;
+ }
+
+ void jpeg_decoder_file_stream::close()
+ {
+ if (m_pFile)
+ {
+ fclose(m_pFile);
+ m_pFile = nullptr;
+ }
+
+ m_eof_flag = false;
+ m_error_flag = false;
+ }
+
+ jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
+ {
+ close();
+ }
+
+ bool jpeg_decoder_file_stream::open(const char* Pfilename)
+ {
+ close();
+
+ m_eof_flag = false;
+ m_error_flag = false;
#if defined(_MSC_VER)
- m_pFile = NULL;
- fopen_s(&m_pFile, Pfilename, "rb");
+ m_pFile = nullptr;
+ fopen_s(&m_pFile, Pfilename, "rb");
#else
- m_pFile = fopen(Pfilename, "rb");
+ m_pFile = fopen(Pfilename, "rb");
#endif
- return m_pFile != NULL;
-}
-
-int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
-{
- if (!m_pFile)
- return -1;
-
- if (m_eof_flag)
- {
- *pEOF_flag = true;
- return 0;
- }
-
- if (m_error_flag)
- return -1;
-
- int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
- if (bytes_read < max_bytes_to_read)
- {
- if (ferror(m_pFile))
- {
- m_error_flag = true;
- return -1;
- }
-
- m_eof_flag = true;
- *pEOF_flag = true;
- }
-
- return bytes_read;
-}
-
-bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size)
-{
- close();
- m_pSrc_data = pSrc_data;
- m_ofs = 0;
- m_size = size;
- return true;
-}
-
-int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag)
-{
- *pEOF_flag = false;
-
- if (!m_pSrc_data)
- return -1;
-
- uint bytes_remaining = m_size - m_ofs;
- if ((uint)max_bytes_to_read > bytes_remaining)
- {
- max_bytes_to_read = bytes_remaining;
- *pEOF_flag = true;
- }
-
- memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
- m_ofs += max_bytes_to_read;
-
- return max_bytes_to_read;
-}
-
-unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps)
-{
- if (!actual_comps)
- return NULL;
- *actual_comps = 0;
-
- if ((!pStream) || (!width) || (!height) || (!req_comps))
- return NULL;
-
- if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
- return NULL;
-
- jpeg_decoder decoder(pStream);
- if (decoder.get_error_code() != JPGD_SUCCESS)
- return NULL;
-
- const int image_width = decoder.get_width(), image_height = decoder.get_height();
- *width = image_width;
- *height = image_height;
- *actual_comps = decoder.get_num_components();
-
- if (decoder.begin_decoding() != JPGD_SUCCESS)
- return NULL;
-
- const int dst_bpl = image_width * req_comps;
-
- uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
- if (!pImage_data)
- return NULL;
-
- for (int y = 0; y < image_height; y++)
- {
- const uint8* pScan_line;
- uint scan_line_len;
- if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
- {
- jpgd_free(pImage_data);
- return NULL;
- }
-
- uint8 *pDst = pImage_data + y * dst_bpl;
-
- if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3)))
- memcpy(pDst, pScan_line, dst_bpl);
- else if (decoder.get_num_components() == 1)
- {
- if (req_comps == 3)
- {
- for (int x = 0; x < image_width; x++)
- {
- uint8 luma = pScan_line[x];
- pDst[0] = luma;
- pDst[1] = luma;
- pDst[2] = luma;
- pDst += 3;
- }
- }
- else
- {
- for (int x = 0; x < image_width; x++)
- {
- uint8 luma = pScan_line[x];
- pDst[0] = luma;
- pDst[1] = luma;
- pDst[2] = luma;
- pDst[3] = 255;
- pDst += 4;
- }
- }
- }
- else if (decoder.get_num_components() == 3)
- {
- if (req_comps == 1)
- {
- const int YR = 19595, YG = 38470, YB = 7471;
- for (int x = 0; x < image_width; x++)
- {
- int r = pScan_line[x*4+0];
- int g = pScan_line[x*4+1];
- int b = pScan_line[x*4+2];
- *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
- }
- }
- else
- {
- for (int x = 0; x < image_width; x++)
- {
- pDst[0] = pScan_line[x*4+0];
- pDst[1] = pScan_line[x*4+1];
- pDst[2] = pScan_line[x*4+2];
- pDst += 3;
- }
- }
- }
- }
-
- return pImage_data;
-}
-
-unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps)
-{
- jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
- return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps);
-}
-
-unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps)
-{
- jpgd::jpeg_decoder_file_stream file_stream;
- if (!file_stream.open(pSrc_filename))
- return NULL;
- return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps);
-}
-
-} // namespace jpgd \ No newline at end of file
+ return m_pFile != nullptr;
+ }
+
+ int jpeg_decoder_file_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag)
+ {
+ if (!m_pFile)
+ return -1;
+
+ if (m_eof_flag)
+ {
+ *pEOF_flag = true;
+ return 0;
+ }
+
+ if (m_error_flag)
+ return -1;
+
+ int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
+ if (bytes_read < max_bytes_to_read)
+ {
+ if (ferror(m_pFile))
+ {
+ m_error_flag = true;
+ return -1;
+ }
+
+ m_eof_flag = true;
+ *pEOF_flag = true;
+ }
+
+ return bytes_read;
+ }
+
+ bool jpeg_decoder_mem_stream::open(const uint8* pSrc_data, uint size)
+ {
+ close();
+ m_pSrc_data = pSrc_data;
+ m_ofs = 0;
+ m_size = size;
+ return true;
+ }
+
+ int jpeg_decoder_mem_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag)
+ {
+ *pEOF_flag = false;
+
+ if (!m_pSrc_data)
+ return -1;
+
+ uint bytes_remaining = m_size - m_ofs;
+ if ((uint)max_bytes_to_read > bytes_remaining)
+ {
+ max_bytes_to_read = bytes_remaining;
+ *pEOF_flag = true;
+ }
+
+ memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
+ m_ofs += max_bytes_to_read;
+
+ return max_bytes_to_read;
+ }
+
+ unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags)
+ {
+ if (!actual_comps)
+ return nullptr;
+ *actual_comps = 0;
+
+ if ((!pStream) || (!width) || (!height) || (!req_comps))
+ return nullptr;
+
+ if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4))
+ return nullptr;
+
+ jpeg_decoder decoder(pStream, flags);
+ if (decoder.get_error_code() != JPGD_SUCCESS)
+ return nullptr;
+
+ const int image_width = decoder.get_width(), image_height = decoder.get_height();
+ *width = image_width;
+ *height = image_height;
+ *actual_comps = decoder.get_num_components();
+
+ if (decoder.begin_decoding() != JPGD_SUCCESS)
+ return nullptr;
+
+ const int dst_bpl = image_width * req_comps;
+
+ uint8* pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height);
+ if (!pImage_data)
+ return nullptr;
+
+ for (int y = 0; y < image_height; y++)
+ {
+ const uint8* pScan_line;
+ uint scan_line_len;
+ if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS)
+ {
+ jpgd_free(pImage_data);
+ return nullptr;
+ }
+
+ uint8* pDst = pImage_data + y * dst_bpl;
+
+ if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3)))
+ memcpy(pDst, pScan_line, dst_bpl);
+ else if (decoder.get_num_components() == 1)
+ {
+ if (req_comps == 3)
+ {
+ for (int x = 0; x < image_width; x++)
+ {
+ uint8 luma = pScan_line[x];
+ pDst[0] = luma;
+ pDst[1] = luma;
+ pDst[2] = luma;
+ pDst += 3;
+ }
+ }
+ else
+ {
+ for (int x = 0; x < image_width; x++)
+ {
+ uint8 luma = pScan_line[x];
+ pDst[0] = luma;
+ pDst[1] = luma;
+ pDst[2] = luma;
+ pDst[3] = 255;
+ pDst += 4;
+ }
+ }
+ }
+ else if (decoder.get_num_components() == 3)
+ {
+ if (req_comps == 1)
+ {
+ const int YR = 19595, YG = 38470, YB = 7471;
+ for (int x = 0; x < image_width; x++)
+ {
+ int r = pScan_line[x * 4 + 0];
+ int g = pScan_line[x * 4 + 1];
+ int b = pScan_line[x * 4 + 2];
+ *pDst++ = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
+ }
+ }
+ else
+ {
+ for (int x = 0; x < image_width; x++)
+ {
+ pDst[0] = pScan_line[x * 4 + 0];
+ pDst[1] = pScan_line[x * 4 + 1];
+ pDst[2] = pScan_line[x * 4 + 2];
+ pDst += 3;
+ }
+ }
+ }
+ }
+
+ return pImage_data;
+ }
+
+ unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags)
+ {
+ jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size);
+ return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps, flags);
+ }
+
+ unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags)
+ {
+ jpgd::jpeg_decoder_file_stream file_stream;
+ if (!file_stream.open(pSrc_filename))
+ return nullptr;
+ return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps, flags);
+ }
+
+} // namespace jpgd
diff --git a/thirdparty/jpeg-compressor/jpgd.h b/thirdparty/jpeg-compressor/jpgd.h
index 150b9a0b26..39136696ba 100644
--- a/thirdparty/jpeg-compressor/jpgd.h
+++ b/thirdparty/jpeg-compressor/jpgd.h
@@ -1,319 +1,351 @@
// jpgd.h - C++ class for JPEG decompression.
-// Public domain, Rich Geldreich <richgel99@gmail.com>
+// Richard Geldreich <richgel99@gmail.com>
+// See jpgd.cpp for license (Public Domain or Apache 2.0).
#ifndef JPEG_DECODER_H
#define JPEG_DECODER_H
#include <stdlib.h>
#include <stdio.h>
#include <setjmp.h>
+#include <assert.h>
+#include <stdint.h>
#ifdef _MSC_VER
- #define JPGD_NORETURN __declspec(noreturn)
+#define JPGD_NORETURN __declspec(noreturn)
#elif defined(__GNUC__)
- #define JPGD_NORETURN __attribute__ ((noreturn))
+#define JPGD_NORETURN __attribute__ ((noreturn))
#else
- #define JPGD_NORETURN
+#define JPGD_NORETURN
#endif
+#define JPGD_HUFF_TREE_MAX_LENGTH 512
+#define JPGD_HUFF_CODE_SIZE_MAX_LENGTH 256
+
namespace jpgd
{
- typedef unsigned char uint8;
- typedef signed short int16;
- typedef unsigned short uint16;
- typedef unsigned int uint;
- typedef signed int int32;
-
- // Loads a JPEG image from a memory buffer or a file.
- // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
- // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB).
- // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly.
- // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
- unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps);
- unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps);
-
- // Success/failure error codes.
- enum jpgd_status
- {
- JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
- JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
- JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
- JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
- JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
- JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
- JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
- JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
- JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
- };
-
- // Input stream interface.
- // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
- // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
- // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
- // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
- class jpeg_decoder_stream
- {
- public:
- jpeg_decoder_stream() { }
- virtual ~jpeg_decoder_stream() { }
-
- // The read() method is called when the internal input buffer is empty.
- // Parameters:
- // pBuf - input buffer
- // max_bytes_to_read - maximum bytes that can be written to pBuf
- // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
- // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
- // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
- virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
- };
-
- // stdio FILE stream class.
- class jpeg_decoder_file_stream : public jpeg_decoder_stream
- {
- jpeg_decoder_file_stream(const jpeg_decoder_file_stream &);
- jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &);
-
- FILE *m_pFile;
- bool m_eof_flag, m_error_flag;
-
- public:
- jpeg_decoder_file_stream();
- virtual ~jpeg_decoder_file_stream();
-
- bool open(const char *Pfilename);
- void close();
-
- virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag);
- };
-
- // Memory stream class.
- class jpeg_decoder_mem_stream : public jpeg_decoder_stream
- {
- const uint8 *m_pSrc_data;
- uint m_ofs, m_size;
-
- public:
- jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
- jpeg_decoder_mem_stream(const uint8 *pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }
-
- virtual ~jpeg_decoder_mem_stream() { }
-
- bool open(const uint8 *pSrc_data, uint size);
- void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; }
-
- virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag);
- };
-
- // Loads JPEG file from a jpeg_decoder_stream.
- unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps);
-
- enum
- {
- JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
- JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384
- };
-
- typedef int16 jpgd_quant_t;
- typedef int16 jpgd_block_t;
-
- class jpeg_decoder
- {
- public:
- // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
- // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
- jpeg_decoder(jpeg_decoder_stream *pStream);
-
- ~jpeg_decoder();
-
- // Call this method after constructing the object to begin decompression.
- // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
- int begin_decoding();
-
- // Returns the next scan line.
- // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
- // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
- // Returns JPGD_SUCCESS if a scan line has been returned.
- // Returns JPGD_DONE if all scan lines have been returned.
- // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info.
- int decode(const void** pScan_line, uint* pScan_line_len);
-
- inline jpgd_status get_error_code() const { return m_error_code; }
-
- inline int get_width() const { return m_image_x_size; }
- inline int get_height() const { return m_image_y_size; }
-
- inline int get_num_components() const { return m_comps_in_frame; }
-
- inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
- inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
-
- // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
- inline int get_total_bytes_read() const { return m_total_bytes_read; }
-
- private:
- jpeg_decoder(const jpeg_decoder &);
- jpeg_decoder &operator =(const jpeg_decoder &);
-
- typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
-
- struct huff_tables
- {
- bool ac_table;
- uint look_up[256];
- uint look_up2[256];
- uint8 code_size[256];
- uint tree[512];
- };
-
- struct coeff_buf
- {
- uint8 *pData;
- int block_num_x, block_num_y;
- int block_len_x, block_len_y;
- int block_size;
- };
-
- struct mem_block
- {
- mem_block *m_pNext;
- size_t m_used_count;
- size_t m_size;
- char m_data[1];
- };
-
- jmp_buf m_jmp_state;
- mem_block *m_pMem_blocks;
- int m_image_x_size;
- int m_image_y_size;
- jpeg_decoder_stream *m_pStream;
- int m_progressive_flag;
- uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES];
- uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size
- uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size
- jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
- int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
- int m_comps_in_frame; // # of components in frame
- int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor
- int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor
- int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector
- int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID
- int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
- int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
- int m_comps_in_scan; // # of components in scan
- int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
- int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector
- int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector
- int m_spectral_start; // spectral selection start
- int m_spectral_end; // spectral selection end
- int m_successive_low; // successive approximation low
- int m_successive_high; // successive approximation high
- int m_max_mcu_x_size; // MCU's max. X size in pixels
- int m_max_mcu_y_size; // MCU's max. Y size in pixels
- int m_blocks_per_mcu;
- int m_max_blocks_per_row;
- int m_mcus_per_row, m_mcus_per_col;
- int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
- int m_total_lines_left; // total # lines left in image
- int m_mcu_lines_left; // total # lines left in this MCU
- int m_real_dest_bytes_per_scan_line;
- int m_dest_bytes_per_scan_line; // rounded up
- int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y)
- huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
- coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
- coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
- int m_eob_run;
- int m_block_y_mcu[JPGD_MAX_COMPONENTS];
- uint8* m_pIn_buf_ofs;
- int m_in_buf_left;
- int m_tem_flag;
- bool m_eof_flag;
- uint8 m_in_buf_pad_start[128];
- uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128];
- uint8 m_in_buf_pad_end[128];
- int m_bits_left;
- uint m_bit_buf;
- int m_restart_interval;
- int m_restarts_left;
- int m_next_restart_num;
- int m_max_mcus_per_row;
- int m_max_blocks_per_mcu;
- int m_expanded_blocks_per_mcu;
- int m_expanded_blocks_per_row;
- int m_expanded_blocks_per_component;
- bool m_freq_domain_chroma_upsample;
- int m_max_mcus_per_col;
- uint m_last_dc_val[JPGD_MAX_COMPONENTS];
- jpgd_block_t* m_pMCU_coefficients;
- int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
- uint8* m_pSample_buf;
- int m_crr[256];
- int m_cbb[256];
- int m_crg[256];
- int m_cbg[256];
- uint8* m_pScan_line_0;
- uint8* m_pScan_line_1;
- jpgd_status m_error_code;
- bool m_ready_flag;
- int m_total_bytes_read;
-
- void free_all_blocks();
- JPGD_NORETURN void stop_decoding(jpgd_status status);
- void *alloc(size_t n, bool zero = false);
- void word_clear(void *p, uint16 c, uint n);
- void prep_in_buffer();
- void read_dht_marker();
- void read_dqt_marker();
- void read_sof_marker();
- void skip_variable_marker();
- void read_dri_marker();
- void read_sos_marker();
- int next_marker();
- int process_markers();
- void locate_soi_marker();
- void locate_sof_marker();
- int locate_sos_marker();
- void init(jpeg_decoder_stream * pStream);
- void create_look_ups();
- void fix_in_buffer();
- void transform_mcu(int mcu_row);
- void transform_mcu_expand(int mcu_row);
- coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
- inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
- void load_next_row();
- void decode_next_row();
- void make_huff_table(int index, huff_tables *pH);
- void check_quant_tables();
- void check_huff_tables();
- void calc_mcu_block_order();
- int init_scan();
- void init_frame();
- void process_restart();
- void decode_scan(pDecode_block_func decode_block_func);
- void init_progressive();
- void init_sequential();
- void decode_start();
- void decode_init(jpeg_decoder_stream * pStream);
- void H2V2Convert();
- void H2V1Convert();
- void H1V2Convert();
- void H1V1Convert();
- void gray_convert();
- void expanded_convert();
- void find_eoi();
- inline uint get_char();
- inline uint get_char(bool *pPadding_flag);
- inline void stuff_char(uint8 q);
- inline uint8 get_octet();
- inline uint get_bits(int num_bits);
- inline uint get_bits_no_markers(int numbits);
- inline int huff_decode(huff_tables *pH);
- inline int huff_decode(huff_tables *pH, int& extrabits);
- static inline uint8 clamp(int i);
- static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
- static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
- static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
- static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
- };
-
+ typedef unsigned char uint8;
+ typedef signed short int16;
+ typedef unsigned short uint16;
+ typedef unsigned int uint;
+ typedef signed int int32;
+
+ // Loads a JPEG image from a memory buffer or a file.
+ // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
+ // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB).
+ // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly.
+ // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
+ unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0);
+ unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0);
+
+ // Success/failure error codes.
+ enum jpgd_status
+ {
+ JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
+ JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
+ JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
+ JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
+ JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
+ JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
+ JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
+ JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER,
+ JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM, JPGD_TOO_MANY_SCANS
+ };
+
+ // Input stream interface.
+ // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
+ // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
+ // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
+ // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
+ class jpeg_decoder_stream
+ {
+ public:
+ jpeg_decoder_stream() { }
+ virtual ~jpeg_decoder_stream() { }
+
+ // The read() method is called when the internal input buffer is empty.
+ // Parameters:
+ // pBuf - input buffer
+ // max_bytes_to_read - maximum bytes that can be written to pBuf
+ // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
+ // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
+ // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
+ virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0;
+ };
+
+ // stdio FILE stream class.
+ class jpeg_decoder_file_stream : public jpeg_decoder_stream
+ {
+ jpeg_decoder_file_stream(const jpeg_decoder_file_stream&);
+ jpeg_decoder_file_stream& operator =(const jpeg_decoder_file_stream&);
+
+ FILE* m_pFile;
+ bool m_eof_flag, m_error_flag;
+
+ public:
+ jpeg_decoder_file_stream();
+ virtual ~jpeg_decoder_file_stream();
+
+ bool open(const char* Pfilename);
+ void close();
+
+ virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag);
+ };
+
+ // Memory stream class.
+ class jpeg_decoder_mem_stream : public jpeg_decoder_stream
+ {
+ const uint8* m_pSrc_data;
+ uint m_ofs, m_size;
+
+ public:
+ jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { }
+ jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { }
+
+ virtual ~jpeg_decoder_mem_stream() { }
+
+ bool open(const uint8* pSrc_data, uint size);
+ void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; }
+
+ virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag);
+ };
+
+ // Loads JPEG file from a jpeg_decoder_stream.
+ unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps, uint32_t flags = 0);
+
+ enum
+ {
+ JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
+ JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 16384, JPGD_MAX_HEIGHT = 32768, JPGD_MAX_WIDTH = 32768
+ };
+
+ typedef int16 jpgd_quant_t;
+ typedef int16 jpgd_block_coeff_t;
+
+ class jpeg_decoder
+ {
+ public:
+ enum
+ {
+ cFlagBoxChromaFiltering = 1,
+ cFlagDisableSIMD = 2
+ };
+
+ // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
+ // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
+ jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = 0);
+
+ ~jpeg_decoder();
+
+ // Call this method after constructing the object to begin decompression.
+ // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
+
+ int begin_decoding();
+
+ // Returns the next scan line.
+ // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
+ // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
+ // Returns JPGD_SUCCESS if a scan line has been returned.
+ // Returns JPGD_DONE if all scan lines have been returned.
+ // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info.
+ int decode(const void** pScan_line, uint* pScan_line_len);
+
+ inline jpgd_status get_error_code() const { return m_error_code; }
+
+ inline int get_width() const { return m_image_x_size; }
+ inline int get_height() const { return m_image_y_size; }
+
+ inline int get_num_components() const { return m_comps_in_frame; }
+
+ inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
+ inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
+
+ // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
+ inline int get_total_bytes_read() const { return m_total_bytes_read; }
+
+ private:
+ jpeg_decoder(const jpeg_decoder&);
+ jpeg_decoder& operator =(const jpeg_decoder&);
+
+ typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int);
+
+ struct huff_tables
+ {
+ bool ac_table;
+ uint look_up[256];
+ uint look_up2[256];
+ uint8 code_size[JPGD_HUFF_CODE_SIZE_MAX_LENGTH];
+ uint tree[JPGD_HUFF_TREE_MAX_LENGTH];
+ };
+
+ struct coeff_buf
+ {
+ uint8* pData;
+ int block_num_x, block_num_y;
+ int block_len_x, block_len_y;
+ int block_size;
+ };
+
+ struct mem_block
+ {
+ mem_block* m_pNext;
+ size_t m_used_count;
+ size_t m_size;
+ char m_data[1];
+ };
+
+ jmp_buf m_jmp_state;
+ uint32_t m_flags;
+ mem_block* m_pMem_blocks;
+ int m_image_x_size;
+ int m_image_y_size;
+ jpeg_decoder_stream* m_pStream;
+
+ int m_progressive_flag;
+
+ uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES];
+ uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size
+ uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size
+ jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
+ int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
+ int m_comps_in_frame; // # of components in frame
+ int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor
+ int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor
+ int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector
+ int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID
+ int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
+ int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
+ int m_comps_in_scan; // # of components in scan
+ int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan
+ int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector
+ int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector
+ int m_spectral_start; // spectral selection start
+ int m_spectral_end; // spectral selection end
+ int m_successive_low; // successive approximation low
+ int m_successive_high; // successive approximation high
+ int m_max_mcu_x_size; // MCU's max. X size in pixels
+ int m_max_mcu_y_size; // MCU's max. Y size in pixels
+ int m_blocks_per_mcu;
+ int m_max_blocks_per_row;
+ int m_mcus_per_row, m_mcus_per_col;
+ int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
+ int m_total_lines_left; // total # lines left in image
+ int m_mcu_lines_left; // total # lines left in this MCU
+ int m_num_buffered_scanlines;
+ int m_real_dest_bytes_per_scan_line;
+ int m_dest_bytes_per_scan_line; // rounded up
+ int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y)
+ huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
+ coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
+ coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
+ int m_eob_run;
+ int m_block_y_mcu[JPGD_MAX_COMPONENTS];
+ uint8* m_pIn_buf_ofs;
+ int m_in_buf_left;
+ int m_tem_flag;
+
+ uint8 m_in_buf_pad_start[64];
+ uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128];
+ uint8 m_in_buf_pad_end[64];
+
+ int m_bits_left;
+ uint m_bit_buf;
+ int m_restart_interval;
+ int m_restarts_left;
+ int m_next_restart_num;
+ int m_max_mcus_per_row;
+ int m_max_blocks_per_mcu;
+
+ int m_max_mcus_per_col;
+ uint m_last_dc_val[JPGD_MAX_COMPONENTS];
+ jpgd_block_coeff_t* m_pMCU_coefficients;
+ int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
+ uint8* m_pSample_buf;
+ uint8* m_pSample_buf_prev;
+ int m_crr[256];
+ int m_cbb[256];
+ int m_crg[256];
+ int m_cbg[256];
+ uint8* m_pScan_line_0;
+ uint8* m_pScan_line_1;
+ jpgd_status m_error_code;
+ int m_total_bytes_read;
+
+ bool m_ready_flag;
+ bool m_eof_flag;
+ bool m_sample_buf_prev_valid;
+ bool m_has_sse2;
+
+ inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; }
+ void free_all_blocks();
+ JPGD_NORETURN void stop_decoding(jpgd_status status);
+ void* alloc(size_t n, bool zero = false);
+ void* alloc_aligned(size_t nSize, uint32_t align = 16, bool zero = false);
+ void word_clear(void* p, uint16 c, uint n);
+ void prep_in_buffer();
+ void read_dht_marker();
+ void read_dqt_marker();
+ void read_sof_marker();
+ void skip_variable_marker();
+ void read_dri_marker();
+ void read_sos_marker();
+ int next_marker();
+ int process_markers();
+ void locate_soi_marker();
+ void locate_sof_marker();
+ int locate_sos_marker();
+ void init(jpeg_decoder_stream* pStream, uint32_t flags);
+ void create_look_ups();
+ void fix_in_buffer();
+ void transform_mcu(int mcu_row);
+ coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
+ inline jpgd_block_coeff_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y);
+ void load_next_row();
+ void decode_next_row();
+ void make_huff_table(int index, huff_tables* pH);
+ void check_quant_tables();
+ void check_huff_tables();
+ bool calc_mcu_block_order();
+ int init_scan();
+ void init_frame();
+ void process_restart();
+ void decode_scan(pDecode_block_func decode_block_func);
+ void init_progressive();
+ void init_sequential();
+ void decode_start();
+ void decode_init(jpeg_decoder_stream* pStream, uint32_t flags);
+ void H2V2Convert();
+ uint32_t H2V2ConvertFiltered();
+ void H2V1Convert();
+ void H2V1ConvertFiltered();
+ void H1V2Convert();
+ void H1V2ConvertFiltered();
+ void H1V1Convert();
+ void gray_convert();
+ void find_eoi();
+ inline uint get_char();
+ inline uint get_char(bool* pPadding_flag);
+ inline void stuff_char(uint8 q);
+ inline uint8 get_octet();
+ inline uint get_bits(int num_bits);
+ inline uint get_bits_no_markers(int numbits);
+ inline int huff_decode(huff_tables* pH);
+ inline int huff_decode(huff_tables* pH, int& extrabits);
+
+ // Clamps a value between 0-255.
+ static inline uint8 clamp(int i)
+ {
+ if (static_cast<uint>(i) > 255)
+ i = (((~i) >> 31) & 0xFF);
+ return static_cast<uint8>(i);
+ }
+ int decode_next_mcu_row();
+
+ static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y);
+ static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y);
+ static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y);
+ static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y);
+ };
+
} // namespace jpgd
#endif // JPEG_DECODER_H
diff --git a/thirdparty/jpeg-compressor/jpgd_idct.h b/thirdparty/jpeg-compressor/jpgd_idct.h
new file mode 100644
index 0000000000..876425a959
--- /dev/null
+++ b/thirdparty/jpeg-compressor/jpgd_idct.h
@@ -0,0 +1,462 @@
+// Copyright 2009 Intel Corporation
+// All Rights Reserved
+//
+// Permission is granted to use, copy, distribute and prepare derivative works of this
+// software for any purpose and without fee, provided, that the above copyright notice
+// and this statement appear in all copies. Intel makes no representations about the
+// suitability of this software for any purpose. THIS SOFTWARE IS PROVIDED "AS IS."
+// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
+// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
+// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Intel does not
+// assume any responsibility for any errors which may appear in this software nor any
+// responsibility to update it.
+//
+// From:
+// https://software.intel.com/sites/default/files/m/d/4/1/d/8/UsingIntelAVXToImplementIDCT-r1_5.pdf
+// https://software.intel.com/file/29048
+//
+// Requires SSE
+//
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+#include <immintrin.h>
+
+#ifdef _MSC_VER
+ #define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+#else
+ #define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+
+#define BITS_INV_ACC 4
+#define SHIFT_INV_ROW 16 - BITS_INV_ACC
+#define SHIFT_INV_COL 1 + BITS_INV_ACC
+const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC); //1 << (SHIFT_INV_ROW-1)
+const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3); // 1 << (SHIFT_INV_COL-1)
+const short IRND_INV_CORR = IRND_INV_COL - 1; // correction -1.0 and round
+
+JPGD_SIMD_ALIGN(short, shortM128_one_corr[8]) = {1, 1, 1, 1, 1, 1, 1, 1};
+JPGD_SIMD_ALIGN(short, shortM128_round_inv_row[8]) = {IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0};
+JPGD_SIMD_ALIGN(short, shortM128_round_inv_col[8]) = {IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL};
+JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8])= {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR};
+JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036}; // tg * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}; // tg * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}; // tg * (2<<16) + 0.5
+JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195};// cos * (2<<16) + 0.5
+
+//-----------------------------------------------------------------------------
+// Table for rows 0,4 - constants are multiplied on cos_4_16
+// w15 w14 w11 w10 w07 w06 w03 w02
+// w29 w28 w25 w24 w21 w20 w17 w16
+// w31 w30 w27 w26 w23 w22 w19 w18
+//movq -> w05 w04 w01 w00
+JPGD_SIMD_ALIGN(short, shortM128_tab_i_04[]) = {
+ 16384, 21407, 16384, 8867,
+ 16384, -8867, 16384, -21407, // w13 w12 w09 w08
+ 16384, 8867, -16384, -21407, // w07 w06 w03 w02
+ -16384, 21407, 16384, -8867, // w15 w14 w11 w10
+ 22725, 19266, 19266, -4520, // w21 w20 w17 w16
+ 12873, -22725, 4520, -12873, // w29 w28 w25 w24
+ 12873, 4520, -22725, -12873, // w23 w22 w19 w18
+ 4520, 19266, 19266, -22725}; // w31 w30 w27 w26
+
+ // Table for rows 1,7 - constants are multiplied on cos_1_16
+//movq -> w05 w04 w01 w00
+JPGD_SIMD_ALIGN(short, shortM128_tab_i_17[]) = {
+ 22725, 29692, 22725, 12299,
+ 22725, -12299, 22725, -29692, // w13 w12 w09 w08
+ 22725, 12299, -22725, -29692, // w07 w06 w03 w02
+ -22725, 29692, 22725, -12299, // w15 w14 w11 w10
+ 31521, 26722, 26722, -6270, // w21 w20 w17 w16
+ 17855, -31521, 6270, -17855, // w29 w28 w25 w24
+ 17855, 6270, -31521, -17855, // w23 w22 w19 w18
+ 6270, 26722, 26722, -31521}; // w31 w30 w27 w26
+
+// Table for rows 2,6 - constants are multiplied on cos_2_16
+//movq -> w05 w04 w01 w00
+JPGD_SIMD_ALIGN(short, shortM128_tab_i_26[]) = {
+ 21407, 27969, 21407, 11585,
+ 21407, -11585, 21407, -27969, // w13 w12 w09 w08
+ 21407, 11585, -21407, -27969, // w07 w06 w03 w02
+ -21407, 27969, 21407, -11585, // w15 w14 w11 w10
+ 29692, 25172, 25172, -5906, // w21 w20 w17 w16
+ 16819, -29692, 5906, -16819, // w29 w28 w25 w24
+ 16819, 5906, -29692, -16819, // w23 w22 w19 w18
+ 5906, 25172, 25172, -29692}; // w31 w30 w27 w26
+// Table for rows 3,5 - constants are multiplied on cos_3_16
+//movq -> w05 w04 w01 w00
+JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = {
+ 19266, 25172, 19266, 10426,
+ 19266, -10426, 19266, -25172, // w13 w12 w09 w08
+ 19266, 10426, -19266, -25172, // w07 w06 w03 w02
+ -19266, 25172, 19266, -10426, // w15 w14 w11 w10
+ 26722, 22654, 22654, -5315, // w21 w20 w17 w16
+ 15137, -26722, 5315, -15137, // w29 w28 w25 w24
+ 15137, 5315, -26722, -15137, // w23 w22 w19 w18
+ 5315, 22654, 22654, -26722}; // w31 w30 w27 w26
+
+JPGD_SIMD_ALIGN(short, shortM128_128[8]) = { 128, 128, 128, 128, 128, 128, 128, 128 };
+
+void idctSSEShortU8(const short *pInput, uint8_t * pOutputUB)
+{
+ __m128i r_xmm0, r_xmm4;
+ __m128i r_xmm1, r_xmm2, r_xmm3, r_xmm5, r_xmm6, r_xmm7;
+ __m128i row0, row1, row2, row3, row4, row5, row6, row7;
+ short * pTab_i_04 = shortM128_tab_i_04;
+ short * pTab_i_26 = shortM128_tab_i_26;
+
+ //Get pointers for this input and output
+ pTab_i_04 = shortM128_tab_i_04;
+ pTab_i_26 = shortM128_tab_i_26;
+
+ //Row 1 and Row 3
+ r_xmm0 = _mm_load_si128((__m128i *) pInput);
+ r_xmm4 = _mm_load_si128((__m128i *) (&pInput[2*8]));
+
+ // *** Work on the data in xmm0
+ //low shuffle mask = 0xd8 = 11 01 10 00
+ //get short 2 and short 0 into ls 32-bits
+ r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
+
+ // copy short 2 and short 0 to all locations
+ r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
+
+ // add to those copies
+ r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
+
+ // shuffle mask = 0x55 = 01 01 01 01
+ // copy short 3 and short 1 to all locations
+ r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
+
+ // high shuffle mask = 0xd8 = 11 01 10 00
+ // get short 6 and short 4 into bit positions 64-95
+ // get short 7 and short 5 into bit positions 96-127
+ r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
+
+ // add to short 3 and short 1
+ r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
+
+ // shuffle mask = 0xaa = 10 10 10 10
+ // copy short 6 and short 4 to all locations
+ r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
+
+ // shuffle mask = 0xaa = 11 11 11 11
+ // copy short 7 and short 5 to all locations
+ r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
+
+ // add to short 6 and short 4
+ r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
+
+ // *** Work on the data in xmm4
+ // high shuffle mask = 0xd8 11 01 10 00
+ // get short 6 and short 4 into bit positions 64-95
+ // get short 7 and short 5 into bit positions 96-127
+ r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
+
+ // (xmm0 short 2 and short 0 plus pSi) + some constants
+ r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
+ r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
+ r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
+ r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
+ r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
+ r_xmm2 = r_xmm1;
+ r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
+ r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
+ r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
+ r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
+ r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
+ r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
+ r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
+ r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
+ r_xmm6 = r_xmm5;
+ r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
+ r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
+ row0 = _mm_packs_epi32(r_xmm0, r_xmm2);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
+ r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
+ r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
+ r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
+ row2 = _mm_packs_epi32(r_xmm4, r_xmm6);
+
+ //Row 5 and row 7
+ r_xmm0 = _mm_load_si128((__m128i *) (&pInput[4*8]));
+ r_xmm4 = _mm_load_si128((__m128i *) (&pInput[6*8]));
+
+ r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
+ r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
+ r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
+ r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
+ r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
+ r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
+ r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
+ r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
+ r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
+ r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
+ r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
+ r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
+ r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
+ r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
+ r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
+ r_xmm2 = r_xmm1;
+ r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
+ r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
+ r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
+ r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
+ r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
+ r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
+ r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
+ r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
+ r_xmm6 = r_xmm5;
+ r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
+ r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
+ row4 = _mm_packs_epi32(r_xmm0, r_xmm2);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
+ r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
+ r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
+ r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
+ row6 = _mm_packs_epi32(r_xmm4, r_xmm6);
+
+ //Row 4 and row 2
+ pTab_i_04 = shortM128_tab_i_35;
+ pTab_i_26 = shortM128_tab_i_17;
+ r_xmm0 = _mm_load_si128((__m128i *) (&pInput[3*8]));
+ r_xmm4 = _mm_load_si128((__m128i *) (&pInput[1*8]));
+
+ r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
+ r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
+ r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
+ r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
+ r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
+ r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
+ r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
+ r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
+ r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
+ r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
+ r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
+ r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
+ r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
+ r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
+ r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
+ r_xmm2 = r_xmm1;
+ r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
+ r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
+ r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
+ r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
+ r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
+ r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
+ r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
+ r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
+ r_xmm6 = r_xmm5;
+ r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
+ r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
+ row3 = _mm_packs_epi32(r_xmm0, r_xmm2);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
+ r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
+ r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
+ r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
+ row1 = _mm_packs_epi32(r_xmm4, r_xmm6);
+
+ //Row 6 and row 8
+ r_xmm0 = _mm_load_si128((__m128i *) (&pInput[5*8]));
+ r_xmm4 = _mm_load_si128((__m128i *) (&pInput[7*8]));
+
+ r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
+ r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
+ r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
+ r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
+ r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
+ r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
+ r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
+ r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
+ r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8]));
+ r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
+ r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
+ r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
+ r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
+ r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
+ r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
+ r_xmm2 = r_xmm1;
+ r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
+ r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
+ r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
+ r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
+ r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16]));
+ r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
+ r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
+ r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
+ r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
+ r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
+ r_xmm6 = r_xmm5;
+ r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
+ r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
+ row5 = _mm_packs_epi32(r_xmm0, r_xmm2);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
+ r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
+ r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
+ r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
+ r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
+ r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
+ row7 = _mm_packs_epi32(r_xmm4, r_xmm6);
+
+ r_xmm1 = _mm_load_si128((__m128i *) shortM128_tg_3_16);
+ r_xmm2 = row5;
+ r_xmm3 = row3;
+ r_xmm0 = _mm_mulhi_epi16(row5, r_xmm1);
+
+ r_xmm1 = _mm_mulhi_epi16(r_xmm1, r_xmm3);
+ r_xmm5 = _mm_load_si128((__m128i *) shortM128_tg_1_16);
+ r_xmm6 = row7;
+ r_xmm4 = _mm_mulhi_epi16(row7, r_xmm5);
+
+ r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm2);
+ r_xmm5 = _mm_mulhi_epi16(r_xmm5, row1);
+ r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm3);
+ r_xmm7 = row6;
+
+ r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm3);
+ r_xmm3 = _mm_load_si128((__m128i *) shortM128_tg_2_16);
+ r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm1);
+ r_xmm7 = _mm_mulhi_epi16(r_xmm7, r_xmm3);
+ r_xmm1 = r_xmm0;
+ r_xmm3 = _mm_mulhi_epi16(r_xmm3, row2);
+ r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm6);
+ r_xmm4 = _mm_adds_epi16(r_xmm4, row1);
+ r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm4);
+ r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i *) shortM128_one_corr));
+ r_xmm4 = _mm_subs_epi16(r_xmm4, r_xmm1);
+ r_xmm6 = r_xmm5;
+ r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm2);
+ r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_one_corr));
+ r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
+
+ //Intermediate results, needed later
+ __m128i temp3, temp7;
+ temp7 = r_xmm0;
+
+ r_xmm1 = r_xmm4;
+ r_xmm0 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
+ r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm5);
+ r_xmm2 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
+ r_xmm2 = _mm_mulhi_epi16(r_xmm2, r_xmm4);
+
+ //Intermediate results, needed later
+ temp3 = r_xmm6;
+
+ r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm5);
+ r_xmm7 = _mm_adds_epi16(r_xmm7, row2);
+ r_xmm3 = _mm_subs_epi16(r_xmm3, row6);
+ r_xmm6 = row0;
+ r_xmm0 = _mm_mulhi_epi16(r_xmm0, r_xmm1);
+ r_xmm5 = row4;
+ r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm6);
+ r_xmm6 = _mm_subs_epi16(r_xmm6, row4);
+ r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm2);
+
+ r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i *) shortM128_one_corr));
+ r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm1);
+ r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i *) shortM128_one_corr));
+
+ r_xmm2 = r_xmm5;
+ r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm7);
+ r_xmm1 = r_xmm6;
+ r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_round_inv_col));
+ r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm7);
+ r_xmm7 = temp7;
+ r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm3);
+ r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i *) shortM128_round_inv_col));
+ r_xmm7 = _mm_adds_epi16(r_xmm7, r_xmm5);
+ r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
+ r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm3);
+ r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i *) shortM128_round_inv_corr));
+ r_xmm3 = r_xmm6;
+ r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i *) shortM128_round_inv_corr));
+ r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm4);
+
+ //Store results for row 0
+ //_mm_store_si128((__m128i *) pOutput, r_xmm7);
+ __m128i r0 = r_xmm7;
+
+ r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
+ r_xmm7 = r_xmm1;
+ r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm0);
+
+ //Store results for row 1
+ //_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6);
+ __m128i r1 = r_xmm6;
+
+ r_xmm1 = _mm_srai_epi16(r_xmm1, SHIFT_INV_COL);
+ r_xmm6 = temp3;
+ r_xmm7 = _mm_subs_epi16(r_xmm7, r_xmm0);
+ r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
+
+ //Store results for row 2
+ //_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1);
+ __m128i r2 = r_xmm1;
+
+ r_xmm5 = _mm_subs_epi16(r_xmm5, temp7);
+ r_xmm5 = _mm_srai_epi16(r_xmm5, SHIFT_INV_COL);
+
+ //Store results for row 7
+ //_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5);
+ __m128i r7 = r_xmm5;
+
+ r_xmm3 = _mm_subs_epi16(r_xmm3, r_xmm4);
+ r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
+ r_xmm2 = _mm_subs_epi16(r_xmm2, temp3);
+ r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
+ r_xmm2 = _mm_srai_epi16(r_xmm2, SHIFT_INV_COL);
+
+ //Store results for row 3
+ //_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6);
+ __m128i r3 = r_xmm6;
+
+ r_xmm3 = _mm_srai_epi16(r_xmm3, SHIFT_INV_COL);
+
+ //Store results for rows 4, 5, and 6
+ //_mm_store_si128((__m128i *) (&pOutput[4*8]), r_xmm2);
+ //_mm_store_si128((__m128i *) (&pOutput[5*8]), r_xmm7);
+ //_mm_store_si128((__m128i *) (&pOutput[6*8]), r_xmm3);
+
+ __m128i r4 = r_xmm2;
+ __m128i r5 = r_xmm7;
+ __m128i r6 = r_xmm3;
+
+ r0 = _mm_add_epi16(*(const __m128i *)shortM128_128, r0);
+ r1 = _mm_add_epi16(*(const __m128i *)shortM128_128, r1);
+ r2 = _mm_add_epi16(*(const __m128i *)shortM128_128, r2);
+ r3 = _mm_add_epi16(*(const __m128i *)shortM128_128, r3);
+ r4 = _mm_add_epi16(*(const __m128i *)shortM128_128, r4);
+ r5 = _mm_add_epi16(*(const __m128i *)shortM128_128, r5);
+ r6 = _mm_add_epi16(*(const __m128i *)shortM128_128, r6);
+ r7 = _mm_add_epi16(*(const __m128i *)shortM128_128, r7);
+
+ ((__m128i *)pOutputUB)[0] = _mm_packus_epi16(r0, r1);
+ ((__m128i *)pOutputUB)[1] = _mm_packus_epi16(r2, r3);
+ ((__m128i *)pOutputUB)[2] = _mm_packus_epi16(r4, r5);
+ ((__m128i *)pOutputUB)[3] = _mm_packus_epi16(r6, r7);
+}
diff --git a/thirdparty/jpeg-compressor/patches/fix-msvc2017-build.patch b/thirdparty/jpeg-compressor/patches/fix-msvc2017-build.patch
new file mode 100644
index 0000000000..7b338de084
--- /dev/null
+++ b/thirdparty/jpeg-compressor/patches/fix-msvc2017-build.patch
@@ -0,0 +1,31 @@
+diff --git a/thirdparty/jpeg-compressor/jpgd.cpp b/thirdparty/jpeg-compressor/jpgd.cpp
+index a0c494db61..257d0b7574 100644
+--- a/thirdparty/jpeg-compressor/jpgd.cpp
++++ b/thirdparty/jpeg-compressor/jpgd.cpp
+@@ -2126,7 +2126,7 @@ namespace jpgd {
+
+ int jpeg_decoder::decode_next_mcu_row()
+ {
+- if (setjmp(m_jmp_state))
++ if (::setjmp(m_jmp_state))
+ return JPGD_FAILED;
+
+ const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));
+@@ -3042,7 +3042,7 @@ namespace jpgd {
+
+ jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags)
+ {
+- if (setjmp(m_jmp_state))
++ if (::setjmp(m_jmp_state))
+ return;
+ decode_init(pStream, flags);
+ }
+@@ -3055,7 +3055,7 @@ namespace jpgd {
+ if (m_error_code)
+ return JPGD_FAILED;
+
+- if (setjmp(m_jmp_state))
++ if (::setjmp(m_jmp_state))
+ return JPGD_FAILED;
+
+ decode_start();
diff --git a/thirdparty/mbedtls/include/mbedtls/check_config.h b/thirdparty/mbedtls/include/mbedtls/check_config.h
index d076c2352f..93de091c4d 100644
--- a/thirdparty/mbedtls/include/mbedtls/check_config.h
+++ b/thirdparty/mbedtls/include/mbedtls/check_config.h
@@ -546,6 +546,23 @@
#error "MBEDTLS_SSL_PROTO_TLS1_2 defined, but not all prerequisites"
#endif
+#if (defined(MBEDTLS_SSL_PROTO_SSL3) || defined(MBEDTLS_SSL_PROTO_TLS1) || \
+ defined(MBEDTLS_SSL_PROTO_TLS1_1) || defined(MBEDTLS_SSL_PROTO_TLS1_2)) && \
+ !(defined(MBEDTLS_KEY_EXCHANGE_RSA_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_DHE_RSA_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_ECDHE_RSA_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_ECDHE_ECDSA_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_ECDH_RSA_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_ECDH_ECDSA_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_PSK_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_DHE_PSK_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_RSA_PSK_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_ECDHE_PSK_ENABLED) || \
+ defined(MBEDTLS_KEY_EXCHANGE_ECJPAKE_ENABLED) )
+#error "One or more versions of the TLS protocol are enabled " \
+ "but no key exchange methods defined with MBEDTLS_KEY_EXCHANGE_xxxx"
+#endif
+
#if defined(MBEDTLS_SSL_PROTO_DTLS) && \
!defined(MBEDTLS_SSL_PROTO_TLS1_1) && \
!defined(MBEDTLS_SSL_PROTO_TLS1_2)
@@ -669,6 +686,10 @@
#error "MBEDTLS_X509_CREATE_C defined, but not all prerequisites"
#endif
+#if defined(MBEDTLS_CERTS_C) && !defined(MBEDTLS_X509_USE_C)
+#error "MBEDTLS_CERTS_C defined, but not all prerequisites"
+#endif
+
#if defined(MBEDTLS_X509_CRT_PARSE_C) && ( !defined(MBEDTLS_X509_USE_C) )
#error "MBEDTLS_X509_CRT_PARSE_C defined, but not all prerequisites"
#endif
diff --git a/thirdparty/mbedtls/include/mbedtls/version.h b/thirdparty/mbedtls/include/mbedtls/version.h
index 8e2ce03c32..e0a2e7f6d6 100644
--- a/thirdparty/mbedtls/include/mbedtls/version.h
+++ b/thirdparty/mbedtls/include/mbedtls/version.h
@@ -40,16 +40,16 @@
*/
#define MBEDTLS_VERSION_MAJOR 2
#define MBEDTLS_VERSION_MINOR 16
-#define MBEDTLS_VERSION_PATCH 5
+#define MBEDTLS_VERSION_PATCH 6
/**
* The single version number has the following structure:
* MMNNPP00
* Major version | Minor version | Patch version
*/
-#define MBEDTLS_VERSION_NUMBER 0x02100500
-#define MBEDTLS_VERSION_STRING "2.16.5"
-#define MBEDTLS_VERSION_STRING_FULL "mbed TLS 2.16.5"
+#define MBEDTLS_VERSION_NUMBER 0x02100600
+#define MBEDTLS_VERSION_STRING "2.16.6"
+#define MBEDTLS_VERSION_STRING_FULL "mbed TLS 2.16.6"
#if defined(MBEDTLS_VERSION_C)
diff --git a/thirdparty/mbedtls/library/ecp.c b/thirdparty/mbedtls/library/ecp.c
index 040c20bd38..725e176df2 100644
--- a/thirdparty/mbedtls/library/ecp.c
+++ b/thirdparty/mbedtls/library/ecp.c
@@ -1938,6 +1938,20 @@ static int ecp_mul_comb_after_precomp( const mbedtls_ecp_group *grp,
final_norm:
#endif
+ /*
+ * Knowledge of the jacobian coordinates may leak the last few bits of the
+ * scalar [1], and since our MPI implementation isn't constant-flow,
+ * inversion (used for coordinate normalization) may leak the full value
+ * of its input via side-channels [2].
+ *
+ * [1] https://eprint.iacr.org/2003/191
+ * [2] https://eprint.iacr.org/2020/055
+ *
+ * Avoid the leak by randomizing coordinates before we normalize them.
+ */
+ if( f_rng != 0 )
+ MBEDTLS_MPI_CHK( ecp_randomize_jac( grp, RR, f_rng, p_rng ) );
+
MBEDTLS_ECP_BUDGET( MBEDTLS_ECP_OPS_INV );
MBEDTLS_MPI_CHK( ecp_normalize_jac( grp, RR ) );
@@ -2308,6 +2322,20 @@ static int ecp_mul_mxz( mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
MBEDTLS_MPI_CHK( mbedtls_mpi_safe_cond_swap( &R->Z, &RP.Z, b ) );
}
+ /*
+ * Knowledge of the projective coordinates may leak the last few bits of the
+ * scalar [1], and since our MPI implementation isn't constant-flow,
+ * inversion (used for coordinate normalization) may leak the full value
+ * of its input via side-channels [2].
+ *
+ * [1] https://eprint.iacr.org/2003/191
+ * [2] https://eprint.iacr.org/2020/055
+ *
+ * Avoid the leak by randomizing coordinates before we normalize them.
+ */
+ if( f_rng != NULL )
+ MBEDTLS_MPI_CHK( ecp_randomize_mxz( grp, R, f_rng, p_rng ) );
+
MBEDTLS_MPI_CHK( ecp_normalize_mxz( grp, R ) );
cleanup:
diff --git a/thirdparty/mbedtls/library/ssl_cli.c b/thirdparty/mbedtls/library/ssl_cli.c
index afced7a99c..c5c3af69df 100644
--- a/thirdparty/mbedtls/library/ssl_cli.c
+++ b/thirdparty/mbedtls/library/ssl_cli.c
@@ -1417,6 +1417,19 @@ static int ssl_parse_hello_verify_request( mbedtls_ssl_context *ssl )
MBEDTLS_SSL_DEBUG_MSG( 2, ( "=> parse hello verify request" ) );
+ /* Check that there is enough room for:
+ * - 2 bytes of version
+ * - 1 byte of cookie_len
+ */
+ if( mbedtls_ssl_hs_hdr_len( ssl ) + 3 > ssl->in_msglen )
+ {
+ MBEDTLS_SSL_DEBUG_MSG( 1,
+ ( "incoming HelloVerifyRequest message is too short" ) );
+ mbedtls_ssl_send_alert_message( ssl, MBEDTLS_SSL_ALERT_LEVEL_FATAL,
+ MBEDTLS_SSL_ALERT_MSG_DECODE_ERROR );
+ return( MBEDTLS_ERR_SSL_BAD_HS_SERVER_HELLO );
+ }
+
/*
* struct {
* ProtocolVersion server_version;
@@ -1445,8 +1458,6 @@ static int ssl_parse_hello_verify_request( mbedtls_ssl_context *ssl )
}
cookie_len = *p++;
- MBEDTLS_SSL_DEBUG_BUF( 3, "cookie", p, cookie_len );
-
if( ( ssl->in_msg + ssl->in_msglen ) - p < cookie_len )
{
MBEDTLS_SSL_DEBUG_MSG( 1,
@@ -1455,6 +1466,7 @@ static int ssl_parse_hello_verify_request( mbedtls_ssl_context *ssl )
MBEDTLS_SSL_ALERT_MSG_DECODE_ERROR );
return( MBEDTLS_ERR_SSL_BAD_HS_SERVER_HELLO );
}
+ MBEDTLS_SSL_DEBUG_BUF( 3, "cookie", p, cookie_len );
mbedtls_free( ssl->handshake->verify_cookie );
diff --git a/thirdparty/mbedtls/library/ssl_tls.c b/thirdparty/mbedtls/library/ssl_tls.c
index b8f35fec5d..cbec74fe8c 100644
--- a/thirdparty/mbedtls/library/ssl_tls.c
+++ b/thirdparty/mbedtls/library/ssl_tls.c
@@ -1004,8 +1004,6 @@ int mbedtls_ssl_derive_keys( mbedtls_ssl_context *ssl )
#if defined(MBEDTLS_SSL_HW_RECORD_ACCEL)
if( mbedtls_ssl_hw_record_init != NULL )
{
- int ret = 0;
-
MBEDTLS_SSL_DEBUG_MSG( 2, ( "going for mbedtls_ssl_hw_record_init()" ) );
if( ( ret = mbedtls_ssl_hw_record_init( ssl, key1, key2, transform->keylen,
@@ -2885,15 +2883,18 @@ static void ssl_dtls_replay_reset( mbedtls_ssl_context *ssl );
/*
* Swap transform_out and out_ctr with the alternative ones
*/
-static void ssl_swap_epochs( mbedtls_ssl_context *ssl )
+static int ssl_swap_epochs( mbedtls_ssl_context *ssl )
{
mbedtls_ssl_transform *tmp_transform;
unsigned char tmp_out_ctr[8];
+#if defined(MBEDTLS_SSL_HW_RECORD_ACCEL)
+ int ret;
+#endif /* MBEDTLS_SSL_HW_RECORD_ACCEL */
if( ssl->transform_out == ssl->handshake->alt_transform_out )
{
MBEDTLS_SSL_DEBUG_MSG( 3, ( "skip swap epochs" ) );
- return;
+ return( 0 );
}
MBEDTLS_SSL_DEBUG_MSG( 3, ( "swap epochs" ) );
@@ -2920,7 +2921,9 @@ static void ssl_swap_epochs( mbedtls_ssl_context *ssl )
return( MBEDTLS_ERR_SSL_HW_ACCEL_FAILED );
}
}
-#endif
+#endif /* MBEDTLS_SSL_HW_RECORD_ACCEL */
+
+ return( 0 );
}
/*
@@ -2957,7 +2960,8 @@ int mbedtls_ssl_flight_transmit( mbedtls_ssl_context *ssl )
ssl->handshake->cur_msg = ssl->handshake->flight;
ssl->handshake->cur_msg_p = ssl->handshake->flight->p + 12;
- ssl_swap_epochs( ssl );
+ if( ( ret = ssl_swap_epochs( ssl ) ) != 0 )
+ return( ret );
ssl->handshake->retransmit_state = MBEDTLS_SSL_RETRANS_SENDING;
}
@@ -2980,7 +2984,8 @@ int mbedtls_ssl_flight_transmit( mbedtls_ssl_context *ssl )
if( is_finished && ssl->handshake->cur_msg_p == ( cur->p + 12 ) )
{
MBEDTLS_SSL_DEBUG_MSG( 2, ( "swap epochs to send finished message" ) );
- ssl_swap_epochs( ssl );
+ if( ( ret = ssl_swap_epochs( ssl ) ) != 0 )
+ return( ret );
}
ret = ssl_get_remaining_payload_in_datagram( ssl );
@@ -3017,7 +3022,10 @@ int mbedtls_ssl_flight_transmit( mbedtls_ssl_context *ssl )
if( ( max_frag_len < 12 ) || ( max_frag_len == 12 && hs_len != 0 ) )
{
if( is_finished )
- ssl_swap_epochs( ssl );
+ {
+ if( ( ret = ssl_swap_epochs( ssl ) ) != 0 )
+ return( ret );
+ }
if( ( ret = mbedtls_ssl_flush_output( ssl ) ) != 0 )
return( ret );
@@ -3997,17 +4005,23 @@ static int ssl_handle_possible_reconnect( mbedtls_ssl_context *ssl )
if( ret == MBEDTLS_ERR_SSL_HELLO_VERIFY_REQUIRED )
{
+ int send_ret;
+ MBEDTLS_SSL_DEBUG_MSG( 1, ( "sending HelloVerifyRequest" ) );
+ MBEDTLS_SSL_DEBUG_BUF( 4, "output record sent to network",
+ ssl->out_buf, len );
/* Don't check write errors as we can't do anything here.
* If the error is permanent we'll catch it later,
* if it's not, then hopefully it'll work next time. */
- (void) ssl->f_send( ssl->p_bio, ssl->out_buf, len );
+ send_ret = ssl->f_send( ssl->p_bio, ssl->out_buf, len );
+ MBEDTLS_SSL_DEBUG_RET( 2, "ssl->f_send", send_ret );
+ (void) send_ret;
return( MBEDTLS_ERR_SSL_HELLO_VERIFY_REQUIRED );
}
if( ret == 0 )
{
- /* Got a valid cookie, partially reset context */
+ MBEDTLS_SSL_DEBUG_MSG( 1, ( "cookie is valid, resetting context" ) );
if( ( ret = ssl_session_reset_int( ssl, 1 ) ) != 0 )
{
MBEDTLS_SSL_DEBUG_RET( 1, "reset", ret );
diff --git a/thirdparty/mbedtls/library/x509.c b/thirdparty/mbedtls/library/x509.c
index 2e0b0e8f6c..4d25303206 100644
--- a/thirdparty/mbedtls/library/x509.c
+++ b/thirdparty/mbedtls/library/x509.c
@@ -1063,7 +1063,7 @@ cleanup:
mbedtls_x509_crt_free( &clicert );
#else
((void) verbose);
-#endif /* MBEDTLS_CERTS_C && MBEDTLS_SHA1_C */
+#endif /* MBEDTLS_CERTS_C && MBEDTLS_SHA256_C */
return( ret );
}