Diffstat (limited to 'thirdparty/thekla_atlas/nvmath/Solver.cpp')
-rw-r--r--  thirdparty/thekla_atlas/nvmath/Solver.cpp  744
1 file changed, 744 insertions, 0 deletions
diff --git a/thirdparty/thekla_atlas/nvmath/Solver.cpp b/thirdparty/thekla_atlas/nvmath/Solver.cpp
new file mode 100644
index 0000000000..191793ee29
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Solver.cpp
@@ -0,0 +1,744 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Solver.h"
+#include "Sparse.h"
+
+#include "nvcore/Array.inl"
+
+using namespace nv;
+
+namespace
+{
+ class Preconditioner
+ {
+ public:
+ // Virtual dtor.
+ virtual ~Preconditioner() { }
+
+        // Apply preconditioning step: y = M^-1 · x.
+ virtual void apply(const FullVector & x, FullVector & y) const = 0;
+ };
+
+
+ // Jacobi preconditioner.
+ class JacobiPreconditioner : public Preconditioner
+ {
+ public:
+
+ JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
+ {
+ nvCheck(M.isSquare());
+
+ for(uint x = 0; x < M.width(); x++)
+ {
+ float elem = M.getCoefficient(x, x);
+ //nvDebugCheck( elem != 0.0f ); // This can be zero in the presence of zero area triangles.
+
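+            // Note: for the symmetric solver the stored factor is 1/sqrt(|d|),
+            // so apply() scales by D^-1/2 rather than by the full Jacobi
+            // inverse D^-1.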
+ if (symmetric)
+ {
+ m_inverseDiagonal[x] = (elem != 0) ? 1.0f / sqrtf(fabsf(elem)) : 1.0f;
+ }
+ else
+ {
+ m_inverseDiagonal[x] = (elem != 0) ? 1.0f / elem : 1.0f;
+ }
+ }
+ }
+
+ void apply(const FullVector & x, FullVector & y) const
+ {
+ nvDebugCheck(x.dimension() == m_inverseDiagonal.dimension());
+ nvDebugCheck(y.dimension() == m_inverseDiagonal.dimension());
+
+ // @@ Wrap vector component-wise product into a separate function.
+ const uint D = x.dimension();
+ for (uint i = 0; i < D; i++)
+ {
+ y[i] = m_inverseDiagonal[i] * x[i];
+ }
+ }
+
+ private:
+
+ FullVector m_inverseDiagonal;
+
+ };
+
+} // namespace
+
+
+static bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
+static bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
+
+
+// Solve the symmetric system: At·A·x = At·b
+bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
+{
+ nvDebugCheck(A.width() == x.dimension());
+ nvDebugCheck(A.height() == b.dimension());
+ nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
+
+ const uint D = A.width();
+
+ SparseMatrix At(A.height(), A.width());
+ transpose(A, At);
+
+ FullVector Atb(D);
+ //mult(Transposed, A, b, Atb);
+ mult(At, b, Atb);
+
+ SparseMatrix AtA(D);
+ //mult(Transposed, A, NoTransposed, A, AtA);
+ mult(At, A, AtA);
+
+ return SymmetricSolver(AtA, Atb, x, epsilon);
+}
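+
+// A minimal usage sketch (disabled, like the legacy solvers at the end of this
+// file), assuming only the SparseMatrix/FullVector calls already used above:
+// solve the 3x2 system { x0 + x1 = 3, x0 = 1, x1 = 2 } in the least squares
+// sense; the expected solution is x = (1, 2).
+#if 0
+static void LeastSquaresExample()
+{
+    SparseMatrix A(/*width=*/2, /*height=*/3);
+    A.setCoefficient(0, 0, 1.0f);   // row 0: x0
+    A.setCoefficient(1, 0, 1.0f);   // row 0: + x1
+    A.setCoefficient(0, 1, 1.0f);   // row 1: x0
+    A.setCoefficient(1, 2, 1.0f);   // row 2: x1
+
+    FullVector b(3);
+    b[0] = 3.0f; b[1] = 1.0f; b[2] = 2.0f;
+
+    FullVector x(2);
+    x[0] = 0.0f; x[1] = 0.0f;       // initial guess
+
+    LeastSquaresSolver(A, b, x, 1e-5f);
+}
+#endif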
+
+
+// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
+bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
+{
+ nvDebugCheck(A.width() == x.dimension());
+ nvDebugCheck(A.height() == b.dimension());
+ nvDebugCheck(A.height() >= A.width() - lockedCount);
+
+ // @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
+
+ const uint D = A.width() - lockedCount;
+ nvDebugCheck(D > 0);
+
+ // Compute: b - Al * xl
+ FullVector b_Alxl(b);
+
+ for (uint y = 0; y < A.height(); y++)
+ {
+ const uint count = A.getRow(y).count();
+ for (uint e = 0; e < count; e++)
+ {
+ uint column = A.getRow(y)[e].x;
+
+ bool isFree = true;
+ for (uint i = 0; i < lockedCount; i++)
+ {
+ isFree &= (lockedParameters[i] != column);
+ }
+
+ if (!isFree)
+ {
+ b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
+ }
+ }
+ }
+
+ // Remove locked columns from A.
+ SparseMatrix Af(D, A.height());
+
+ for (uint y = 0; y < A.height(); y++)
+ {
+ const uint count = A.getRow(y).count();
+ for (uint e = 0; e < count; e++)
+ {
+ uint column = A.getRow(y)[e].x;
+ uint ix = column;
+
+ bool isFree = true;
+ for (uint i = 0; i < lockedCount; i++)
+ {
+ isFree &= (lockedParameters[i] != column);
+ if (column > lockedParameters[i]) ix--; // shift columns
+ }
+
+ if (isFree)
+ {
+ Af.setCoefficient(ix, y, A.getRow(y)[e].v);
+ }
+ }
+ }
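+    // Example of the shift above: with lockedParameters = {1, 3}, free columns
+    // 0, 2 and 4 of A map to columns 0, 1 and 2 of Af.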
+
+ // Remove elements from x
+ FullVector xf(D);
+
+ for (uint i = 0, j = 0; i < A.width(); i++)
+ {
+ bool isFree = true;
+ for (uint l = 0; l < lockedCount; l++)
+ {
+ isFree &= (lockedParameters[l] != i);
+ }
+
+ if (isFree)
+ {
+ xf[j++] = x[i];
+ }
+ }
+
+ // Solve reduced system.
+ bool result = LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
+
+ // Copy results back to x.
+ for (uint i = 0, j = 0; i < A.width(); i++)
+ {
+ bool isFree = true;
+ for (uint l = 0; l < lockedCount; l++)
+ {
+ isFree &= (lockedParameters[l] != i);
+ }
+
+ if (isFree)
+ {
+ x[i] = xf[j++];
+ }
+ }
+
+ return result;
+}
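+
+// Sketch of the locked variant (disabled, illustration only): pin x[0] to its
+// current value and solve only for the remaining free parameters.
+#if 0
+static void LockedLeastSquaresExample(const SparseMatrix & A, const FullVector & b, FullVector & x)
+{
+    const uint locked[] = { 0 };    // keep x[0] fixed at its current value
+    LeastSquaresSolver(A, b, x, locked, 1, 1e-5f);
+}
+#endif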
+
+
+bool nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
+{
+ nvDebugCheck(A.height() == A.width());
+ nvDebugCheck(A.height() == b.dimension());
+ nvDebugCheck(b.dimension() == x.dimension());
+
+ JacobiPreconditioner jacobi(A, true);
+ return ConjugateGradientSolver(jacobi, A, b, x, epsilon);
+
+ //return ConjugateGradientSolver(A, b, x, epsilon);
+}
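+
+// A minimal sketch of a direct use of SymmetricSolver (disabled), assuming the
+// same SparseMatrix/FullVector API as above: solve the 2x2 SPD system
+// [4 1; 1 3]·x = (1, 2); the exact solution is (1/11, 7/11).
+#if 0
+static void SymmetricSolverExample()
+{
+    SparseMatrix A(2);
+    A.setCoefficient(0, 0, 4.0f);
+    A.setCoefficient(1, 0, 1.0f);
+    A.setCoefficient(0, 1, 1.0f);
+    A.setCoefficient(1, 1, 3.0f);
+
+    FullVector b(2);
+    b[0] = 1.0f; b[1] = 2.0f;
+
+    FullVector x(2);
+    x[0] = 0.0f; x[1] = 0.0f;       // initial guess
+
+    SymmetricSolver(A, b, x, 1e-5f);
+}
+#endif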
+
+
+/**
+* Compute the solution of the sparse linear system A·x = b using the Conjugate
+* Gradient method.
+*
+* Solving sparse linear systems:
+* (1) A·x = b
+*
+* The conjugate gradient algorithm solves (1) only in the case that A is
+* symmetric and positive definite. It is based on the idea of minimizing the
+* function
+*
+* (2) f(x) = 1/2·x·A·x - b·x
+*
+* This function is minimized when its gradient
+*
+* (3) df = A·x - b
+*
+* is zero, which is equivalent to (1). The minimization is carried out by
+* generating a succession of search directions p.k and improved minimizers x.k.
+* At each stage a quantity alpha.k is found that minimizes f(x.k + alpha.k·p.k),
+* and x.k+1 is set equal to the new point x.k + alpha.k·p.k. The p.k and x.k are
+* built up in such a way that x.k+1 is also the minimizer of f over the whole
+* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
+* iterations you arrive at the minimizer over the entire vector space, i.e., the
+* solution to (1).
+*
+* For a really good explanation of the method see:
+*
+* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
+* Jonathan Richard Shewchuk.
+*
+**/
+/*static*/ bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
+{
+ nvDebugCheck( A.isSquare() );
+ nvDebugCheck( A.width() == b.dimension() );
+ nvDebugCheck( A.width() == x.dimension() );
+
+ int i = 0;
+ const int D = A.width();
+ const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
+
+ FullVector r(D); // residual
+ FullVector p(D); // search direction
+    FullVector q(D); // q = A·p
+ float delta_0;
+ float delta_old;
+ float delta_new;
+ float alpha;
+ float beta;
+
+ // r = b - A·x;
+ copy(b, r);
+ sgemv(-1, A, x, 1, r);
+
+ // p = r;
+ copy(r, p);
+
+ delta_new = dot( r, r );
+ delta_0 = delta_new;
+
+ while (i < i_max && delta_new > epsilon*epsilon*delta_0)
+ {
+ i++;
+
+ // q = A·p
+ mult(A, p, q);
+
+ // alpha = delta_new / p·q
+ alpha = delta_new / dot( p, q );
+
+        // x = alpha·p + x
+ saxpy(alpha, p, x);
+
+        if ((i & 31) == 0) // recompute r from b and x every 32 iterations to flush accumulated rounding error
+ {
+ // r = b - A·x
+ copy(b, r);
+ sgemv(-1, A, x, 1, r);
+ }
+ else
+ {
+ // r = r - alpha·q
+ saxpy(-alpha, q, r);
+ }
+
+ delta_old = delta_new;
+ delta_new = dot( r, r );
+
+ beta = delta_new / delta_old;
+
+ // p = beta·p + r
+ scal(beta, p);
+ saxpy(1, r, p);
+ }
+
+ return delta_new <= epsilon*epsilon*delta_0;
+}
+
+
+// Conjugate gradient with preconditioner.
+/*static*/ bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
+{
+ nvDebugCheck( A.isSquare() );
+ nvDebugCheck( A.width() == b.dimension() );
+ nvDebugCheck( A.width() == x.dimension() );
+
+ int i = 0;
+ const int D = A.width();
+ const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
+
+ FullVector r(D); // residual
+ FullVector p(D); // search direction
+    FullVector q(D); // q = A·p
+    FullVector s(D); // preconditioned residual
+ float delta_0;
+ float delta_old;
+ float delta_new;
+ float alpha;
+ float beta;
+
+ // r = b - A·x
+ copy(b, r);
+ sgemv(-1, A, x, 1, r);
+
+
+ // p = M^-1 · r
+ preconditioner.apply(r, p);
+ //copy(r, p);
+
+
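+    // Note that PCG tracks delta = r·(M^-1·r) rather than r·r.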
+ delta_new = dot(r, p);
+ delta_0 = delta_new;
+
+ while (i < i_max && delta_new > epsilon*epsilon*delta_0)
+ {
+ i++;
+
+ // q = A·p
+ mult(A, p, q);
+
+ // alpha = delta_new / p·q
+ alpha = delta_new / dot(p, q);
+
+        // x = alpha·p + x
+ saxpy(alpha, p, x);
+
+        if ((i & 31) == 0) // recompute r from b and x every 32 iterations to flush accumulated rounding error
+ {
+ // r = b - A·x
+ copy(b, r);
+ sgemv(-1, A, x, 1, r);
+ }
+ else
+ {
+            // r = r - alpha·q
+ saxpy(-alpha, q, r);
+ }
+
+ // s = M^-1 · r
+ preconditioner.apply(r, s);
+ //copy(r, s);
+
+ delta_old = delta_new;
+ delta_new = dot( r, s );
+
+ beta = delta_new / delta_old;
+
+ // p = s + beta·p
+ scal(beta, p);
+ saxpy(1, s, p);
+ }
+
+ return delta_new <= epsilon*epsilon*delta_0;
+}
+
+
+#if 0 // Nonsymmetric solvers
+
+/** Bi-conjugate gradient method. */
+MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
+ piDebugCheck( A.IsSquare() );
+ piDebugCheck( A.Width() == b.Dim() );
+ piDebugCheck( A.Width() == x.Dim() );
+
+ int i = 0;
+ const int D = A.Width();
+ const int i_max = 4 * D;
+
+ float resid;
+ float rho_1 = 0;
+ float rho_2 = 0;
+ float alpha;
+ float beta;
+
+ DenseVector r(D);
+ DenseVector rtilde(D);
+ DenseVector p(D);
+ DenseVector ptilde(D);
+ DenseVector q(D);
+ DenseVector qtilde(D);
+    DenseVector tmp(D); // temporary vector.
+
+ // r = b - A·x;
+ A.Product( x, tmp );
+ r.Sub( b, tmp );
+
+ // rtilde = r
+ rtilde.Set( r );
+
+ // p = r;
+ p.Set( r );
+
+ // ptilde = rtilde
+ ptilde.Set( rtilde );
+
+
+
+ float normb = b.Norm();
+ if( normb == 0.0 ) normb = 1;
+
+ // test convergence
+ resid = r.Norm() / normb;
+ if( resid < epsilon ) {
+        // already converged at the initial guess.
+ return 0;
+ }
+
+
+ while( i < i_max ) {
+
+ i++;
+
+ rho_1 = DenseVectorDotProduct( r, rtilde );
+
+ if( rho_1 == 0 ) {
+ // method fails.
+ return -i;
+ }
+
+ if (i == 1) {
+ p.Set( r );
+ ptilde.Set( rtilde );
+ }
+ else {
+ beta = rho_1 / rho_2;
+
+ // p = r + beta * p;
+ p.Mad( r, p, beta );
+
+            // ptilde = rtilde + beta * ptilde;
+ ptilde.Mad( rtilde, ptilde, beta );
+ }
+
+ // q = A * p;
+ A.Product( p, q );
+
+ // qtilde = A^t * ptilde;
+ A.TransProduct( ptilde, qtilde );
+
+ alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
+
+ // x += alpha * p;
+ x.Mad( x, p, alpha );
+
+ // r -= alpha * q;
+ r.Mad( r, q, -alpha );
+
+ // rtilde -= alpha * qtilde;
+ rtilde.Mad( rtilde, qtilde, -alpha );
+
+ rho_2 = rho_1;
+
+ // test convergence
+ resid = r.Norm() / normb;
+ if( resid < epsilon ) {
+ // method converges
+ return i;
+ }
+ }
+
+ return i;
+}
+
+
+/** Bi-conjugate gradient stabilized method. */
+int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
+ piDebugCheck( A.IsSquare() );
+ piDebugCheck( A.Width() == b.Dim() );
+ piDebugCheck( A.Width() == x.Dim() );
+
+ int i = 0;
+ const int D = A.Width();
+ const int i_max = 2 * D;
+
+
+ float resid;
+ float rho_1 = 0;
+ float rho_2 = 0;
+ float alpha = 0;
+ float beta = 0;
+ float omega = 0;
+
+ DenseVector p(D);
+ DenseVector phat(D);
+ DenseVector s(D);
+ DenseVector shat(D);
+ DenseVector t(D);
+ DenseVector v(D);
+
+ DenseVector r(D);
+ DenseVector rtilde(D);
+
+ DenseVector tmp(D);
+
+ // r = b - A·x;
+ A.Product( x, tmp );
+ r.Sub( b, tmp );
+
+ // rtilde = r
+ rtilde.Set( r );
+
+
+ float normb = b.Norm();
+ if( normb == 0.0 ) normb = 1;
+
+ // test convergence
+ resid = r.Norm() / normb;
+ if( resid < epsilon ) {
+        // already converged at the initial guess.
+ return 0;
+ }
+
+
+ while( i<i_max ) {
+
+ i++;
+
+ rho_1 = DenseVectorDotProduct( rtilde, r );
+ if( rho_1 == 0 ) {
+ // method fails
+ return -i;
+ }
+
+
+ if( i == 1 ) {
+ p.Set( r );
+ }
+ else {
+ beta = (rho_1 / rho_2) * (alpha / omega);
+
+ // p = r + beta * (p - omega * v);
+ p.Mad( p, v, -omega );
+ p.Mad( r, p, beta );
+ }
+
+ //phat = M.solve(p);
+ phat.Set( p );
+ //Precond( &phat, p );
+
+ //v = A * phat;
+ A.Product( phat, v );
+
+ alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
+
+ // s = r - alpha * v;
+ s.Mad( r, v, -alpha );
+
+
+ resid = s.Norm() / normb;
+ if( resid < epsilon ) {
+ // x += alpha * phat;
+ x.Mad( x, phat, alpha );
+ return i;
+ }
+
+ //shat = M.solve(s);
+ shat.Set( s );
+ //Precond( &shat, s );
+
+ //t = A * shat;
+ A.Product( shat, t );
+
+ omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
+
+ // x += alpha * phat + omega * shat;
+ x.Mad( x, shat, omega );
+ x.Mad( x, phat, alpha );
+
+ //r = s - omega * t;
+ r.Mad( s, t, -omega );
+
+ rho_2 = rho_1;
+
+ resid = r.Norm() / normb;
+ if( resid < epsilon ) {
+ return i;
+ }
+
+ if( omega == 0 ) {
+            return -i; // breakdown: omega == 0.
+ }
+ }
+
+ return i;
+}
+
+
+/** Bi-conjugate gradient stabilized method. */
+int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
+ piDebugCheck( A.IsSquare() );
+ piDebugCheck( A.Width() == b.Dim() );
+ piDebugCheck( A.Width() == x.Dim() );
+
+ int i = 0;
+ const int D = A.Width();
+ const int i_max = D;
+ // const int i_max = 1000;
+
+
+ float resid;
+ float rho_1 = 0;
+ float rho_2 = 0;
+ float alpha = 0;
+ float beta = 0;
+ float omega = 0;
+
+ DenseVector p(D);
+ DenseVector phat(D);
+ DenseVector s(D);
+ DenseVector shat(D);
+ DenseVector t(D);
+ DenseVector v(D);
+
+ DenseVector r(D);
+ DenseVector rtilde(D);
+
+ DenseVector tmp(D);
+
+ // r = b - A·x;
+ A.Product( x, tmp );
+ r.Sub( b, tmp );
+
+ // rtilde = r
+ rtilde.Set( r );
+
+
+ float normb = b.Norm();
+ if( normb == 0.0 ) normb = 1;
+
+ // test convergence
+ resid = r.Norm() / normb;
+ if( resid < epsilon ) {
+        // already converged at the initial guess.
+ return 0;
+ }
+
+
+ while( i<i_max ) {
+
+ i++;
+
+ rho_1 = DenseVectorDotProduct( rtilde, r );
+ if( rho_1 == 0 ) {
+ // method fails
+ return -i;
+ }
+
+
+ if( i == 1 ) {
+ p.Set( r );
+ }
+ else {
+ beta = (rho_1 / rho_2) * (alpha / omega);
+
+ // p = r + beta * (p - omega * v);
+ p.Mad( p, v, -omega );
+ p.Mad( r, p, beta );
+ }
+
+ //phat = M.solve(p);
+ //phat.Set( p );
+ M.Precond( &phat, p );
+
+ //v = A * phat;
+ A.Product( phat, v );
+
+ alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
+
+ // s = r - alpha * v;
+ s.Mad( r, v, -alpha );
+
+
+ resid = s.Norm() / normb;
+
+ //printf( "--- Iteration %d: residual = %f\n", i, resid );
+
+ if( resid < epsilon ) {
+ // x += alpha * phat;
+ x.Mad( x, phat, alpha );
+ return i;
+ }
+
+ //shat = M.solve(s);
+ //shat.Set( s );
+ M.Precond( &shat, s );
+
+ //t = A * shat;
+ A.Product( shat, t );
+
+ omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
+
+ // x += alpha * phat + omega * shat;
+ x.Mad( x, shat, omega );
+ x.Mad( x, phat, alpha );
+
+ //r = s - omega * t;
+ r.Mad( s, t, -omega );
+
+ rho_2 = rho_1;
+
+ resid = r.Norm() / normb;
+ if( resid < epsilon ) {
+ return i;
+ }
+
+ if( omega == 0 ) {
+            return -i; // breakdown: omega == 0.
+ }
+ }
+
+ return i;
+}
+
+#endif