36 files changed, 0 insertions, 9338 deletions
diff --git a/thirdparty/thekla_atlas/nvmath/Basis.cpp b/thirdparty/thekla_atlas/nvmath/Basis.cpp
deleted file mode 100644
index 0824179633..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Basis.cpp
+++ /dev/null
@@ -1,270 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#include "Basis.h"
-
-using namespace nv;
-
-
-/// Normalize basis vectors.
-void Basis::normalize(float epsilon /*= NV_EPSILON*/)
-{
-    normal = ::normalizeSafe(normal, Vector3(0.0f), epsilon);
-    tangent = ::normalizeSafe(tangent, Vector3(0.0f), epsilon);
-    bitangent = ::normalizeSafe(bitangent, Vector3(0.0f), epsilon);
-}
-
-
-/// Gram-Schmidt orthogonalization.
-/// @note Works only if the vectors are close to orthogonal.
-void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/)
-{
-    // N' = |N|
-    // T' = |T - (N' dot T) N'|
-    // B' = |B - (N' dot B) N' - (T' dot B) T'|
-
-    normal = ::normalize(normal, epsilon);
-
-    tangent -= normal * dot(normal, tangent);
-    tangent = ::normalize(tangent, epsilon);
-
-    bitangent -= normal * dot(normal, bitangent);
-    bitangent -= tangent * dot(tangent, bitangent);
-    bitangent = ::normalize(bitangent, epsilon);
-}
-
-
-
-
-/// Robust orthonormalization. 
-/// Returns an orthonormal basis even when the original is degenerate.
-void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
-{
-    // Normalize all vectors.
-    normalize(epsilon);
-
-    if (lengthSquared(normal) < epsilon*epsilon)
-    {
-        // Build normal from tangent and bitangent.
-        normal = cross(tangent, bitangent);
-
-        if (lengthSquared(normal) < epsilon*epsilon)
-        {
-            // Arbitrary basis.
-            tangent   = Vector3(1, 0, 0);
-            bitangent = Vector3(0, 1, 0);
-            normal    = Vector3(0, 0, 1);
-            return;
-        }
-
-        normal = nv::normalize(normal, epsilon);
-    }
-
-    // Project tangents to normal plane.
-    tangent -= normal * dot(normal, tangent);
-    bitangent -= normal * dot(normal, bitangent);
-
-    if (lengthSquared(tangent) < epsilon*epsilon)
-    {
-        if (lengthSquared(bitangent) < epsilon*epsilon)
-        {
-            // Arbitrary basis.
-            buildFrameForDirection(normal);
-        }
-        else
-        {
-            // Build tangent from bitangent.
-            bitangent = nv::normalize(bitangent, epsilon);
-
-            tangent = cross(bitangent, normal);
-            nvDebugCheck(isNormalized(tangent, epsilon));
-        }
-    }
-    else
-    {
-        tangent = nv::normalize(tangent, epsilon);
-#if 0
-        bitangent -= tangent * dot(tangent, bitangent);
-
-        if (lengthSquared(bitangent) < epsilon*epsilon)
-        {
-            bitangent = cross(tangent, normal);
-            nvDebugCheck(isNormalized(bitangent, epsilon));
-        }
-        else
-        {
-            bitangent = nv::normalize(bitangent, epsilon);
-        }
-#else
-        if (lengthSquared(bitangent) < epsilon*epsilon)
-        {
-            // Build bitangent from tangent.
-            bitangent = cross(tangent, normal);
-            nvDebugCheck(isNormalized(bitangent, epsilon));
-        }
-        else
-        {
-            bitangent = nv::normalize(bitangent, epsilon);
-
-            // At this point tangent and bitangent are orthogonal to normal, but we don't know their orientation.
-            
-            Vector3 bisector;
-            if (lengthSquared(tangent + bitangent) < epsilon*epsilon)
-            {
-                bisector = tangent;
-            }
-            else
-            {
-                bisector = nv::normalize(tangent + bitangent);
-            }
-            Vector3 axis = nv::normalize(cross(bisector, normal));
-
-            //nvDebugCheck(isNormalized(axis, epsilon));
-            nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
-
-            if (dot(axis, tangent) > 0)
-            {
-                tangent = bisector + axis;
-                bitangent = bisector - axis;
-            }
-            else
-            {
-                tangent = bisector - axis;
-                bitangent = bisector + axis;
-            }
-
-            // Make sure the resulting tangents are still perpendicular to the normal.
-            tangent -= normal * dot(normal, tangent);
-            bitangent -= normal * dot(normal, bitangent);
-
-            // Double check.
-            nvDebugCheck(equal(dot(normal, tangent), 0.0f, epsilon));
-            nvDebugCheck(equal(dot(normal, bitangent), 0.0f, epsilon));
-
-            // Normalize.
-            tangent = nv::normalize(tangent);
-            bitangent = nv::normalize(bitangent);
-
-            // If tangent and bitangent are not orthogonal, then derive bitangent from tangent, just in case...
-            if (!equal(dot(tangent, bitangent), 0.0f, epsilon)) {
-                bitangent = cross(tangent, normal);
-                bitangent = nv::normalize(bitangent);
-            }
-        }
-#endif
-    }
-
-    /*// Check vector lengths.
-    if (!isNormalized(normal, epsilon))
-    {
-    nvDebug("%f %f %f\n", normal.x, normal.y, normal.z);
-    nvDebug("%f %f %f\n", tangent.x, tangent.y, tangent.z);
-    nvDebug("%f %f %f\n", bitangent.x, bitangent.y, bitangent.z);
-    }*/
-
-    nvDebugCheck(isNormalized(normal, epsilon));
-    nvDebugCheck(isNormalized(tangent, epsilon));
-    nvDebugCheck(isNormalized(bitangent, epsilon));
-
-    // Check vector angles.
-    nvDebugCheck(equal(dot(normal, tangent), 0.0f, epsilon));
-    nvDebugCheck(equal(dot(normal, bitangent), 0.0f, epsilon));
-    nvDebugCheck(equal(dot(tangent, bitangent), 0.0f, epsilon));
-
-    // Check vector orientation.
-    const float det = dot(cross(normal, tangent), bitangent);
-    nvDebugCheck(equal(det, 1.0f, epsilon) || equal(det, -1.0f, epsilon));
-}
-
-
-/// Build an arbitrary frame for the given direction.
-void Basis::buildFrameForDirection(Vector3::Arg d, float angle/*= 0*/)
-{
-    nvCheck(isNormalized(d));
-    normal = d;
-
-    // Choose minimum axis.
-    if (fabsf(normal.x) < fabsf(normal.y) && fabsf(normal.x) < fabsf(normal.z))
-    {
-        tangent = Vector3(1, 0, 0);
-    }
-    else if (fabsf(normal.y) < fabsf(normal.z))
-    {
-        tangent = Vector3(0, 1, 0);
-    }
-    else
-    {
-        tangent = Vector3(0, 0, 1);
-    }
-
-    // Orthogonalize
-    tangent -= normal * dot(normal, tangent);
-    tangent = ::normalize(tangent);
-
-    bitangent = cross(normal, tangent);
-
-    // Rotate frame around normal according to angle.
-    if (angle != 0.0f) {
-        float c = cosf(angle);
-        float s = sinf(angle);
-        Vector3 tmp = c * tangent - s * bitangent;
-        bitangent = s * tangent + c * bitangent;
-        tangent = tmp;
-    }
-}
-
-bool Basis::isValid() const
-{
-    if (equal(normal, Vector3(0.0f))) return false;
-    if (equal(tangent, Vector3(0.0f))) return false;
-    if (equal(bitangent, Vector3(0.0f))) return false;
-
-    if (equal(determinant(), 0.0f)) return false;
-
-    return true;
-}
-
-
-/// Transform by this basis. (From this basis to object space).
-Vector3 Basis::transform(Vector3::Arg v) const
-{
-    Vector3 o = tangent * v.x;
-    o += bitangent * v.y;
-    o += normal * v.z;
-    return o;
-}
-
-/// Transform by the transpose. (From object space to this basis).
-Vector3 Basis::transformT(Vector3::Arg v)
-{
-    return Vector3(dot(tangent, v), dot(bitangent, v), dot(normal, v));
-}
-
-/// Transform by the inverse. (From object space to this basis).
-/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
-Vector3 Basis::transformI(Vector3::Arg v) const
-{
-    const float det = determinant();
-    nvDebugCheck(!equal(det, 0.0f, 0.0f));
-
-    const float idet = 1.0f / det;
-
-    // Rows of the inverse matrix.
-    Vector3 r0(
-        (bitangent.y * normal.z - bitangent.z * normal.y),
-        -(bitangent.x * normal.z - bitangent.z * normal.x),
-        (bitangent.x * normal.y - bitangent.y * normal.x));
-
-    Vector3 r1(
-        -(tangent.y * normal.z - tangent.z * normal.y),
-        (tangent.x * normal.z - tangent.z * normal.x),
-        -(tangent.x * normal.y - tangent.y * normal.x));
-
-    Vector3 r2(
-        (tangent.y * bitangent.z - tangent.z * bitangent.y),
-        -(tangent.x * bitangent.z - tangent.z * bitangent.x),
-        (tangent.x * bitangent.y - tangent.y * bitangent.x));
-
-    return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
-}
-
-
diff --git a/thirdparty/thekla_atlas/nvmath/Basis.h b/thirdparty/thekla_atlas/nvmath/Basis.h
deleted file mode 100644
index e8146afdbe..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Basis.h
+++ /dev/null
@@ -1,82 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_BASIS_H
-#define NV_MATH_BASIS_H
-
-#include "nvmath.h"
-#include "Vector.inl"
-#include "Matrix.h"
-
-namespace nv
-{
-
-    /// Basis class to compute tangent space basis, orthogonalizations and to
-    /// transform vectors from one space to another.
-    class Basis
-    {
-    public:
-
-        /// Create a null basis.
-        Basis() : tangent(0, 0, 0), bitangent(0, 0, 0), normal(0, 0, 0) {}
-
-        /// Create a basis given three vectors.
-        Basis(Vector3::Arg n, Vector3::Arg t, Vector3::Arg b) : tangent(t), bitangent(b), normal(n) {}
-
-        /// Create a basis with the given tangent vectors and the handedness.
-        Basis(Vector3::Arg n, Vector3::Arg t, float sign)
-        {
-            build(n, t, sign);
-        }
-
-        NVMATH_API void normalize(float epsilon = NV_EPSILON);
-        NVMATH_API void orthonormalize(float epsilon = NV_EPSILON);
-        NVMATH_API void robustOrthonormalize(float epsilon = NV_EPSILON);
-        NVMATH_API void buildFrameForDirection(Vector3::Arg d, float angle = 0);
-
-        /// Calculate the determinant [ F G N ] to obtain the handedness of the basis.
-        float handness() const
-        {
-            return determinant() > 0.0f ? 1.0f : -1.0f;
-        }
-
-        /// Build a basis from 2 vectors and a handedness flag.
-        void build(Vector3::Arg n, Vector3::Arg t, float sign)
-        {
-            normal = n;
-            tangent = t;
-            bitangent = sign * cross(t, n);
-        }
-
-        /// Compute the determinant of this basis.
-        float determinant() const
-        {
-            return 
-                tangent.x * bitangent.y * normal.z - tangent.z * bitangent.y * normal.x +
-                tangent.y * bitangent.z * normal.x - tangent.y * bitangent.x * normal.z + 
-                tangent.z * bitangent.x * normal.y - tangent.x * bitangent.z * normal.y;
-        }
-
-        bool isValid() const;
-
-        // Get transform matrix for this basis.
-        NVMATH_API Matrix matrix() const;
-
-        // Transform by this basis. (From this basis to object space).
-        NVMATH_API Vector3 transform(Vector3::Arg v) const;
-
-        // Transform by the transpose. (From object space to this basis).
-        NVMATH_API Vector3 transformT(Vector3::Arg v);
-
-        // Transform by the inverse. (From object space to this basis).
-        NVMATH_API Vector3 transformI(Vector3::Arg v) const;
-
-
-        Vector3 tangent;
-        Vector3 bitangent;
-        Vector3 normal;
-    };
-
-} // nv namespace
-
-#endif // NV_MATH_BASIS_H
diff --git a/thirdparty/thekla_atlas/nvmath/Box.cpp b/thirdparty/thekla_atlas/nvmath/Box.cpp
deleted file mode 100644
index 8f2014a077..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Box.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include "Box.h"
-#include "Box.inl"
-#include "Sphere.h"
-
-using namespace nv;
-
-
-
-
-// Clip the given segment against this box.
-bool Box::clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const {
-
-	// Avoid aliasing.
-	float tnear = *t_near;
-	float tfar = *t_far;
-
-	// clip ray segment to box
-	for (int i = 0; i < 3; i++)
-	{
-		const float pos = origin.component[i] + tfar * dir.component[i];
-		const float dt = tfar - tnear;
-
-		if (dir.component[i] < 0) {
-			
-			// clip end point
-			if (pos < minCorner.component[i]) {
-                tfar = tnear + dt * (origin.component[i] - minCorner.component[i]) / (origin.component[i] - pos);
-			}
-			
-			// clip start point
-			if (origin.component[i] > maxCorner.component[i]) {
-				tnear = tnear + dt * (origin.component[i] - maxCorner.component[i]) / (tfar * dir.component[i]);
-			}
-		}
-		else {
-
-			// clip end point
-			if (pos > maxCorner.component[i]) {
-				tfar = tnear + dt * (maxCorner.component[i] - origin.component[i]) / (pos - origin.component[i]);
-			}
-
-			// clip start point
-			if (origin.component[i] < minCorner.component[i]) {
-				tnear = tnear + dt * (minCorner.component[i] - origin.component[i]) / (tfar * dir.component[i]);
-			}
-		}
-
-		if (tnear > tfar) {
-			// Clipped away.
-			return false;
-		}
-	}
-
-	// Return result.
-	*t_near = tnear;
-	*t_far = tfar;
-	return true;
-}
-
-
-float nv::distanceSquared(const Box &box, const Vector3 &point) {
-    Vector3 closest;
-
-    if (point.x < box.minCorner.x) closest.x = box.minCorner.x;
-    else if (point.x > box.maxCorner.x) closest.x = box.maxCorner.x;
-    else closest.x = point.x;
-
-    if (point.y < box.minCorner.y) closest.y = box.minCorner.y;
-    else if (point.y > box.maxCorner.y) closest.y = box.maxCorner.y;
-    else closest.y = point.y;
-
-    if (point.z < box.minCorner.z) closest.z = box.minCorner.z;
-    else if (point.z > box.maxCorner.z) closest.z = box.maxCorner.z;
-    else closest.z = point.z;
-
-    return lengthSquared(point - closest);
-}
-
-bool nv::overlap(const Box &box, const Sphere &sphere) {
-    return distanceSquared(box, sphere.center) < sphere.radius * sphere.radius;
-}
-
-
-bool nv::intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t /*= NULL*/) {
-    // Precompute these in ray structure?
-    int sdx = (id.x < 0);
-    int sdy = (id.y < 0);
-    int sdz = (id.z < 0);
-
-    float tmin = (box.corner(  sdx).x - p.x) * id.x;
-    float tmax = (box.corner(1-sdx).x - p.x) * id.x;
-    float tymin = (box.corner(  sdy).y - p.y) * id.y;
-    float tymax = (box.corner(1-sdy).y - p.y) * id.y;
-
-    if ((tmin > tymax) || (tymin > tmax)) 
-        return false;
-
-    if (tymin > tmin) tmin = tymin;
-    if (tymax < tmax) tmax = tymax;
-
-    float tzmin = (box.corner(  sdz).z - p.z) * id.z;
-    float tzmax = (box.corner(1-sdz).z - p.z) * id.z;
-
-    if ((tmin > tzmax) || (tzmin > tmax)) 
-        return false;
-
-    if (tzmin > tmin) tmin = tzmin;
-    if (tzmax < tmax) tmax = tzmax;
-
-    if (tmax < 0) 
-        return false;
-
-    if (t != NULL) *t = tmin;
-
-    return true;
-}
-
diff --git a/thirdparty/thekla_atlas/nvmath/Box.h b/thirdparty/thekla_atlas/nvmath/Box.h
deleted file mode 100644
index 19b5f2a3a5..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Box.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_BOX_H
-#define NV_MATH_BOX_H
-
-#include "Vector.h"
-
-#include <float.h> // FLT_MAX
-
-namespace nv
-{
-    class Vector;
-    class Stream;
-    class Sphere;
-
-    // Axis Aligned Bounding Box.
-    class Box
-    {
-    public:
-
-        inline Box() {}
-        inline Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) {}
-        inline Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) {}
-
-        Box & operator=(const Box & b);
-
-        operator const float * () const { return reinterpret_cast<const float *>(this); }
-
-        // Clear the bounds.
-        void clearBounds();
-
-        // min <= max
-        bool isValid() const;
-
-        // Build a cube centered on center and with edge = 2*dist
-        void cube(const Vector3 & center, float dist);
-
-        // Build a box, given center and extents.
-        void setCenterExtents(const Vector3 & center, const Vector3 & extents);
-
-        // Get box center.
-        Vector3 center() const;
-
-        // Return extents of the box.
-        Vector3 extents() const;
-
-        // Return the extent of the box along the given axis (0 = x, 1 = y, 2 = z).
-        float extents(uint axis) const;
-
-        // Add a point to this box.
-        void addPointToBounds(const Vector3 & p);
-
-        // Add a box to this box.
-        void addBoxToBounds(const Box & b);
-
-        // Add sphere to this box.
-        void addSphereToBounds(const Vector3 & p, float r);
-
-        // Translate box.
-        void translate(const Vector3 & v);
-
-        // Scale the box.
-        void scale(float s);
-
-        // Expand the box by a fixed amount.
-        void expand(float r);
-
-        // Get the area of the box.
-        float area() const;
- 
-        // Get the volume of the box.
-        float volume() const;
-
-        // Return true if the box contains the given point.
-        bool contains(const Vector3 & p) const;
-
-        // Split the given box in 8 octants and assign the ith one to this box.
-        void setOctant(const Box & box, const Vector3 & center, int i);
-
-
-        // Clip the given segment against this box.
-        bool clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const;
-
-
-        friend Stream & operator<< (Stream & s, Box & box);
-
-        const Vector3 & corner(int i) const { return (&minCorner)[i]; }
-
-        Vector3 minCorner;
-        Vector3 maxCorner;
-    };
-
-    float distanceSquared(const Box &box, const Vector3 &point);
-    bool overlap(const Box &box, const Sphere &sphere);
-
-    // p is ray origin, id is inverse ray direction.
-    bool intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t);
-
-} // nv namespace
-
-
-#endif // NV_MATH_BOX_H
diff --git a/thirdparty/thekla_atlas/nvmath/Box.inl b/thirdparty/thekla_atlas/nvmath/Box.inl
deleted file mode 100644
index dcfa70ff96..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Box.inl
+++ /dev/null
@@ -1,154 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_BOX_INL
-#define NV_MATH_BOX_INL
-
-#include "Box.h"
-#include "Vector.inl"
-
-#include <float.h> // FLT_MAX
-
-namespace nv
-{
-    // Default ctor.
-    //inline Box::Box() { };
-
-    // Copy ctor.
-    //inline Box::Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) { }
-
-    // Init ctor.
-    //inline Box::Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) { }
-
-    // Assignment operator.
-    inline Box & Box::operator=(const Box & b) { minCorner = b.minCorner; maxCorner = b.maxCorner; return *this; }
-
-    // Clear the bounds.
-    inline void Box::clearBounds()
-    {
-        minCorner.set(FLT_MAX, FLT_MAX, FLT_MAX);
-        maxCorner.set(-FLT_MAX, -FLT_MAX, -FLT_MAX);
-    }
-
-    // min <= max
-    inline bool Box::isValid() const
-    {
-        return minCorner.x <= maxCorner.x && minCorner.y <= maxCorner.y && minCorner.z <= maxCorner.z;
-    }
-
-    // Build a cube centered on center and with edge = 2*dist
-    inline void Box::cube(const Vector3 & center, float dist)
-    {
-        setCenterExtents(center, Vector3(dist));
-    }
-
-    // Build a box, given center and extents.
-    inline void Box::setCenterExtents(const Vector3 & center, const Vector3 & extents)
-    {
-        minCorner = center - extents;
-        maxCorner = center + extents;
-    }
-
-    // Get box center.
-    inline Vector3 Box::center() const
-    {
-        return (minCorner + maxCorner) * 0.5f;
-    }
-
-    // Return extents of the box.
-    inline Vector3 Box::extents() const
-    {
-        return (maxCorner - minCorner) * 0.5f;
-    }
-
-    // Return the extent of the box along the given axis (0 = x, 1 = y, 2 = z).
-    inline float Box::extents(uint axis) const
-    {
-        nvDebugCheck(axis < 3);
-        if (axis == 0) return (maxCorner.x - minCorner.x) * 0.5f;
-        if (axis == 1) return (maxCorner.y - minCorner.y) * 0.5f;
-        if (axis == 2) return (maxCorner.z - minCorner.z) * 0.5f;
-        nvUnreachable();
-        return 0.0f;
-    }
-
-    // Add a point to this box.
-    inline void Box::addPointToBounds(const Vector3 & p)
-    {
-        minCorner = min(minCorner, p);
-        maxCorner = max(maxCorner, p);
-    }
-
-    // Add a box to this box.
-    inline void Box::addBoxToBounds(const Box & b)
-    {
-        minCorner = min(minCorner, b.minCorner);
-        maxCorner = max(maxCorner, b.maxCorner);
-    }
-
-    // Add sphere to this box.
-    inline void Box::addSphereToBounds(const Vector3 & p, float r) {
-        minCorner = min(minCorner, p - Vector3(r));
-        maxCorner = min(maxCorner, p + Vector3(r));
-    }
-
-    // Translate box.
-    inline void Box::translate(const Vector3 & v)
-    {
-        minCorner += v;
-        maxCorner += v;
-    }
-
-    // Scale the box.
-    inline void Box::scale(float s)
-    {
-        minCorner *= s;
-        maxCorner *= s;
-    }
-
-    // Expand the box by a fixed amount.
-    inline void Box::expand(float r) {
-        minCorner -= Vector3(r,r,r);
-        maxCorner += Vector3(r,r,r);
-    }
-
-    // Get the area of the box.
-    inline float Box::area() const
-    {
-        const Vector3 d = extents();
-        return 8.0f * (d.x*d.y + d.x*d.z + d.y*d.z);
-    }	
-
-    // Get the volume of the box.
-    inline float Box::volume() const
-    {
-        Vector3 d = extents();
-        return 8.0f * (d.x * d.y * d.z);
-    }
-
-    // Return true if the box contains the given point.
-    inline bool Box::contains(const Vector3 & p) const
-    {
-        return 
-            minCorner.x < p.x && minCorner.y < p.y && minCorner.z < p.z &&
-            maxCorner.x > p.x && maxCorner.y > p.y && maxCorner.z > p.z;
-    }
-
-    // Split the given box in 8 octants and assign the ith one to this box.
-    inline void Box::setOctant(const Box & box, const Vector3 & center, int i)
-    {
-        minCorner = box.minCorner;
-        maxCorner = box.maxCorner;
-
-        if (i & 4) minCorner.x = center.x;
-        else       maxCorner.x = center.x;
-        if (i & 2) minCorner.y = center.y;
-        else       maxCorner.y = center.y;
-        if (i & 1) minCorner.z = center.z;
-        else       maxCorner.z = center.z;
-    }
-
-} // nv namespace
-
-
-#endif // NV_MATH_BOX_INL
diff --git a/thirdparty/thekla_atlas/nvmath/Color.h b/thirdparty/thekla_atlas/nvmath/Color.h
deleted file mode 100644
index 5cdc374bd9..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Color.h
+++ /dev/null
@@ -1,150 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_COLOR_H
-#define NV_MATH_COLOR_H
-
-#include "nvmath.h"
-
-namespace nv
-{
-
-    /// 64 bit color stored as BGRA.
-    class NVMATH_CLASS Color64 
-    {
-    public:
-        Color64() { }
-        Color64(const Color64 & c) : u(c.u) { }
-        Color64(uint16 R, uint16 G, uint16 B, uint16 A) { setRGBA(R, G, B, A); }
-        explicit Color64(uint64 U) : u(U) { }
-
-        void setRGBA(uint16 R, uint16 G, uint16 B, uint16 A)
-        {
-            r = R;
-            g = G;
-            b = B;
-            a = A;
-        }
-
-        operator uint64 () const {
-            return u;
-        }
-
-        union {
-            struct {
-#if NV_LITTLE_ENDIAN
-                uint16 r, a, b, g;
-#else
-                uint16 a: 16;
-                uint16 r: 16;
-                uint16 g: 16;
-                uint16 b: 16;
-#endif
-            };
-            uint64 u;
-        };
-    };
-
-    /// 32 bit color stored as BGRA.
-    class NVMATH_CLASS Color32
-    {
-    public:
-        Color32() { }
-        Color32(const Color32 & c) : u(c.u) { }
-        Color32(uint8 R, uint8 G, uint8 B) { setRGBA(R, G, B, 0xFF); }
-        Color32(uint8 R, uint8 G, uint8 B, uint8 A) { setRGBA( R, G, B, A); }
-        //Color32(uint8 c[4]) { setRGBA(c[0], c[1], c[2], c[3]); }
-        //Color32(float R, float G, float B) { setRGBA(uint(R*255), uint(G*255), uint(B*255), 0xFF); }
-        //Color32(float R, float G, float B, float A) { setRGBA(uint(R*255), uint(G*255), uint(B*255), uint(A*255)); }
-        explicit Color32(uint32 U) : u(U) { }
-
-        void setRGBA(uint8 R, uint8 G, uint8 B, uint8 A)
-        {
-            r = R;
-            g = G;
-            b = B;
-            a = A;
-        }
-
-        void setBGRA(uint8 B, uint8 G, uint8 R, uint8 A = 0xFF)
-        {
-            r = R;
-            g = G;
-            b = B;
-            a = A;
-        }
-
-        operator uint32 () const {
-            return u;
-        }
-
-        union {
-            struct {
-#if NV_LITTLE_ENDIAN
-                uint8 b, g, r, a;
-#else
-                uint8 a: 8;
-                uint8 r: 8;
-                uint8 g: 8;
-                uint8 b: 8;
-#endif
-            };
-            uint8 component[4];
-            uint32 u;
-        };
-    };
-
-
-    /// 16 bit 565 BGR color.
-    class NVMATH_CLASS Color16
-    {
-    public:
-        Color16() { }
-        Color16(const Color16 & c) : u(c.u) { }
-        explicit Color16(uint16 U) : u(U) { }
-
-        union {
-            struct {
-#if NV_LITTLE_ENDIAN
-                uint16 b : 5;
-                uint16 g : 6;
-                uint16 r : 5;
-#else
-                uint16 r : 5;
-                uint16 g : 6;
-                uint16 b : 5;
-#endif
-            };
-            uint16 u;
-        };
-    };
-
-    /// 16 bit 4444 BGRA color.
-    class NVMATH_CLASS Color16_4444
-    {
-    public:
-        Color16_4444() { }
-        Color16_4444(const Color16_4444 & c) : u(c.u) { }
-        explicit Color16_4444(uint16 U) : u(U) { }
-
-        union {
-            struct {
-#if NV_LITTLE_ENDIAN
-                uint16 b : 4;
-                uint16 g : 4;
-                uint16 r : 4;
-                uint16 a : 4;
-#else
-                uint16 a : 4;
-                uint16 r : 4;
-                uint16 g : 4;
-                uint16 b : 4;
-#endif
-            };
-            uint16 u;
-        };
-    };
-
-} // nv namespace
-
-#endif // NV_MATH_COLOR_H
diff --git a/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp b/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp
deleted file mode 100644
index a4a95dace4..0000000000
--- a/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#include "ConvexHull.h"
-
-#include "Vector.inl"
-
-#include "nvcore/RadixSort.h"
-#include "nvcore/Array.inl"
-
-using namespace nv;
-
-inline static float triangleArea(Vector2::Arg v1, Vector2::Arg v2, Vector2::Arg v3)
-{
-    return 0.5f * (v3.x * v1.y + v1.x * v2.y + v2.x * v3.y - v2.x * v1.y - v3.x * v2.y - v1.x * v3.y);
-}
-
-
-// Compute the convex hull using Graham Scan.
-void nv::convexHull(const Array<Vector2> & input, Array<Vector2> & output, float epsilon/*=0*/)
-{
-    const uint inputCount = input.count();
-
-    Array<float> coords;
-    coords.resize(inputCount);
-
-    for (uint i = 0; i < inputCount; i++) {
-        coords[i] = input[i].x;
-    }
-
-    RadixSort radix;
-    radix.sort(coords);
-
-    const uint * ranks = radix.ranks();
-
-    Array<Vector2> top(inputCount);
-    Array<Vector2> bottom(inputCount);
-
-    Vector2 P = input[ranks[0]];
-    Vector2 Q = input[ranks[inputCount-1]];
-
-    float topy = max(P.y, Q.y);
-    float boty = min(P.y, Q.y);
-
-    for (uint i = 0; i < inputCount; i++) {
-        Vector2 p = input[ranks[i]];
-        if (p.y >= boty) top.append(p);
-    }
-
-    for (uint i = 0; i < inputCount; i++) {
-        Vector2 p = input[ranks[inputCount-1-i]];
-        if (p.y <= topy) bottom.append(p);
-    }
-
-    // Filter top list.
-    output.clear();
-    output.append(top[0]);
-    output.append(top[1]);
-
-    for (uint i = 2; i < top.count(); ) {
-        Vector2 a = output[output.count()-2];
-        Vector2 b = output[output.count()-1];
-        Vector2 c = top[i];
-
-        float area = triangleArea(a, b, c);
-
-        if (area >= -epsilon) {
-            output.popBack();
-        }
-
-        if (area < -epsilon || output.count() == 1) {
-            output.append(c);
-            i++;
-        }
-    }
-    
-    uint top_count = output.count();
-    output.append(bottom[1]);
-
-    // Filter bottom list.
-    for (uint i = 2; i < bottom.count(); ) {
-        Vector2 a = output[output.count()-2];
-        Vector2 b = output[output.count()-1];
-        Vector2 c = bottom[i];
-
-        float area = triangleArea(a, b, c);
-
-        if (area >= -epsilon) {
-            output.popBack();
-        }
-
-        if (area < -epsilon || output.count() == top_count) {
-            output.append(c);
-            i++;
-        }
-    }
-
-    // Remove duplicate element.
-    nvDebugCheck(output.front() == output.back());
-    output.popBack();
-}
-
-/*
-void testConvexHull() {
-
-    Array<Vector2> points;
-    points.append(Vector2(1.00, 1.00));
-    points.append(Vector2(0.00, 0.00));
-    points.append(Vector2(1.00, 1.00));
-    points.append(Vector2(1.00, -1.00));
-    points.append(Vector2(2.00, 5.00));
-    points.append(Vector2(-5.00, 3.00));
-    points.append(Vector2(-4.00, -3.00));
-    points.append(Vector2(7.00, -4.00));
-
-    Array<Vector2> hull;
-    convexHull(points, hull);
-
-}
-*/
-
diff --git a/thirdparty/thekla_atlas/nvmath/ConvexHull.h b/thirdparty/thekla_atlas/nvmath/ConvexHull.h
deleted file mode 100644
index 6c2db5d73f..0000000000
--- a/thirdparty/thekla_atlas/nvmath/ConvexHull.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_CONVEXHULL_H
-#define NV_MATH_CONVEXHULL_H
-
-#include "nvmath.h"
-#include "nvcore/Array.h"
-
-namespace nv {
-    class Vector2;
- 
-    void convexHull(const Array<Vector2> & input, Array<Vector2> & output, float epsilon = 0);
-
-} // namespace nv
-
-#endif // NV_MATH_CONVEXHULL_H
diff --git a/thirdparty/thekla_atlas/nvmath/Fitting.cpp b/thirdparty/thekla_atlas/nvmath/Fitting.cpp
deleted file mode 100644
index 6cd5cb0f32..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Fitting.cpp
+++ /dev/null
@@ -1,1205 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#include "Fitting.h"
-#include "Vector.inl"
-#include "Plane.inl"
-
-#include "nvcore/Array.inl"
-#include "nvcore/Utils.h" // max, swap
-
-#include <float.h> // FLT_MAX
-//#include <vector>
-#include <string.h>
-
-using namespace nv;
-
-// @@ Move to EigenSolver.h
-
-// @@ We should be able to do something cheaper...
-static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
-{
-	const Vector3 row0(matrix[0], matrix[1], matrix[2]);
-	const Vector3 row1(matrix[1], matrix[3], matrix[4]);
-	const Vector3 row2(matrix[2], matrix[4], matrix[5]);
-
-	float r0 = lengthSquared(row0);
-	float r1 = lengthSquared(row1);
-	float r2 = lengthSquared(row2);
-
-	if (r0 > r1 && r0 > r2) return row0;
-	if (r1 > r2) return row1;
-	return row2;
-}
-
-
-static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
-{
-    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-    {
-        return Vector3(0.0f);
-    }
-
-    Vector3 v = estimatePrincipalComponent(matrix);
-
-    const int NUM = 8;
-    for (int i = 0; i < NUM; i++)
-    {
-        float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
-        float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];
-        float z = v.x * matrix[2] + v.y * matrix[4] + v.z * matrix[5];
-
-        float norm = max(max(x, y), z);
-
-        v = Vector3(x, y, z) / norm;
-    }
-
-    return v;
-}
-
-
-Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
-{
-    Vector3 centroid(0.0f);
-
-    for (int i = 0; i < n; i++)
-    {
-        centroid += points[i];
-    }
-    centroid /= float(n);
-
-    return centroid;
-}
-
-Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
-{
-    Vector3 centroid(0.0f);
-    float total = 0.0f;
-
-    for (int i = 0; i < n; i++)
-    {
-        total += weights[i];
-        centroid += weights[i]*points[i];
-    }
-    centroid /= total;
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points)
-{
-    Vector4 centroid(0.0f);
-
-    for (int i = 0; i < n; i++)
-    {
-        centroid += points[i];
-    }
-    centroid /= float(n);
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
-{
-    Vector4 centroid(0.0f);
-    float total = 0.0f;
-
-    for (int i = 0; i < n; i++)
-    {
-        total += weights[i];
-        centroid += weights[i]*points[i];
-    }
-    centroid /= total;
-
-    return centroid;
-}
-
-
-
-Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector3 centroid = computeCentroid(n, points);
-
-    // compute covariance matrix
-    for (int i = 0; i < 6; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector3 v = points[i] - centroid;
-
-        covariance[0] += v.x * v.x;
-        covariance[1] += v.x * v.y;
-        covariance[2] += v.x * v.z;
-        covariance[3] += v.y * v.y;
-        covariance[4] += v.y * v.z;
-        covariance[5] += v.z * v.z;
-    }
-
-    return centroid;
-}
-
-Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector3 centroid = computeCentroid(n, points, weights, metric);
-
-    // compute covariance matrix
-    for (int i = 0; i < 6; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector3 a = (points[i] - centroid) * metric;
-        Vector3 b = weights[i]*a;
-
-        covariance[0] += a.x * b.x;
-        covariance[1] += a.x * b.y;
-        covariance[2] += a.x * b.z;
-        covariance[3] += a.y * b.y;
-        covariance[4] += a.y * b.z;
-        covariance[5] += a.z * b.z;
-    }
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector4 centroid = computeCentroid(n, points);
-
-    // compute covariance matrix
-    for (int i = 0; i < 10; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector4 v = points[i] - centroid;
-
-        covariance[0] += v.x * v.x;
-        covariance[1] += v.x * v.y;
-        covariance[2] += v.x * v.z;
-        covariance[3] += v.x * v.w;
-
-		covariance[4] += v.y * v.y;
-        covariance[5] += v.y * v.z;
-        covariance[6] += v.y * v.w;
-
-		covariance[7] += v.z * v.z;
-		covariance[8] += v.z * v.w;
-
-		covariance[9] += v.w * v.w;
-	}
-
-    return centroid;
-}
-
-Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance)
-{
-    // compute the centroid
-    Vector4 centroid = computeCentroid(n, points, weights, metric);
-
-    // compute covariance matrix
-    for (int i = 0; i < 10; i++)
-    {
-        covariance[i] = 0.0f;
-    }
-
-    for (int i = 0; i < n; i++)
-    {
-        Vector4 a = (points[i] - centroid) * metric;
-        Vector4 b = weights[i]*a;
-
-        covariance[0] += a.x * b.x;
-        covariance[1] += a.x * b.y;
-        covariance[2] += a.x * b.z;
-        covariance[3] += a.x * b.w;
-
-		covariance[4] += a.y * b.y;
-        covariance[5] += a.y * b.z;
-        covariance[6] += a.y * b.w;
-
-		covariance[7] += a.z * b.z;
-		covariance[8] += a.z * b.w;
-
-		covariance[9] += a.w * b.w;
-    }
-
-    return centroid;
-}
-
-
-
-Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
-{
-    float matrix[6];
-    computeCovariance(n, points, matrix);
-
-    return firstEigenVector_PowerMethod(matrix);
-}
-
-Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
-{
-    float matrix[6];
-    computeCovariance(n, points, weights, metric, matrix);
-
-    return firstEigenVector_PowerMethod(matrix);
-}
-
-
-
-static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix)
-{
-    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-    {
-        return Vector3(0.0f);
-    }
-
-    float eigenValues[3];
-    Vector3 eigenVectors[3];
-	if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
-	{
-		return Vector3(0.0f);
-	}
-
-	return eigenVectors[0];
-}
-
-Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
-{
-    float matrix[6];
-    computeCovariance(n, points, matrix);
-
-    return firstEigenVector_EigenSolver3(matrix);
-}
-
-Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
-{
-    float matrix[6];
-    computeCovariance(n, points, weights, metric, matrix);
-
-    return firstEigenVector_EigenSolver3(matrix);
-}
-
-
-
-static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix)
-{
-    if (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0)
-    {
-        return Vector4(0.0f);
-    }
-
-    float eigenValues[4];
-    Vector4 eigenVectors[4];
-	if (!nv::Fit::eigenSolveSymmetric4(matrix, eigenValues, eigenVectors))
-	{
-		return Vector4(0.0f);
-	}
-
-	return eigenVectors[0];
-}
-
-Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points)
-{
-    float matrix[10];
-    computeCovariance(n, points, matrix);
-
-    return firstEigenVector_EigenSolver4(matrix);
-}
-
-Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
-{
-    float matrix[10];
-    computeCovariance(n, points, weights, metric, matrix);
-
-    return firstEigenVector_EigenSolver4(matrix);
-}
-
-
-
-void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
-
-Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
-{
-	// Store the points in an n x n matrix
-    Array<float> Q; Q.resize(n*n, 0.0f);
-	for (int i = 0; i < n; ++i)
-	{
-		Q[i*n+0] = points[i].x;
-		Q[i*n+1] = points[i].y;
-		Q[i*n+2] = points[i].z;
-	}
-
-	// Alloc space for the SVD outputs
-    Array<float> diag; diag.resize(n, 0.0f);
-    Array<float> R; R.resize(n*n, 0.0f);
-
-	ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
-
-	// Get the principal component
-	return Vector3(R[0], R[1], R[2]);
-}
-
-Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
-{
-	// Store the points in an n x n matrix
-    Array<float> Q; Q.resize(n*n, 0.0f);
-	for (int i = 0; i < n; ++i)
-	{
-		Q[i*n+0] = points[i].x;
-		Q[i*n+1] = points[i].y;
-		Q[i*n+2] = points[i].z;
-		Q[i*n+3] = points[i].w;
-	}
-
-	// Alloc space for the SVD outputs
-    Array<float> diag; diag.resize(n, 0.0f);
-    Array<float> R; R.resize(n*n, 0.0f);
-
-	ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
-
-	// Get the principal component
-	return Vector4(R[0], R[1], R[2], R[3]);
-}
-
-
-
-Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
-{
-    // compute the centroid and covariance
-    float matrix[6];
-    Vector3 centroid = computeCovariance(n, points, matrix);
-
-    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-    {
-        // If no plane defined, then return a horizontal plane.
-        return Plane(Vector3(0, 0, 1), centroid);
-    }
-
-    float eigenValues[3];
-    Vector3 eigenVectors[3];
-    if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
-        // If no plane defined, then return a horizontal plane.
-        return Plane(Vector3(0, 0, 1), centroid);
-    }
-
-    return Plane(eigenVectors[2], centroid);
-}
-
-bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/)
-{
-    // compute the centroid and covariance
-    float matrix[6];
-    computeCovariance(n, points, matrix);
-
-    float eigenValues[3];
-    Vector3 eigenVectors[3];
-    if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
-        return false;
-    }
-
-    return eigenValues[2] < epsilon;
-}
-
-
-
-// Tridiagonal solver from Charles Bloom. 
-// Householder transforms followed by QL decomposition. 
-// Seems to be based on the code from Numerical Recipes in C.
-
-static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd);
-static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd);
-
-bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
-{
-    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
-
-    float subd[3];
-    float diag[3];
-    float work[3][3];
-
-    work[0][0] = matrix[0];
-    work[0][1] = work[1][0] = matrix[1];
-    work[0][2] = work[2][0] = matrix[2];
-    work[1][1] = matrix[3];
-    work[1][2] = work[2][1] = matrix[4];
-    work[2][2] = matrix[5];
-
-    EigenSolver3_Tridiagonal(work, diag, subd);
-    if (!EigenSolver3_QLAlgorithm(work, diag, subd))
-    {
-        for (int i = 0; i < 3; i++) {
-            eigenValues[i] = 0;
-            eigenVectors[i] = Vector3(0);
-        }
-        return false;
-    }
-
-    for (int i = 0; i < 3; i++) {
-        eigenValues[i] = (float)diag[i];
-    }
-
-    // eigenvectors are the columns; make them the rows :
-
-    for (int i=0; i < 3; i++)
-    {
-        for (int j = 0; j < 3; j++)
-        {
-            eigenVectors[j].component[i] = (float) work[i][j];
-        }
-    }
-
-    // shuffle to sort by singular value :
-    if (eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1])
-    {
-        swap(eigenValues[0], eigenValues[2]);
-        swap(eigenVectors[0], eigenVectors[2]);
-    }
-    if (eigenValues[1] > eigenValues[0])
-    {
-        swap(eigenValues[0], eigenValues[1]);
-        swap(eigenVectors[0], eigenVectors[1]);
-    }
-    if (eigenValues[2] > eigenValues[1])
-    {
-        swap(eigenValues[1], eigenValues[2]);
-        swap(eigenVectors[1], eigenVectors[2]);
-    }
-
-    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]);
-    nvDebugCheck(eigenValues[1] >= eigenValues[2]);
-
-    return true;
-}
-
-static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd)
-{
-    // Householder reduction T = Q^t M Q
-    //   Input:   
-    //     mat, symmetric 3x3 matrix M
-    //   Output:  
-    //     mat, orthogonal matrix Q
-    //     diag, diagonal entries of T
-    //     subd, subdiagonal entries of T (T is symmetric)
-    const float epsilon = 1e-08f;
-
-    float a = mat[0][0];
-    float b = mat[0][1];
-    float c = mat[0][2];
-    float d = mat[1][1];
-    float e = mat[1][2];
-    float f = mat[2][2];
-
-    diag[0] = a;
-    subd[2] = 0.f;
-    if (fabsf(c) >= epsilon)
-    {
-        const float ell = sqrtf(b*b+c*c);
-        b /= ell;
-        c /= ell;
-        const float q = 2*b*e+c*(f-d);
-        diag[1] = d+c*q;
-        diag[2] = f-c*q;
-        subd[0] = ell;
-        subd[1] = e-b*q;
-        mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
-        mat[1][0] = 0; mat[1][1] = b; mat[1][2] = c;
-        mat[2][0] = 0; mat[2][1] = c; mat[2][2] = -b;
-    }
-    else
-    {
-        diag[1] = d;
-        diag[2] = f;
-        subd[0] = b;
-        subd[1] = e;
-        mat[0][0] = 1; mat[0][1] = 0; mat[0][2] = 0;
-        mat[1][0] = 0; mat[1][1] = 1; mat[1][2] = 0;
-        mat[2][0] = 0; mat[2][1] = 0; mat[2][2] = 1;
-    }
-}
-
-static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd)
-{
-    // QL iteration with implicit shifting to reduce matrix from tridiagonal
-    // to diagonal
-    const int maxiter = 32;
-
-    for (int ell = 0; ell < 3; ell++)
-    {
-        int iter;
-        for (iter = 0; iter < maxiter; iter++)
-        {
-            int m;
-            for (m = ell; m <= 1; m++)
-            {
-                float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
-                if ( fabsf(subd[m]) + dd == dd )
-                    break;
-            }
-            if ( m == ell )
-                break;
-
-            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
-            float r = sqrtf(g*g+1);
-            if ( g < 0 )
-                g = diag[m]-diag[ell]+subd[ell]/(g-r);
-            else
-                g = diag[m]-diag[ell]+subd[ell]/(g+r);
-            float s = 1, c = 1, p = 0;
-            for (int i = m-1; i >= ell; i--)
-            {
-                float f = s*subd[i], b = c*subd[i];
-                if ( fabsf(f) >= fabsf(g) )
-                {
-                    c = g/f;
-                    r = sqrtf(c*c+1);
-                    subd[i+1] = f*r;
-                    c *= (s = 1/r);
-                }
-                else
-                {
-                    s = f/g;
-                    r = sqrtf(s*s+1);
-                    subd[i+1] = g*r;
-                    s *= (c = 1/r);
-                }
-                g = diag[i+1]-p;
-                r = (diag[i]-g)*s+2*b*c;
-                p = s*r;
-                diag[i+1] = g+p;
-                g = c*r-b;
-
-                for (int k = 0; k < 3; k++)
-                {
-                    f = mat[k][i+1];
-                    mat[k][i+1] = s*mat[k][i]+c*f;
-                    mat[k][i] = c*mat[k][i]-s*f;
-                }
-            }
-            diag[ell] -= p;
-            subd[ell] = g;
-            subd[m] = 0;
-        }
-
-        if ( iter == maxiter )
-            // should not get here under normal circumstances
-            return false;
-    }
-
-    return true;
-}
-
-
-
-// Tridiagonal solver for 4x4 symmetric matrices.
-
-static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd);
-static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd);
-
-bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4])
-{
-    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
-
-    float subd[4];
-    float diag[4];
-    float work[4][4];
-
-    work[0][0] = matrix[0];
-    work[0][1] = work[1][0] = matrix[1];
-    work[0][2] = work[2][0] = matrix[2];
-    work[0][3] = work[3][0] = matrix[3];
-    work[1][1] = matrix[4];
-    work[1][2] = work[2][1] = matrix[5];
-    work[1][3] = work[3][1] = matrix[6];
-    work[2][2] = matrix[7];
-    work[2][3] = work[3][2] = matrix[8];
-    work[3][3] = matrix[9];
-
-    EigenSolver4_Tridiagonal(work, diag, subd);
-    if (!EigenSolver4_QLAlgorithm(work, diag, subd))
-    {
-        for (int i = 0; i < 4; i++) {
-            eigenValues[i] = 0;
-            eigenVectors[i] = Vector4(0);
-        }
-        return false;
-    }
-
-    for (int i = 0; i < 4; i++) {
-        eigenValues[i] = (float)diag[i];
-    }
-
-    // eigenvectors are the columns; make them the rows
-
-    for (int i = 0; i < 4; i++)
-    {
-        for (int j = 0; j < 4; j++)
-        {
-            eigenVectors[j].component[i] = (float) work[i][j];
-        }
-    }
-
-    // sort by singular value
-
-	for (int i = 0; i < 3; ++i)
-	{
-		for (int j = i+1; j < 4; ++j)
-		{
-			if (eigenValues[j] > eigenValues[i])
-			{
-				swap(eigenValues[i], eigenValues[j]);
-				swap(eigenVectors[i], eigenVectors[j]);
-			}
-		}
-	}
-
-    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]);
-    nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]);
-    nvDebugCheck(eigenValues[2] >= eigenValues[2]);
-
-    return true;
-}
-
-#include "nvmath/Matrix.inl"
-
-inline float signNonzero(float x)
-{
-	return (x >= 0.0f) ? 1.0f : -1.0f;
-}
-
-static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd)
-{
-    // Householder reduction T = Q^t M Q
-    //   Input:   
-    //     mat, symmetric 3x3 matrix M
-    //   Output:  
-    //     mat, orthogonal matrix Q
-    //     diag, diagonal entries of T
-    //     subd, subdiagonal entries of T (T is symmetric)
-
-	static const int n = 4;
-
-	// Set epsilon relative to size of elements in matrix
-	static const float relEpsilon = 1e-6f;
-	float maxElement = FLT_MAX;
-	for (int i = 0; i < n; ++i)
-		for (int j = 0; j < n; ++j)
-			maxElement = max(maxElement, fabsf(mat[i][j]));
-	float epsilon = relEpsilon * maxElement;
-
-	// Iterative algorithm, works for any size of matrix but might be slower than
-	// a closed-form solution for symmetric 4x4 matrices.  Based on this article:
-	// http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization
-
-	Matrix A, Q(identity);
-	memcpy(&A, mat, sizeof(float)*n*n);
-
-	// We proceed from left to right, making the off-tridiagonal entries zero in
-	// one column of the matrix at a time.
-	for (int k = 0; k < n - 2; ++k)
-	{
-		float sum = 0.0f;
-		for (int j = k+1; j < n; ++j)
-			sum += A(j,k)*A(j,k);
-		float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum);
-		float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha));
-
-		// If r is zero, skip this column - already in tridiagonal form
-		if (fabsf(r) < epsilon)
-			continue;
-
-		float v[n] = {};
-		v[k+1] = 0.5f * (A(k+1,k) - alpha) / r;
-		for (int j = k+2; j < n; ++j)
-			v[j] = 0.5f * A(j,k) / r;
-
-		Matrix P(identity);
-		for (int i = 0; i < n; ++i)
-			for (int j = 0; j < n; ++j)
-				P(i,j) -= 2.0f * v[i] * v[j];
-
-		A = mul(mul(P, A), P);
-		Q = mul(Q, P);
-	}
-
-	nvDebugCheck(fabsf(A(2,0)) < epsilon);
-	nvDebugCheck(fabsf(A(0,2)) < epsilon);
-	nvDebugCheck(fabsf(A(3,0)) < epsilon);
-	nvDebugCheck(fabsf(A(0,3)) < epsilon);
-	nvDebugCheck(fabsf(A(3,1)) < epsilon);
-	nvDebugCheck(fabsf(A(1,3)) < epsilon);
-
-	for (int i = 0; i < n; ++i)
-		diag[i] = A(i,i);
-	for (int i = 0; i < n - 1; ++i)
-		subd[i] = A(i+1,i);
-	subd[n-1] = 0.0f;
-
-	memcpy(mat, &Q, sizeof(float)*n*n);
-}
-
-static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd)
-{
-    // QL iteration with implicit shifting to reduce matrix from tridiagonal
-    // to diagonal
-    const int maxiter = 32;
-
-    for (int ell = 0; ell < 4; ell++)
-    {
-        int iter;
-        for (iter = 0; iter < maxiter; iter++)
-        {
-            int m;
-            for (m = ell; m < 3; m++)
-            {
-                float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
-                if ( fabsf(subd[m]) + dd == dd )
-                    break;
-            }
-            if ( m == ell )
-                break;
-
-            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
-            float r = sqrtf(g*g+1);
-            if ( g < 0 )
-                g = diag[m]-diag[ell]+subd[ell]/(g-r);
-            else
-                g = diag[m]-diag[ell]+subd[ell]/(g+r);
-            float s = 1, c = 1, p = 0;
-            for (int i = m-1; i >= ell; i--)
-            {
-                float f = s*subd[i], b = c*subd[i];
-                if ( fabsf(f) >= fabsf(g) )
-                {
-                    c = g/f;
-                    r = sqrtf(c*c+1);
-                    subd[i+1] = f*r;
-                    c *= (s = 1/r);
-                }
-                else
-                {
-                    s = f/g;
-                    r = sqrtf(s*s+1);
-                    subd[i+1] = g*r;
-                    s *= (c = 1/r);
-                }
-                g = diag[i+1]-p;
-                r = (diag[i]-g)*s+2*b*c;
-                p = s*r;
-                diag[i+1] = g+p;
-                g = c*r-b;
-
-                for (int k = 0; k < 4; k++)
-                {
-                    f = mat[k][i+1];
-                    mat[k][i+1] = s*mat[k][i]+c*f;
-                    mat[k][i] = c*mat[k][i]-s*f;
-                }
-            }
-            diag[ell] -= p;
-            subd[ell] = g;
-            subd[m] = 0;
-        }
-
-        if ( iter == maxiter )
-            // should not get here under normal circumstances
-            return false;
-    }
-
-    return true;
-}
-
-
-
-int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
-{
-    // Compute principal component.
-    float matrix[6];
-    Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
-    Vector3 principal = firstEigenVector_PowerMethod(matrix);
-
-    // Pick initial solution.
-    int mini, maxi;
-    mini = maxi = 0;
-
-    float mindps, maxdps;
-    mindps = maxdps = dot(points[0] - centroid, principal);
-
-    for (int i = 1; i < n; ++i)
-    {
-        float dps = dot(points[i] - centroid, principal);
-
-        if (dps < mindps) {
-            mindps = dps;
-            mini = i;
-        }
-        else {
-            maxdps = dps;
-            maxi = i;
-        }
-    }
-
-    cluster[0] = centroid + mindps * principal;
-    cluster[1] = centroid + maxdps * principal;
-    cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f;
-    cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f;
-
-    // Now we have to iteratively refine the clusters.
-    while (true)
-    {
-        Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) };
-        float total[4] = {0, 0, 0, 0};
-
-        for (int i = 0; i < n; ++i)
-        {
-            // Find nearest cluster.
-            int nearest = 0;
-            float mindist = FLT_MAX;
-            for (int j = 0; j < 4; j++)
-            {
-                float dist = lengthSquared((cluster[j] - points[i]) * metric);
-                if (dist < mindist)
-                {
-                    mindist = dist;
-                    nearest = j;
-                }
-            }
-
-            newCluster[nearest] += weights[i] * points[i];
-            total[nearest] += weights[i];
-        }
-
-        for (int j = 0; j < 4; j++)
-        {
-            if (total[j] != 0)
-                newCluster[j] /= total[j];
-        }
-
-        if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) && 
-            equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
-        {
-            return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
-        }
-
-        cluster[0] = newCluster[0];
-        cluster[1] = newCluster[1];
-        cluster[2] = newCluster[2];
-        cluster[3] = newCluster[3];
-
-        // Sort clusters by weight.
-        for (int i = 0; i < 4; i++)
-        {
-            for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
-            {
-                swap( total[j], total[j - 1] );
-                swap( cluster[j], cluster[j - 1] );
-            }
-        }
-    }
-}
-
-
-
-// Adaptation of James Arvo's SVD code, as found in ZOH.
-
-inline float Sqr(float x) { return x*x; }
-
-inline float svd_pythag( float a, float b )
-{
-	float at = fabsf(a);
-	float bt = fabsf(b);
-	if( at > bt )
-		return at * sqrtf( 1.0f + Sqr( bt / at ) );
-	else if( bt > 0.0f )
-		return bt * sqrtf( 1.0f + Sqr( at / bt ) );
-	else return 0.0f;
-}
-
-inline float SameSign( float a, float b ) 
-{
-	float t;
-	if( b >= 0.0f ) t = fabsf( a );
-	else t = -fabsf( a );
-	return t;
-}
-
-void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
-{
-	static const int MaxIterations = 30;
-
-	int    i, j, k, l, p, q, iter;
-	float  c, f, h, s, x, y, z;
-	float  norm  = 0.0f;
-	float  g     = 0.0f;
-	float  scale = 0.0f;
-
-    Array<float> temp; temp.resize(cols, 0.0f);
-
-	for( i = 0; i < cols; i++ ) 
-	{
-		temp[i] = scale * g;
-		scale   = 0.0f;
-		g       = 0.0f;
-		s       = 0.0f;
-		l       = i + 1;
-
-		if( i < rows )
-		{
-			for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] );
-			if( scale != 0.0f ) 
-			{
-				for( k = i; k < rows; k++ ) 
-				{
-					Q[k*cols+i] /= scale;
-					s += Sqr( Q[k*cols+i] );
-				}
-				f = Q[i*cols+i];
-				g = -SameSign( sqrtf(s), f );
-				h = f * g - s;
-				Q[i*cols+i] = f - g;
-				if( i != cols - 1 )
-				{
-					for( j = l; j < cols; j++ ) 
-					{
-						s = 0.0f;
-						for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
-						f = s / h;
-						for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
-					}
-				}
-				for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale;
-			}
-		}
-
-		diag[i] = scale * g;
-		g       = 0.0f;
-		s       = 0.0f;
-		scale   = 0.0f;
-
-		if( i < rows && i != cols - 1 ) 
-		{
-			for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] );
-			if( scale != 0.0f ) 
-			{
-				for( k = l; k < cols; k++ ) 
-				{
-					Q[i*cols+k] /= scale;
-					s += Sqr( Q[i*cols+k] );
-				}
-				f = Q[i*cols+l];
-				g = -SameSign( sqrtf(s), f );
-				h = f * g - s;
-				Q[i*cols+l] = f - g;
-				for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h;
-				if( i != rows - 1 ) 
-				{
-					for( j = l; j < rows; j++ ) 
-					{
-						s = 0.0f;
-						for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k];
-						for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k];
-					}
-				}
-				for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale;
-			}
-		}
-		norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) );
-	}
-
-
-	for( i = cols - 1; i >= 0; i-- ) 
-	{
-		if( i < cols - 1 ) 
-		{
-			if( g != 0.0f ) 
-			{
-				for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g;
-				for( j = l; j < cols; j++ ) 
-				{
-					s = 0.0f;
-					for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k];
-					for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k];
-				}
-			}
-			for( j = l; j < cols; j++ ) 
-			{
-				R[i*cols+j] = 0.0f;
-				R[j*cols+i] = 0.0f;
-			}
-		}
-		R[i*cols+i] = 1.0f;
-		g = temp[i];
-		l = i;
-	}
-
-
-	for( i = cols - 1; i >= 0; i-- ) 
-	{
-		l = i + 1;
-		g = diag[i];
-		if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f;
-		if( g != 0.0f ) 
-		{
-			g = 1.0f / g;
-			if( i != cols - 1 ) 
-			{
-				for( j = l; j < cols; j++ ) 
-				{
-					s = 0.0f;
-					for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
-					f = ( s / Q[i*cols+i] ) * g;
-					for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
-				}
-			}
-			for( j = i; j < rows; j++ ) Q[j*cols+i] *= g;
-		} 
-		else 
-		{
-			for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f;
-		}
-		Q[i*cols+i] += 1.0f;
-	}
-
-
-	for( k = cols - 1; k >= 0; k-- ) 
-	{
-		for( iter = 1; iter <= MaxIterations; iter++ ) 
-		{
-			int jump = 0;
-
-			for( l = k; l >= 0; l-- )
-			{
-				q = l - 1;
-				if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; }
-				if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; }
-			}
-
-			if( !jump )
-			{
-				c = 0.0f;
-				s = 1.0f;
-				for( i = l; i <= k; i++ )
-				{
-					f = s * temp[i];
-					temp[i] *= c;
-					if( fabsf( f ) + norm == norm ) break;
-					g = diag[i];
-					h = svd_pythag( f, g );
-					diag[i] = h;
-					h = 1.0f / h;
-					c = g * h;
-					s = -f * h;
-					for( j = 0; j < rows; j++ ) 
-					{
-						y = Q[j*cols+q];
-						z = Q[j*cols+i];
-						Q[j*cols+q] = y * c + z * s;
-						Q[j*cols+i] = z * c - y * s;
-					}
-				}
-			}
-
-			z = diag[k];
-			if( l == k ) 
-			{
-				if( z < 0.0f ) 
-				{
-					diag[k] = -z;
-					for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f; 
-				}
-				break;
-			}
-			if( iter >= MaxIterations ) return;
-			x = diag[l];
-			q = k - 1;
-			y = diag[q];
-			g = temp[q];
-			h = temp[k];
-			f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y );
-			g = svd_pythag( f, 1.0f );
-			f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
-			c = 1.0f;
-			s = 1.0f;
-			for( j = l; j <= q; j++ ) 
-			{
-				i = j + 1;
-				g = temp[i];
-				y = diag[i];
-				h = s * g;
-				g = c * g;
-				z = svd_pythag( f, h );
-				temp[j] = z;
-				c = f / z;
-				s = h / z;
-				f = x * c + g * s;
-				g = g * c - x * s;
-				h = y * s;
-				y = y * c;
-				for( p = 0; p < cols; p++ ) 
-				{
-					x = R[j*cols+p];
-					z = R[i*cols+p];
-					R[j*cols+p] = x * c + z * s;
-					R[i*cols+p] = z * c - x * s;
-				}
-				z = svd_pythag( f, h );
-				diag[j] = z;
-				if( z != 0.0f ) 
-				{
-					z = 1.0f / z;
-					c = f * z;
-					s = h * z;
-				}
-				f = c * g + s * y;
-				x = c * y - s * g;
-				for( p = 0; p < rows; p++ ) 
-				{
-					y = Q[p*cols+j];
-					z = Q[p*cols+i];
-					Q[p*cols+j] = y * c + z * s;
-					Q[p*cols+i] = z * c - y * s;
-				}
-			}
-			temp[l] = 0.0f;
-			temp[k] = f;
-			diag[k] = x;
-		}
-	}
-
-	// Sort the singular values into descending order.
-
-	for( i = 0; i < cols - 1; i++ )
-	{
-		float biggest = diag[i];  // Biggest singular value so far.
-		int   bindex  = i;        // The row/col it occurred in.
-		for( j = i + 1; j < cols; j++ )
-		{
-			if( diag[j] > biggest ) 
-			{
-				biggest = diag[j];
-				bindex  = j;
-			}            
-		}
-		if( bindex != i )  // Need to swap rows and columns.
-		{
-			// Swap columns in Q.
-			for (int j = 0; j < rows; ++j)
-				swap(Q[j*cols+i], Q[j*cols+bindex]);
-
-			// Swap rows in R.
-			for (int j = 0; j < rows; ++j)
-				swap(R[i*cols+j], R[bindex*cols+j]);
-
-			// Swap elements in diag.
-			swap(diag[i], diag[bindex]);
-		}
-	}
-}
diff --git a/thirdparty/thekla_atlas/nvmath/Fitting.h b/thirdparty/thekla_atlas/nvmath/Fitting.h
deleted file mode 100644
index 7a88cd28fd..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Fitting.h
+++ /dev/null
@@ -1,50 +0,0 @@
-// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_FITTING_H
-#define NV_MATH_FITTING_H
-
-#include "Vector.h"
-#include "Plane.h"
-
-namespace nv
-{
-    namespace Fit
-    {
-        Vector3 computeCentroid(int n, const Vector3 * points);
-        Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
-
-        Vector4 computeCentroid(int n, const Vector4 * points);
-        Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
-
-        Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
-        Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);
-
-        Vector4 computeCovariance(int n, const Vector4 * points, float * covariance);
-        Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance);
-
-        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
-        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
-
-        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
-        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
-
-		Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points);
-        Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
-
-        Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
-        Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points);
-
-        Plane bestPlane(int n, const Vector3 * points);
-        bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);
-
-        bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
-        bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]);
-
-        // Returns number of clusters [1-4].
-        int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster);
-    }
-
-} // nv namespace
-
-#endif // NV_MATH_FITTING_H
diff --git a/thirdparty/thekla_atlas/nvmath/KahanSum.h b/thirdparty/thekla_atlas/nvmath/KahanSum.h
deleted file mode 100644
index 18d475e7cb..0000000000
--- a/thirdparty/thekla_atlas/nvmath/KahanSum.h
+++ /dev/null
@@ -1,39 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_KAHANSUM_H
-#define NV_MATH_KAHANSUM_H
-
-#include "nvmath.h"
-
-namespace nv
-{
-
-    class KahanSum
-    {
-    public:
-        KahanSum() : accum(0.0f), err(0) {};
-
-        void add(float f)
-        {
-            float compensated = f + err;
-            float tmp = accum + compensated;
-            err = accum - tmp;
-            err += compensated;
-            accum = tmp;
-        }
-
-        float sum() const
-        {
-            return accum;
-        }
-
-    private:
-        float accum;
-        float err;
-    };
-
-} // nv namespace
-
-
-#endif // NV_MATH_KAHANSUM_H
diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.cpp b/thirdparty/thekla_atlas/nvmath/Matrix.cpp
deleted file mode 100644
index 29bd19f5f8..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Matrix.cpp
+++ /dev/null
@@ -1,441 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include "Matrix.inl"
-#include "Vector.inl"
-
-#include "nvcore/Array.inl"
-
-#include <float.h>
-
-#if !NV_CC_MSVC && !NV_OS_ORBIS
-#include <alloca.h>
-#endif
-
-using namespace nv;
-
-
-// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise
-// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above;
-// indx[1..n] is an output vector that records the row permutation effected by the partial
-// pivoting; d is output as -1 depending on whether the number of row interchanges was even
-// or odd, respectively. This routine is used in combination with lubksb to solve linear equations
-// or invert a matrix.
-static bool ludcmp(float **a, int n, int *indx, float *d)
-{
-    const float TINY = 1.0e-20f;
-
-    float * vv = (float*)alloca(sizeof(float) * n);    // vv stores the implicit scaling of each row.
-
-    *d = 1.0; // No row interchanges yet.
-    for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
-    
-        float big = 0.0;
-        for (int j = 0; j < n; j++) {
-            big = max(big, fabsf(a[i][j]));
-        }
-        if (big == 0) {
-            return false;   // Singular matrix
-        }
-        
-        // No nonzero largest element.
-        vv[i] = 1.0f / big; // Save the scaling.
-    }
-
-    for (int j = 0; j < n; j++) {       // This is the loop over columns of Crout's method.
-        for (int i = 0; i < j; i++) {   // This is equation (2.3.12) except for i = j.
-            float sum = a[i][j];
-            for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j];
-            a[i][j] = sum;
-        }
-
-        int imax = -1;
-        float big = 0.0;                // Initialize for the search for largest pivot element.
-        for (int i = j; i < n; i++) {   // This is i = j of equation (2.3.12) and i = j+ 1 : : : N
-            float sum = a[i][j];              // of equation (2.3.13).
-            for (int k = 0; k < j; k++) {
-                sum -= a[i][k]*a[k][j];
-            }
-            a[i][j]=sum;
-
-            float dum = vv[i]*fabs(sum);
-            if (dum >= big) {
-                // Is the figure of merit for the pivot better than the best so far?
-                big = dum;
-                imax = i;
-            }
-        }
-        nvDebugCheck(imax != -1);
-
-        if (j != imax) {                // Do we need to interchange rows?
-            for (int k = 0; k < n; k++) {   // Yes, do so...
-                swap(a[imax][k], a[j][k]);
-            }
-            *d = -(*d); // ...and change the parity of d.
-            vv[imax]=vv[j]; // Also interchange the scale factor.
-        }
-
-        indx[j]=imax;
-        if (a[j][j] == 0.0) a[j][j] = TINY;
-        
-        // If the pivot element is zero the matrix is singular (at least to the precision of the
-        // algorithm). For some applications on singular matrices, it is desirable to substitute
-        // TINY for zero.
-        if (j != n-1) { // Now, finally, divide by the pivot element.
-            float dum = 1.0f / a[j][j];
-            for (int i = j+1; i < n; i++) a[i][j] *= dum;
-        }
-    } // Go back for the next column in the reduction.
-
-    return true;
-}
-
-
-// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix
-// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input
-// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector
-// B, and returns with the solution vector X. a, n, and indx are not modified by this routine
-// and can be left in place for successive calls with different right-hand sides b. This routine takes
-// into account the possibility that b will begin with many zero elements, so it is efficient for use
-// in matrix inversion.
-static void lubksb(float **a, int n, int *indx, float b[])
-{
-    int ii = 0;
-    for (int i=0; i<n; i++) {   // When ii is set to a positive value, it will become 
-        int ip = indx[i];       // the index of the first nonvanishing element of b. We now 
-        float sum = b[ip];      // do the forward substitution, equation (2.3.6). The 
-        b[ip] = b[i];           // only new wrinkle is to unscramble the permutation as we go.
-        if (ii != 0) {
-            for (int j = ii-1; j < i; j++) sum -= a[i][j]*b[j];
-        }
-        else if (sum != 0.0f) {
-            ii = i+1;             // A nonzero element was encountered, so from now on we 
-        }
-        b[i] = sum;             // will have to do the sums in the loop above.
-    }
-    for (int i=n-1; i>=0; i--) {  // Now we do the backsubstitution, equation (2.3.7).
-        float sum = b[i];
-        for (int j = i+1; j < n; j++) {
-            sum -= a[i][j]*b[j];
-        }
-        b[i] = sum/a[i][i];     // Store a component of the solution vector X.
-    } // All done!
-}
-
-
-bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
-{
-    nvDebugCheck(x != NULL);
-
-    float m[4][4];
-    float *a[4] = {m[0], m[1], m[2], m[3]};
-    int idx[4];
-    float d;
-
-    for (int y = 0; y < 4; y++) {
-        for (int x = 0; x < 4; x++) {
-            a[x][y] = A(x, y);
-        }
-    }
-
-    // Create LU decomposition.
-    if (!ludcmp(a, 4, idx, &d)) {
-        // Singular matrix.
-        return false;
-    }
-
-    // Init solution.
-    *x = b;
-
-    // Do back substitution.
-    lubksb(a, 4, idx, x->component);
-
-    return true;
-}
-
-// @@ Not tested.
-Matrix nv::inverseLU(const Matrix & A)
-{
-    Vector4 Ai[4];
-
-    solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
-    solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
-    solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
-    solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
-
-    return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
-}
-
-
-
-bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
-{
-    nvDebugCheck(x != NULL);
-
-    float m[3][3];
-    float *a[3] = {m[0], m[1], m[2]};
-    int idx[3];
-    float d;
-
-    for (int y = 0; y < 3; y++) {
-        for (int x = 0; x < 3; x++) {
-            a[x][y] = A(x, y);
-        }
-    }
-
-    // Create LU decomposition.
-    if (!ludcmp(a, 3, idx, &d)) {
-        // Singular matrix.
-        return false;
-    }
-
-    // Init solution.
-    *x = b;
-
-    // Do back substitution.
-    lubksb(a, 3, idx, x->component);
-
-    return true;
-}
-
-
-bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
-{
-    nvDebugCheck(x != NULL);
-
-    *x = transform(inverseCramer(A), b);
-    
-    return true; // @@ Return false if determinant(A) == 0 !
-}
-
-bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
-{
-    nvDebugCheck(x != NULL);
-
-    const float det = A.determinant();
-    if (equal(det, 0.0f)) {   // @@ Use input epsilon.
-        return false;
-    }
-
-    Matrix3 Ai = inverseCramer(A);
-
-    *x = transform(Ai, b);
-    
-    return true;
-}
-
-
-
-// Inverse using gaussian elimination. From Jon's code.
-Matrix nv::inverse(const Matrix & m) {
-
-    Matrix A = m;
-    Matrix B(identity);
-
-    int i, j, k;
-    float max, t, det, pivot;
-
-    det = 1.0;
-    for (i=0; i<4; i++) {               /* eliminate in column i, below diag */
-        max = -1.;
-        for (k=i; k<4; k++)             /* find pivot for column i */
-            if (fabs(A(k, i)) > max) {
-                max = fabs(A(k, i));
-                j = k;
-            }
-        if (max<=0.) return B;         /* if no nonzero pivot, PUNT */
-        if (j!=i) {                     /* swap rows i and j */
-            for (k=i; k<4; k++)
-                swap(A(i, k), A(j, k));
-            for (k=0; k<4; k++)
-                swap(B(i, k), B(j, k));
-            det = -det;
-        }
-        pivot = A(i, i);
-        det *= pivot;
-        for (k=i+1; k<4; k++)           /* only do elems to right of pivot */
-            A(i, k) /= pivot;
-        for (k=0; k<4; k++)
-            B(i, k) /= pivot;
-        /* we know that A(i, i) will be set to 1, so don't bother to do it */
-
-        for (j=i+1; j<4; j++) {         /* eliminate in rows below i */
-            t = A(j, i);                /* we're gonna zero this guy */
-            for (k=i+1; k<4; k++)       /* subtract scaled row i from row j */
-                A(j, k) -= A(i, k)*t;   /* (ignore k<=i, we know they're 0) */
-            for (k=0; k<4; k++)
-                B(j, k) -= B(i, k)*t;
-        }
-    }
-
-    /*---------- backward elimination ----------*/
-
-    for (i=4-1; i>0; i--) {             /* eliminate in column i, above diag */
-        for (j=0; j<i; j++) {           /* eliminate in rows above i */
-            t = A(j, i);                /* we're gonna zero this guy */
-            for (k=0; k<4; k++)         /* subtract scaled row i from row j */
-                B(j, k) -= B(i, k)*t;
-        }
-    }
-
-    return B;
-}
-
-
-Matrix3 nv::inverse(const Matrix3 & m) {
-
-    Matrix3 A = m;
-    Matrix3 B(identity);
-
-    int i, j, k;
-    float max, t, det, pivot;
-
-    det = 1.0;
-    for (i=0; i<3; i++) {               /* eliminate in column i, below diag */
-        max = -1.;
-        for (k=i; k<3; k++)             /* find pivot for column i */
-            if (fabs(A(k, i)) > max) {
-                max = fabs(A(k, i));
-                j = k;
-            }
-        if (max<=0.) return B;         /* if no nonzero pivot, PUNT */
-        if (j!=i) {                     /* swap rows i and j */
-            for (k=i; k<3; k++)
-                swap(A(i, k), A(j, k));
-            for (k=0; k<3; k++)
-                swap(B(i, k), B(j, k));
-            det = -det;
-        }
-        pivot = A(i, i);
-        det *= pivot;
-        for (k=i+1; k<3; k++)           /* only do elems to right of pivot */
-            A(i, k) /= pivot;
-        for (k=0; k<3; k++)
-            B(i, k) /= pivot;
-        /* we know that A(i, i) will be set to 1, so don't bother to do it */
-
-        for (j=i+1; j<3; j++) {         /* eliminate in rows below i */
-            t = A(j, i);                /* we're gonna zero this guy */
-            for (k=i+1; k<3; k++)       /* subtract scaled row i from row j */
-                A(j, k) -= A(i, k)*t;   /* (ignore k<=i, we know they're 0) */
-            for (k=0; k<3; k++)
-                B(j, k) -= B(i, k)*t;
-        }
-    }
-
-    /*---------- backward elimination ----------*/
-
-    for (i=3-1; i>0; i--) {             /* eliminate in column i, above diag */
-        for (j=0; j<i; j++) {           /* eliminate in rows above i */
-            t = A(j, i);                /* we're gonna zero this guy */
-            for (k=0; k<3; k++)         /* subtract scaled row i from row j */
-                B(j, k) -= B(i, k)*t;
-        }
-    }
-
-    return B;
-}
-
-
-
-
-
-#if 0 
-
-// Copyright (C) 1999-2004 Michael Garland.
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a
-// copy of this software and associated documentation files (the
-// "Software"), to deal in the Software without restriction, including
-// without limitation the rights to use, copy, modify, merge, publish,
-// distribute, and/or sell copies of the Software, and to permit persons
-// to whom the Software is furnished to do so, provided that the above
-// copyright notice(s) and this permission notice appear in all copies of
-// the Software and that both the above copyright notice(s) and this
-// permission notice appear in supporting documentation.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
-// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
-// HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
-// INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
-// FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
-// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
-// WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-// 
-// Except as contained in this notice, the name of a copyright holder
-// shall not be used in advertising or otherwise to promote the sale, use
-// or other dealings in this Software without prior written authorization
-// of the copyright holder.
-
-
-// Matrix inversion code for 4x4 matrices using Gaussian elimination
-// with partial pivoting.  This is a specialized version of a
-// procedure originally due to Paul Heckbert <ph@cs.cmu.edu>.
-//
-// Returns determinant of A, and B=inverse(A)
-// If matrix A is singular, returns 0 and leaves trash in B.
-//
-#define SWAP(a, b, t)   {t = a; a = b; b = t;}
-double invert(Mat4& B, const Mat4& m)
-{
-    Mat4 A = m;
-    int i, j, k;
-    double max, t, det, pivot;
-
-    /*---------- forward elimination ----------*/
-
-    for (i=0; i<4; i++)                 /* put identity matrix in B */
-        for (j=0; j<4; j++)
-            B(i, j) = (double)(i==j);
-
-    det = 1.0;
-    for (i=0; i<4; i++) {               /* eliminate in column i, below diag */
-        max = -1.;
-        for (k=i; k<4; k++)             /* find pivot for column i */
-            if (fabs(A(k, i)) > max) {
-                max = fabs(A(k, i));
-                j = k;
-            }
-        if (max<=0.) return 0.;         /* if no nonzero pivot, PUNT */
-        if (j!=i) {                     /* swap rows i and j */
-            for (k=i; k<4; k++)
-                SWAP(A(i, k), A(j, k), t);
-            for (k=0; k<4; k++)
-                SWAP(B(i, k), B(j, k), t);
-            det = -det;
-        }
-        pivot = A(i, i);
-        det *= pivot;
-        for (k=i+1; k<4; k++)           /* only do elems to right of pivot */
-            A(i, k) /= pivot;
-        for (k=0; k<4; k++)
-            B(i, k) /= pivot;
-        /* we know that A(i, i) will be set to 1, so don't bother to do it */
-
-        for (j=i+1; j<4; j++) {         /* eliminate in rows below i */
-            t = A(j, i);                /* we're gonna zero this guy */
-            for (k=i+1; k<4; k++)       /* subtract scaled row i from row j */
-                A(j, k) -= A(i, k)*t;   /* (ignore k<=i, we know they're 0) */
-            for (k=0; k<4; k++)
-                B(j, k) -= B(i, k)*t;
-        }
-    }
-
-    /*---------- backward elimination ----------*/
-
-    for (i=4-1; i>0; i--) {             /* eliminate in column i, above diag */
-        for (j=0; j<i; j++) {           /* eliminate in rows above i */
-            t = A(j, i);                /* we're gonna zero this guy */
-            for (k=0; k<4; k++)         /* subtract scaled row i from row j */
-                B(j, k) -= B(i, k)*t;
-        }
-    }
-
-    return det;
-}
-
-#endif // 0
-
-
-
diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.h b/thirdparty/thekla_atlas/nvmath/Matrix.h
deleted file mode 100644
index 506bdad1ca..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Matrix.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_MATRIX_H
-#define NV_MATH_MATRIX_H
-
-#include "Vector.h"
-
-// - Matrices are stored in memory in *column major* order.
-// - Points are to be though of as column vectors.
-// - Transformation of a point p by a matrix M is: p' = M * p
-
-namespace nv
-{
-    enum identity_t { identity };
-
-    // 3x3 matrix.
-    class NVMATH_CLASS Matrix3
-    {
-    public:
-        Matrix3();
-        explicit Matrix3(float f);
-        explicit Matrix3(identity_t);
-        Matrix3(const Matrix3 & m);
-        Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
-
-        float data(uint idx) const;
-        float & data(uint idx);
-        float get(uint row, uint col) const;
-        float operator()(uint row, uint col) const;
-        float & operator()(uint row, uint col);
-
-        Vector3 row(uint i) const;
-        Vector3 column(uint i) const;
-
-        void operator*=(float s);
-        void operator/=(float s);
-        void operator+=(const Matrix3 & m);
-        void operator-=(const Matrix3 & m);
-
-        void scale(float s);
-        void scale(Vector3::Arg s);
-        float determinant() const;
-
-    private:
-        float m_data[9];
-    };
-
-    // Solve equation system using LU decomposition and back-substitution.
-    extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x);
-
-    // Solve equation system using Cramer's inverse.
-    extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
-
-
-    // 4x4 matrix.
-    class NVMATH_CLASS Matrix
-    {
-    public:
-        typedef Matrix const & Arg;
-
-        Matrix();
-        explicit Matrix(float f);
-        explicit Matrix(identity_t);
-        Matrix(const Matrix3 & m);
-        Matrix(const Matrix & m);
-        Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
-        //explicit Matrix(const float m[]);	// m is assumed to contain 16 elements
-
-        float data(uint idx) const;
-        float & data(uint idx);
-        float get(uint row, uint col) const;
-        float operator()(uint row, uint col) const;
-        float & operator()(uint row, uint col);
-        const float * ptr() const;
-
-        Vector4 row(uint i) const;
-        Vector4 column(uint i) const;
-
-        void zero();
-        void identity();
-
-        void scale(float s);
-        void scale(Vector3::Arg s);
-        void translate(Vector3::Arg t);
-        void rotate(float theta, float v0, float v1, float v2);
-        float determinant() const;
-
-        void operator+=(const Matrix & m);
-        void operator-=(const Matrix & m);
-
-        void apply(Matrix::Arg m);
-
-    private:
-        float m_data[16];
-    };
-
-    // Solve equation system using LU decomposition and back-substitution.
-    extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
-
-    // Solve equation system using Cramer's inverse.
-    extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
-
-    // Compute inverse using LU decomposition.
-    extern Matrix inverseLU(const Matrix & m);
-
-    // Compute inverse using Gaussian elimination and partial pivoting.
-    extern Matrix inverse(const Matrix & m);
-    extern Matrix3 inverse(const Matrix3 & m);
-
-} // nv namespace
-
-#endif // NV_MATH_MATRIX_H
diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.inl b/thirdparty/thekla_atlas/nvmath/Matrix.inl
deleted file mode 100644
index c0d99d9fe0..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Matrix.inl
+++ /dev/null
@@ -1,1274 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_MATRIX_INL
-#define NV_MATH_MATRIX_INL
-
-#include "Matrix.h"
-
-namespace nv
-{
-    inline Matrix3::Matrix3() {}
-    
-    inline Matrix3::Matrix3(float f)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] = f;
-        }
-    }
-
-    inline Matrix3::Matrix3(identity_t)
-    {
-        for(int i = 0; i < 3; i++) {
-            for(int j = 0; j < 3; j++) {
-                m_data[3*j+i] = (i == j) ? 1.0f : 0.0f;
-            }
-        }
-    }
-
-    inline Matrix3::Matrix3(const Matrix3 & m)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] = m.m_data[i];
-        }
-    }
-    
-    inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2)
-    {
-        m_data[0] = v0.x; m_data[1] = v0.y; m_data[2] = v0.z;
-        m_data[3] = v1.x; m_data[4] = v1.y; m_data[5] = v1.z;
-        m_data[6] = v2.x; m_data[7] = v2.y; m_data[8] = v2.z;
-    }
-
-    inline float Matrix3::data(uint idx) const
-    {
-        nvDebugCheck(idx < 9);
-        return m_data[idx];
-    }
-    inline float & Matrix3::data(uint idx)
-    {
-        nvDebugCheck(idx < 9);
-        return m_data[idx];
-    }
-    inline float Matrix3::get(uint row, uint col) const
-    {
-        nvDebugCheck(row < 3 && col < 3);
-        return m_data[col * 3 + row];
-    }
-    inline float Matrix3::operator()(uint row, uint col) const
-    {
-        nvDebugCheck(row < 3 && col < 3);
-        return m_data[col * 3 + row];
-    }
-    inline float & Matrix3::operator()(uint row, uint col)
-    {
-        nvDebugCheck(row < 3 && col < 3);
-        return m_data[col * 3 + row];
-    }
-
-    inline Vector3 Matrix3::row(uint i) const
-    {
-        nvDebugCheck(i < 3);
-        return Vector3(get(i, 0), get(i, 1), get(i, 2));
-    }
-    inline Vector3 Matrix3::column(uint i) const
-    {
-        nvDebugCheck(i < 3);
-        return Vector3(get(0, i), get(1, i), get(2, i));
-    }
-
-    inline void Matrix3::operator*=(float s)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] *= s;
-        }
-    }
-
-    inline void Matrix3::operator/=(float s)
-    {
-        float is = 1.0f /s;
-        for(int i = 0; i < 9; i++) {
-            m_data[i] *= is;
-        }
-    }
-
-    inline void Matrix3::operator+=(const Matrix3 & m)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] += m.m_data[i];
-        }
-    }
-
-    inline void Matrix3::operator-=(const Matrix3 & m)
-    {
-        for(int i = 0; i < 9; i++) {
-            m_data[i] -= m.m_data[i];
-        }
-    }
-
-    inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b)
-    {
-        Matrix3 m = a;
-        m += b;
-        return m;
-    }
-
-    inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b)
-    {
-        Matrix3 m = a;
-        m -= b;
-        return m;
-    }
-
-    inline Matrix3 operator*(const Matrix3 & a, float s)
-    {
-        Matrix3 m = a;
-        m *= s;
-        return m;
-    }
-
-    inline Matrix3 operator*(float s, const Matrix3 & a)
-    {
-        Matrix3 m = a;
-        m *= s;
-        return m;
-    }
-
-    inline Matrix3 operator/(const Matrix3 & a, float s)
-    {
-        Matrix3 m = a;
-        m /= s;
-        return m;
-    }
-
-    inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b)
-    {
-        Matrix3 m;
-
-        for(int i = 0; i < 3; i++) {
-            const float ai0 = a(i,0), ai1 = a(i,1), ai2 = a(i,2);
-            m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0) + ai2 * b(2,0);
-            m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1) + ai2 * b(2,1);
-            m(i, 2) = ai0 * b(0,2) + ai1 * b(1,2) + ai2 * b(2,2);
-        }
-
-        return m;
-    }
-
-    inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b)
-    {
-        return mul(a, b);
-    }
-
-    // Transform the given 3d vector with the given matrix.
-    inline Vector3 transform(const Matrix3 & m, const Vector3 & p)
-    {
-        return Vector3(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2));
-    }
-
-    inline void Matrix3::scale(float s)
-    {
-        for (int i = 0; i < 9; i++) {
-            m_data[i] *= s;
-        }
-    }
-
-    inline void Matrix3::scale(Vector3::Arg s)
-    {
-        m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x;
-        m_data[3] *= s.y; m_data[4] *= s.y; m_data[5] *= s.y;
-        m_data[6] *= s.z; m_data[7] *= s.z; m_data[8] *= s.z;
-    }
-
-    inline float Matrix3::determinant() const
-    {
-        return 
-            get(0,0) * get(1,1) * get(2,2) + 
-            get(0,1) * get(1,2) * get(2,0) + 
-            get(0,2) * get(1,0) * get(2,1) -
-            get(0,2) * get(1,1) * get(2,0) - 
-            get(0,1) * get(1,0) * get(2,2) -
-            get(0,0) * get(1,2) * get(2,1);
-    }
-
-    // Inverse using Cramer's rule.
-    inline Matrix3 inverseCramer(const Matrix3 & m)
-    {
-        const float det = m.determinant();
-        if (equal(det, 0.0f, 0.0f)) {
-            return Matrix3(0);
-        }
-
-        Matrix3 r;
-
-        r.data(0) =  - m.data(5) * m.data(7) + m.data(4) * m.data(8);
-        r.data(1) =  + m.data(5) * m.data(6) - m.data(3) * m.data(8);
-        r.data(2) =  - m.data(4) * m.data(6) + m.data(3) * m.data(7);
-
-        r.data(3) =  + m.data(2) * m.data(7) - m.data(1) * m.data(8);
-        r.data(4) =  - m.data(2) * m.data(6) + m.data(0) * m.data(8);
-        r.data(5) =  + m.data(1) * m.data(6) - m.data(0) * m.data(7);
-
-        r.data(6) =  - m.data(2) * m.data(4) + m.data(1) * m.data(5);
-        r.data(7) =  + m.data(2) * m.data(3) - m.data(0) * m.data(5);
-        r.data(8) =  - m.data(1) * m.data(3) + m.data(0) * m.data(4);
-
-        r.scale(1.0f / det);
-
-        return r;
-    }
-
-
-
-    inline Matrix::Matrix()
-    {
-    }
-
-    inline Matrix::Matrix(float f)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] = 0.0f;
-        }
-    }
-
-    inline Matrix::Matrix(identity_t)
-    {
-        for(int i = 0; i < 4; i++) {
-            for(int j = 0; j < 4; j++) {
-                m_data[4*j+i] = (i == j) ? 1.0f : 0.0f;
-            }
-        }
-    }
-
-    inline Matrix::Matrix(const Matrix & m)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] = m.m_data[i];
-        }
-    }
-
-    inline Matrix::Matrix(const Matrix3 & m)
-    {
-        for(int i = 0; i < 3; i++) {
-            for(int j = 0; j < 3; j++) {
-                operator()(i, j) = m.get(i, j);
-            }
-        }
-        for(int i = 0; i < 4; i++) {
-            operator()(3, i) = 0;
-            operator()(i, 3) = 0;
-        }
-    }
-
-    inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
-    {
-        m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w;
-        m_data[ 4] = v1.x; m_data[ 5] = v1.y; m_data[ 6] = v1.z; m_data[ 7] = v1.w;
-        m_data[ 8] = v2.x; m_data[ 9] = v2.y; m_data[10] = v2.z; m_data[11] = v2.w;
-        m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
-    }
-
-    /*inline Matrix::Matrix(const float m[])
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] = m[i];
-        }
-    }*/
-
-
-    // Accessors
-    inline float Matrix::data(uint idx) const
-    {
-        nvDebugCheck(idx < 16);
-        return m_data[idx];
-    }
-    inline float & Matrix::data(uint idx)
-    {
-        nvDebugCheck(idx < 16);
-        return m_data[idx];
-    }
-    inline float Matrix::get(uint row, uint col) const
-    {
-        nvDebugCheck(row < 4 && col < 4);
-        return m_data[col * 4 + row];
-    }
-    inline float Matrix::operator()(uint row, uint col) const
-    {
-        nvDebugCheck(row < 4 && col < 4);
-        return m_data[col * 4 + row];
-    }
-    inline float & Matrix::operator()(uint row, uint col)
-    {
-        nvDebugCheck(row < 4 && col < 4);
-        return m_data[col * 4 + row];
-    }
-
-    inline const float * Matrix::ptr() const
-    {
-        return m_data;
-    }
-
-    inline Vector4 Matrix::row(uint i) const
-    {
-        nvDebugCheck(i < 4);
-        return Vector4(get(i, 0), get(i, 1), get(i, 2), get(i, 3));
-    }
-
-    inline Vector4 Matrix::column(uint i) const
-    {
-        nvDebugCheck(i < 4);
-        return Vector4(get(0, i), get(1, i), get(2, i), get(3, i));
-    }
-
-    inline void Matrix::zero()
-    {
-        m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0;
-        m_data[4] = 0; m_data[5] = 0; m_data[6] = 0; m_data[7] = 0;
-        m_data[8] = 0; m_data[9] = 0; m_data[10] = 0; m_data[11] = 0;
-        m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 0;
-    }
-
-    inline void Matrix::identity()
-    {
-        m_data[0] = 1; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0;
-        m_data[4] = 0; m_data[5] = 1; m_data[6] = 0; m_data[7] = 0;
-        m_data[8] = 0; m_data[9] = 0; m_data[10] = 1; m_data[11] = 0;
-        m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 1;
-    }
-
-    // Apply scale.
-    inline void Matrix::scale(float s)
-    {
-        m_data[0] *= s; m_data[1] *= s; m_data[2] *= s; m_data[3] *= s;
-        m_data[4] *= s; m_data[5] *= s; m_data[6] *= s; m_data[7] *= s;
-        m_data[8] *= s; m_data[9] *= s; m_data[10] *= s; m_data[11] *= s;
-        m_data[12] *= s; m_data[13] *= s; m_data[14] *= s; m_data[15] *= s;
-    }
-
-    // Apply scale.
-    inline void Matrix::scale(Vector3::Arg s)
-    {
-        m_data[0] *= s.x; m_data[1] *= s.x; m_data[2] *= s.x; m_data[3] *= s.x;
-        m_data[4] *= s.y; m_data[5] *= s.y; m_data[6] *= s.y; m_data[7] *= s.y;
-        m_data[8] *= s.z; m_data[9] *= s.z; m_data[10] *= s.z; m_data[11] *= s.z;
-    }
-
-    // Apply translation.
-    inline void Matrix::translate(Vector3::Arg t)
-    {
-        m_data[12] = m_data[0] * t.x + m_data[4] * t.y + m_data[8]  * t.z + m_data[12];
-        m_data[13] = m_data[1] * t.x + m_data[5] * t.y + m_data[9]  * t.z + m_data[13];
-        m_data[14] = m_data[2] * t.x + m_data[6] * t.y + m_data[10] * t.z + m_data[14];
-        m_data[15] = m_data[3] * t.x + m_data[7] * t.y + m_data[11] * t.z + m_data[15];
-    }
-
-    Matrix rotation(float theta, float v0, float v1, float v2);
-
-    // Apply rotation.
-    inline void Matrix::rotate(float theta, float v0, float v1, float v2)
-    {
-        Matrix R(rotation(theta, v0, v1, v2));
-        apply(R);
-    }
-
-    // Apply transform.
-    inline void Matrix::apply(Matrix::Arg m)
-    {
-        nvDebugCheck(this != &m);
-
-        for(int i = 0; i < 4; i++) {
-            const float ai0 = get(i,0), ai1 = get(i,1), ai2 = get(i,2), ai3 = get(i,3);
-            m_data[0 + i] = ai0 * m(0,0) + ai1 * m(1,0) + ai2 * m(2,0) + ai3 * m(3,0);
-            m_data[4 + i] = ai0 * m(0,1) + ai1 * m(1,1) + ai2 * m(2,1) + ai3 * m(3,1);
-            m_data[8 + i] = ai0 * m(0,2) + ai1 * m(1,2) + ai2 * m(2,2) + ai3 * m(3,2);
-            m_data[12+ i] = ai0 * m(0,3) + ai1 * m(1,3) + ai2 * m(2,3) + ai3 * m(3,3);
-        }
-    }
-
-    // Get scale matrix.
-    inline Matrix scale(Vector3::Arg s)
-    {
-        Matrix m(identity);
-        m(0,0) = s.x;
-        m(1,1) = s.y;
-        m(2,2) = s.z;
-        return m;
-    }
-
-    // Get scale matrix.
-    inline Matrix scale(float s)
-    {
-        Matrix m(identity);
-        m(0,0) = m(1,1) = m(2,2) = s;
-        return m;
-    }
-
-    // Get translation matrix.
-    inline Matrix translation(Vector3::Arg t)
-    {
-        Matrix m(identity);
-        m(0,3) = t.x;
-        m(1,3) = t.y;
-        m(2,3) = t.z;
-        return m;
-    }
-
-    // Get rotation matrix.
-    inline Matrix rotation(float theta, float v0, float v1, float v2)
-    {
-        float cost = cosf(theta);
-        float sint = sinf(theta);
-
-        Matrix m(identity);
-
-        if( 1 == v0 && 0 == v1 && 0 == v2 ) {
-            m(1,1) = cost; m(2,1) = -sint;
-            m(1,2) = sint; m(2,2) = cost;
-        }
-        else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
-            m(0,0) = cost; m(2,0) = sint;
-            m(1,2) = -sint; m(2,2) = cost;
-        }
-        else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
-            m(0,0) = cost; m(1,0) = -sint;
-            m(0,1) = sint; m(1,1) = cost;
-        } 
-        else {
-            float a2, b2, c2;
-            a2 = v0 * v0;
-            b2 = v1 * v1;
-            c2 = v2 * v2;
-
-            float iscale = 1.0f / sqrtf(a2 + b2 + c2);
-            v0 *= iscale;
-            v1 *= iscale;
-            v2 *= iscale;
-
-            float abm, acm, bcm;
-            float mcos, asin, bsin, csin;
-            mcos = 1.0f - cost;
-            abm = v0 * v1 * mcos;
-            acm = v0 * v2 * mcos;
-            bcm = v1 * v2 * mcos;
-            asin = v0 * sint;
-            bsin = v1 * sint;
-            csin = v2 * sint;
-            m(0,0) = a2 * mcos + cost;
-            m(1,0) = abm - csin;
-            m(2,0) = acm + bsin;
-            m(3,0) = abm + csin;
-            m(1,1) = b2 * mcos + cost;
-            m(2,1) = bcm - asin;
-            m(3,1) = acm - bsin;
-            m(1,2) = bcm + asin;
-            m(2,2) = c2 * mcos + cost;
-        }
-        return m;
-    }
-
-    //Matrix rotation(float yaw, float pitch, float roll);
-    //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2);
-
-    // Get frustum matrix.
-    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
-    {
-        Matrix m(0.0f);
-
-        float doubleznear = 2.0f * zNear;
-        float one_deltax = 1.0f / (xmax - xmin);
-        float one_deltay = 1.0f / (ymax - ymin);
-        float one_deltaz = 1.0f / (zFar - zNear);
-
-        m(0,0) = doubleznear * one_deltax;
-        m(1,1) = doubleznear * one_deltay;
-        m(0,2) = (xmax + xmin) * one_deltax;
-        m(1,2) = (ymax + ymin) * one_deltay;
-        m(2,2) = -(zFar + zNear) * one_deltaz;
-        m(3,2) = -1.0f;
-        m(2,3) = -(zFar * doubleznear) * one_deltaz;
-
-        return m;
-    }
-
-    // Get inverse frustum matrix.
-    inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
-    {
-        Matrix m(0.0f);
-
-        float one_doubleznear = 1.0f / (2.0f * zNear);
-        float one_doubleznearzfar = 1.0f / (2.0f * zNear * zFar);
-
-        m(0,0) = (xmax - xmin) * one_doubleznear;
-        m(0,3) = (xmax + xmin) * one_doubleznear;
-        m(1,1) = (ymax - ymin) * one_doubleznear;
-        m(1,3) = (ymax + ymin) * one_doubleznear;
-        m(2,3) = -1;
-        m(3,2) = -(zFar - zNear) * one_doubleznearzfar;
-        m(3,3) = (zFar + zNear) * one_doubleznearzfar;
-
-        return m;
-    }
-
-    // Get infinite frustum matrix.
-    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
-    {
-        Matrix m(0.0f);
-
-        float doubleznear = 2.0f * zNear;
-        float one_deltax = 1.0f / (xmax - xmin);
-        float one_deltay = 1.0f / (ymax - ymin);
-        float nudge = 1.0; // 0.999;
-
-        m(0,0) = doubleznear * one_deltax;
-        m(1,1) = doubleznear * one_deltay;
-        m(0,2) = (xmax + xmin) * one_deltax;
-        m(1,2) = (ymax + ymin) * one_deltay;
-        m(2,2) = -1.0f * nudge;
-        m(3,2) = -1.0f;
-        m(2,3) = -doubleznear * nudge;
-
-        return m;
-    }
-
-    // Get perspective matrix.
-    inline Matrix perspective(float fovy, float aspect, float zNear, float zFar)
-    {
-        float xmax = zNear * tan(fovy / 2);
-        float xmin = -xmax;
-
-        float ymax = xmax / aspect;
-        float ymin = -ymax;
-
-        return frustum(xmin, xmax, ymin, ymax, zNear, zFar);	
-    }
-
-    // Get inverse perspective matrix.
-    inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
-    {
-        float xmax = zNear * tan(fovy / 2);
-        float xmin = -xmax;
-
-        float ymax = xmax / aspect;
-        float ymin = -ymax;
-
-        return frustumInverse(xmin, xmax, ymin, ymax, zNear, zFar);	
-    }
-
-    // Get infinite perspective matrix.
-    inline Matrix perspective(float fovy, float aspect, float zNear)
-    {
-        float x = zNear * tan(fovy / 2);
-        float y = x / aspect;
-        return frustum( -x, x, -y, y, zNear );	
-    }
-
-    // Get matrix determinant.
-    inline float Matrix::determinant() const
-    {
-        return 
-            m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] +
-            m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] +
-            m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] +
-            m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] +
-            m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] +
-            m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15];
-    }
-
-    inline Matrix transpose(Matrix::Arg m)
-    {
-        Matrix r;
-        for (int i = 0; i < 4; i++)
-        {
-            for (int j = 0; j < 4; j++)
-            {
-                r(i, j) = m(j, i);
-            }
-        }
-        return r;
-    }
-
-    // Inverse using Cramer's rule.
-    inline Matrix inverseCramer(Matrix::Arg m)
-    {
-        Matrix r;
-        r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
-        r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15);
-        r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15);
-        r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11);
-        r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15);
-        r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15);
-        r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15);
-        r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11);
-        r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15);
-        r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15);
-        r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15);
-        r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11);
-        r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14);
-        r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14);
-        r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14);
-        r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10);
-        r.scale(1.0f / m.determinant());
-        return r;
-    }
-
-    inline Matrix isometryInverse(Matrix::Arg m)
-    {
-        Matrix r(identity);
-
-        // transposed 3x3 upper left matrix
-        for (int i = 0; i < 3; i++)
-        {
-            for (int j = 0; j < 3; j++)
-            {
-                r(i, j) = m(j, i);
-            }
-        }
-
-        // translate by the negative offsets
-        r.translate(-Vector3(m.data(12), m.data(13), m.data(14)));
-
-        return r;
-    }
-
-    // Transform the given 3d point with the given matrix.
-    inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p)
-    {
-        return Vector3(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3));
-    }
-
-    // Transform the given 3d vector with the given matrix.
-    inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p)
-    {
-        return Vector3(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2));
-    }
-
-    // Transform the given 4d vector with the given matrix.
-    inline Vector4 transform(Matrix::Arg m, Vector4::Arg p)
-    {
-        return Vector4(
-            p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3),
-            p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3),
-            p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3),
-            p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3));
-    }
-
-    inline Matrix mul(Matrix::Arg a, Matrix::Arg b)
-    {
-        // @@ Is this the right order? mul(a, b) = b * a
-        Matrix m = a;
-        m.apply(b);
-        return m;
-    }
-
-    inline void Matrix::operator+=(const Matrix & m)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] += m.m_data[i];
-        }
-    }
-
-    inline void Matrix::operator-=(const Matrix & m)
-    {
-        for(int i = 0; i < 16; i++) {
-            m_data[i] -= m.m_data[i];
-        }
-    }
-
-    inline Matrix operator+(const Matrix & a, const Matrix & b)
-    {
-        Matrix m = a;
-        m += b;
-        return m;
-    }
-
-    inline Matrix operator-(const Matrix & a, const Matrix & b)
-    {
-        Matrix m = a;
-        m -= b;
-        return m;
-    }
-
-
-} // nv namespace
-
-
-#if 0 // old code.
-/** @name Special matrices. */
-//@{
-/** Generate a translation matrix. */
-void TranslationMatrix(const Vec3 & v) {
-    data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0;
-    data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0;
-    data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0;
-    data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1;
-}
-
-/** Rotate theta degrees around v. */
-void RotationMatrix( float theta, float v0, float v1, float v2 ) {
-    float cost = cos(theta);
-    float sint = sin(theta);
-
-    if( 1 == v0 && 0 == v1 && 0 == v2 ) {
-        data[0] = 1.0f;	data[1] = 0.0f;	data[2] = 0.0f;	data[3] = 0.0f;
-        data[4] = 0.0f;	data[5] = cost;	data[6] = -sint;data[7] = 0.0f;
-        data[8] = 0.0f;	data[9] = sint;	data[10] = cost;data[11] = 0.0f;
-        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
-    }
-    else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
-        data[0] = cost;	data[1] = 0.0f;	data[2] = sint;	data[3] = 0.0f;
-        data[4] = 0.0f;	data[5] = 1.0f;	data[6] = 0.0f;	data[7] = 0.0f;
-        data[8] = -sint;data[9] = 0.0f;data[10] = cost;	data[11] = 0.0f;
-        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
-    }
-    else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
-        data[0] = cost;	data[1] = -sint;data[2] = 0.0f;	data[3] = 0.0f;
-        data[4] = sint; data[5] = cost;	data[6] = 0.0f;	data[7] = 0.0f;
-        data[8] = 0.0f;	data[9] = 0.0f;	data[10] = 1.0f;data[11] = 0.0f;
-        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
-    } 
-    else {
-        //we need scale a,b,c to unit length.
-        float a2, b2, c2;
-        a2 = v0 * v0;
-        b2 = v1 * v1;
-        c2 = v2 * v2;
-
-        float iscale = 1.0f / sqrtf(a2 + b2 + c2);
-        v0 *= iscale;
-        v1 *= iscale;
-        v2 *= iscale;
-
-        float abm, acm, bcm;
-        float mcos, asin, bsin, csin;
-        mcos = 1.0f - cost;
-        abm = v0 * v1 * mcos;
-        acm = v0 * v2 * mcos;
-        bcm = v1 * v2 * mcos;
-        asin = v0 * sint;
-        bsin = v1 * sint;
-        csin = v2 * sint;
-        data[0] = a2 * mcos + cost;
-        data[1] = abm - csin;
-        data[2] = acm + bsin;
-        data[3] = abm + csin;
-        data[4] = 0.0f;
-        data[5] = b2 * mcos + cost;
-        data[6] = bcm - asin;
-        data[7] = acm - bsin;
-        data[8] = 0.0f;
-        data[9] = bcm + asin;
-        data[10] = c2 * mcos + cost;
-        data[11] = 0.0f;
-        data[12] = 0.0f;
-        data[13] = 0.0f;
-        data[14] = 0.0f;
-        data[15] = 1.0f;
-    }
-}
-
-/*
-void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) {
-v1.Normalize();
-v2.Normalize();
-
-Vec3 v3;
-v3.Cross(v1, v2);
-v3.Normalize();
-
-// Get skew factor.
-float costheta = Vec3DotProduct(v1, v2);
-float sintheta = Real.Sqrt(1 - costheta * costheta);
-float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta;
-
-// Build orthonormal matrix.
-v1 = FXVector3.Cross(v3, v2);
-v1.Normalize();
-
-Matrix R = Matrix::Identity;
-R[0, 0] = v3.X;	// Not sure this is in the correct order...
-R[1, 0] = v3.Y;
-R[2, 0] = v3.Z;
-R[0, 1] = v1.X;
-R[1, 1] = v1.Y;
-R[2, 1] = v1.Z;
-R[0, 2] = v2.X;
-R[1, 2] = v2.Y;
-R[2, 2] = v2.Z;
-
-// Build skew matrix.
-Matrix S = Matrix::Identity;
-S[2, 1] = -skew;
-
-// Return skew transform.
-return R * S * R.Transpose;	// Not sure this is in the correct order...
-}
-*/
-
-/**
-* Generate rotation matrix for the euler angles. This is the same as computing
-* 3 rotation matrices and multiplying them together in our custom order.
-*
-* @todo Have to recompute this code for our new convention.
-**/
-void RotationMatrix( float yaw, float pitch, float roll ) {
-    float sy = sin(yaw+ToRadian(90));
-    float cy = cos(yaw+ToRadian(90));
-    float sp = sin(pitch-ToRadian(90));
-    float cp = cos(pitch-ToRadian(90));
-    float sr = sin(roll);
-    float cr = cos(roll);
-
-    data[0] = cr*cy + sr*sp*sy;
-    data[1] = cp*sy;
-    data[2] = -sr*cy + cr*sp*sy;
-    data[3] = 0;
-
-    data[4] = -cr*sy + sr*sp*cy;
-    data[5] = cp*cy;
-    data[6] = sr*sy + cr*sp*cy;
-    data[7] = 0;
-
-    data[8] = sr*cp;
-    data[9] = -sp;
-    data[10] = cr*cp;
-    data[11] = 0;
-
-    data[12] = 0;
-    data[13] = 0;
-    data[14] = 0;
-    data[15] = 1;
-}
-
-/** Create a frustum matrix with the far plane at the infinity. */
-void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) {
-    float one_deltax, one_deltay, one_deltaz, doubleznear;
-
-    doubleznear = 2.0f * zNear;
-    one_deltax = 1.0f / (xmax - xmin);
-    one_deltay = 1.0f / (ymax - ymin);
-    one_deltaz = 1.0f / (zFar - zNear);
-
-    data[0] = (float)(doubleznear * one_deltax);
-    data[1] = 0.0f;
-    data[2] = 0.0f;
-    data[3] = 0.0f;
-    data[4] = 0.0f;
-    data[5] = (float)(doubleznear * one_deltay);
-    data[6] = 0.f;
-    data[7] = 0.f;
-    data[8] = (float)((xmax + xmin) * one_deltax);
-    data[9] = (float)((ymax + ymin) * one_deltay);
-    data[10] = (float)(-(zFar + zNear) * one_deltaz);
-    data[11] = -1.f;
-    data[12] = 0.f;
-    data[13] = 0.f;
-    data[14] = (float)(-(zFar * doubleznear) * one_deltaz);
-    data[15] = 0.f;
-}
-
-/** Create a frustum matrix with the far plane at the infinity. */
-void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) {
-    float one_deltax, one_deltay, doubleznear, nudge;
-
-    doubleznear = 2.0f * zNear;
-    one_deltax = 1.0f / (xmax - xmin);
-    one_deltay = 1.0f / (ymax - ymin);
-    nudge = 1.0; // 0.999;
-
-    data[0] = doubleznear * one_deltax;
-    data[1] = 0.0f;
-    data[2] = 0.0f;
-    data[3] = 0.0f;
-
-    data[4] = 0.0f;
-    data[5] = doubleznear * one_deltay;
-    data[6] = 0.f;
-    data[7] = 0.f;
-
-    data[8] = (xmax + xmin) * one_deltax;
-    data[9] = (ymax + ymin) * one_deltay;
-    data[10] = -1.0f * nudge;
-    data[11] = -1.0f;
-
-    data[12] = 0.f;
-    data[13] = 0.f;
-    data[14] = -doubleznear * nudge;
-    data[15] = 0.f;
-}
-
-/** Create an inverse frustum matrix with the far plane at the infinity. */
-void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) {
-    // this matrix is wrong (not tested floatly) I think it should be transposed.
-    data[0] = (right - left) / (2 * zNear);
-    data[1] = 0;
-    data[2] = 0;
-    data[3] = (right + left) / (2 * zNear);
-    data[4] = 0;
-    data[5] = (top - bottom) / (2 * zNear);
-    data[6] = 0;
-    data[7] = (top + bottom) / (2 * zNear);
-    data[8] = 0;
-    data[9] = 0;
-    data[10] = 0;
-    data[11] = -1;
-    data[12] = 0;
-    data[13] = 0;
-    data[14] = -1 / (2 * zNear);
-    data[15] = 1 / (2 * zNear);
-}
-
-/** Create an homogeneous projection matrix. */
-void Perspective( float fov, float aspect, float zNear, float zFar ) {
-    float xmin, xmax, ymin, ymax;
-
-    xmax = zNear * tan( fov/2 );
-    xmin = -xmax;
-
-    ymax = xmax / aspect;
-    ymin = -ymax;
-
-    Frustum(xmin, xmax, ymin, ymax, zNear, zFar);
-}
-
-/** Create a projection matrix with the far plane at the infinity. */
-void PerspectiveInf( float fov, float aspect, float zNear ) {
-    float x = zNear * tan( fov/2 );
-    float y = x / aspect;
-    FrustumInf( -x, x, -y, y, zNear );
-}
-
-/** Create an inverse projection matrix with far plane at the infinity. */
-void PerspectiveInfInv( float fov, float aspect, float zNear ) {
-    float x = zNear * tan( fov/2 );
-    float y = x / aspect;
-    FrustumInfInv( -x, x, -y, y, zNear );
-}
-
-/** Build bone matrix from quatertion and offset. */
-void BoneMatrix(const Quat & q, const Vec3 & offset) {
-    float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz;
-
-    // calculate coefficients
-    x2 = q.x + q.x;
-    y2 = q.y + q.y;
-    z2 = q.z + q.z;
-
-    xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
-    yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
-    wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
-
-    data[0] = 1.0f - (yy + zz); 	
-    data[1] = xy - wz;
-    data[2] = xz + wy;		
-    data[3] = 0.0f;
-
-    data[4] = xy + wz;		
-    data[5] = 1.0f - (xx + zz);
-    data[6] = yz - wx;		
-    data[7] = 0.0f;
-
-    data[8] = xz - wy;		
-    data[9] = yz + wx;
-    data[10] = 1.0f - (xx + yy);		
-    data[11] = 0.0f;
-
-    data[12] = offset.x;
-    data[13] = offset.y;
-    data[14] = offset.z;			
-    data[15] = 1.0f;
-}
-
-//@}
-
-
-/** @name Transformations: */
-//@{
-
-/** Apply a general scale. */
-void Scale( float x, float y, float z ) {
-    data[0] *= x;	data[4] *= y;	data[8]  *= z;
-    data[1] *= x;	data[5] *= y;	data[9]  *= z;
-    data[2] *= x;	data[6] *= y;	data[10] *= z;
-    data[3] *= x;	data[7] *= y;	data[11] *= z;
-}
-
-/** Apply a rotation of theta degrees around the axis v*/
-void Rotate( float theta, const Vec3 & v ) {
-    Matrix b;
-    b.RotationMatrix( theta, v[0], v[1], v[2] );
-    Multiply4x3( b );
-}
-
-/** Apply a rotation of theta degrees around the axis v*/
-void Rotate( float theta, float v0, float v1, float v2 ) {
-    Matrix b;
-    b.RotationMatrix( theta, v0, v1, v2 );
-    Multiply4x3( b );
-}
-
-/**
-* Translate the matrix by t. This is the same as multiplying by a
-* translation matrix with the given offset.
-* this = T * this
-*/
-void Translate( const Vec3 &t ) {
-    data[12] = data[0] * t.x + data[4] * t.y + data[8]  * t.z + data[12];
-    data[13] = data[1] * t.x + data[5] * t.y + data[9]  * t.z + data[13];
-    data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14];
-    data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15];
-}
-
-/** 
-* Translate the matrix by x, y, z. This is the same as multiplying by a 
-* translation matrix with the given offsets.
-*/
-void Translate( float x, float y, float z ) {
-    data[12] = data[0] * x + data[4] * y + data[8]  * z + data[12];
-    data[13] = data[1] * x + data[5] * y + data[9]  * z + data[13];
-    data[14] = data[2] * x + data[6] * y + data[10] * z + data[14];
-    data[15] = data[3] * x + data[7] * y + data[11] * z + data[15];
-}
-
-/** Compute the transposed matrix. */
-void Transpose() {
-    piSwap(data[1], data[4]);
-    piSwap(data[2], data[8]);
-    piSwap(data[6], data[9]);
-    piSwap(data[3], data[12]);
-    piSwap(data[7], data[13]);
-    piSwap(data[11], data[14]);
-}
-
-/** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */
-void IsometryInverse() {
-    // transposed 3x3 upper left matrix
-    piSwap(data[1], data[4]);
-    piSwap(data[2], data[8]);
-    piSwap(data[6], data[9]);
-
-    // translate by the negative offsets
-    Vec3 v(-data[12], -data[13], -data[14]);
-    data[12] = data[13] = data[14] = 0;
-    Translate(v);
-}
-
-/** Compute the inverse of the affine portion of this matrix. */
-void AffineInverse() {
-    data[12] = data[13] = data[14] = 0;
-    Transpose();
-}
-//@}
-
-/** @name Matrix operations: */
-//@{
-
-/** Return the determinant of this matrix. */
-float Determinant() const {
-    return	data[0] * data[5] * data[10] * data[15] + 
-        data[1] * data[6] * data[11] * data[12] +
-        data[2] * data[7] * data[ 8] * data[13] +
-        data[3] * data[4] * data[ 9] * data[14] -
-        data[3] * data[6] * data[ 9] * data[12] -
-        data[2] * data[5] * data[ 8] * data[15] -
-        data[1] * data[4] * data[11] * data[14] -
-        data[0] * data[7] * data[10] * data[12];
-}
-
-
-/** Standard matrix product: this *= B. */
-void Multiply4x4( const Matrix & restrict B ) {
-    Multiply4x4(*this, B);
-}
-
-/** Standard matrix product: this = A * B. this != B*/
-void Multiply4x4( const Matrix & A, const Matrix & restrict B ) {
-    piDebugCheck(this != &B);
-
-    for(int i = 0; i < 4; i++) {
-        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
-        GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
-        GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
-        GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
-        GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
-    }
-
-    /* Unrolled but does not allow this == A
-    data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3];
-    data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3];
-    data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3];
-    data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3];
-    data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7];
-    data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7];
-    data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7];
-    data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7];
-    data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11];
-    data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11];
-    data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11];
-    data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11];
-    data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15];
-    data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15];
-    data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15];
-    data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15];
-    */
-}
-
-/** Standard matrix product: this *= B. */
-void Multiply4x3( const Matrix & restrict B ) {
-    Multiply4x3(*this, B);
-}
-
-/** Standard product of matrices, where the last row is [0 0 0 1]. */
-void Multiply4x3( const Matrix & A, const Matrix & restrict B ) {
-    piDebugCheck(this != &B);
-
-    for(int i = 0; i < 3; i++) {
-        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
-        GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
-        GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
-        GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
-        GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
-    }
-    data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f;
-
-    /* Unrolled but does not allow this == A
-    data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3];
-    data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3];
-    data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3];
-    data[3] = 0.0f;
-    data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7];
-    data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7];
-    data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7];
-    data[7] = 0.0f;
-    data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11];
-    data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11];
-    data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11];
-    data[11]= 0.0f;
-    data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15];
-    data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15];
-    data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15];
-    data[15]= 1.0f;
-    */
-}
-//@}
-
-
-/** @name Vector operations: */
-//@{
-
-/** Transform 3d vector (w=0). */
-void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10];
-}
-/** Transform 3d vector by the transpose (w=0). */
-void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2];
-    dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6];
-    dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10];
-}
-
-/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */
-void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-}
-
-/** Transform a point, normalize it, and return w. */
-float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    float w;
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-    w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]);
-    *dest *= w;
-    return w;
-}
-
-/** Transform a point and return w. */
-float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const {
-    piDebugCheck(&orig != dest);
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-    return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
-}
-
-/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */
-void TransformVec4(const Vec3 & orig, Vec4 * dest) const {
-    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
-    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
-    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
-    dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
-}
-//@}
-
-/** @name Matrix analysis. */
-//@{
-
-/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */
-void GetEulerAnglesZYZ(float * s, float * t, float * r) const {
-    if( GetElem(2,2) < 1.0f ) {
-        if( GetElem(2,2) > -1.0f ) {
-            // 	cs*ct*cr-ss*sr 		-ss*ct*cr-cs*sr		st*cr
-            //	cs*ct*sr+ss*cr		-ss*ct*sr+cs*cr		st*sr
-            //	-cs*st				ss*st				ct
-            *s = atan2(GetElem(1,2), -GetElem(0,2));
-            *t = acos(GetElem(2,2));
-            *r = atan2(GetElem(2,1), GetElem(2,0));		
-        }
-        else {
-            // 	-c(s-r)	 	s(s-r)		0
-            //	s(s-r)		c(s-r)		0
-            //	0			0			-1
-            *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r
-            *t = PI;
-            *r = 0;
-        }
-    }
-    else {
-        // 	c(s+r)		-s(s+r)		0
-        //	s(s+r)		c(s+r)		0
-        //	0			0			1
-        *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r
-        *t = 0;
-        *r = 0;
-    }
-}
-
-//@}
-
-MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m );
-
-/** Print to debug output. */
-void Print() const {
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] );
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] );
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] );
-    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] );
-}
-
-
-public:
-
-    float data[16];
-
-};
-#endif
-
-
-#endif // NV_MATH_MATRIX_INL
diff --git a/thirdparty/thekla_atlas/nvmath/Morton.h b/thirdparty/thekla_atlas/nvmath/Morton.h
deleted file mode 100644
index 10e0d8152a..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Morton.h
+++ /dev/null
@@ -1,83 +0,0 @@
-
-// Code from ryg:
-// http://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
-
-
-// "Insert" a 0 bit after each of the 16 low bits of x
-inline uint32 part1By1(uint32 x)
-{
-	x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
-	x = (x ^ (x <<  8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
-	x = (x ^ (x <<  4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
-	x = (x ^ (x <<  2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
-	x = (x ^ (x <<  1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
-	return x;
-}
-
-// "Insert" two 0 bits after each of the 10 low bits of x
-inline uint32 part1By2(uint32 x)
-{
-	x &= 0x000003ff;                  // x = ---- ---- ---- ---- ---- --98 7654 3210
-	x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
-	x = (x ^ (x <<  8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
-	x = (x ^ (x <<  4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
-	x = (x ^ (x <<  2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
-	return x;
-}
-
-inline uint32 encodeMorton2(uint32 x, uint32 y)
-{
-	return (part1By1(y) << 1) + part1By1(x);
-}
-
-inline uint32 encodeMorton3(uint32 x, uint32 y, uint32 z)
-{
-	return (part1By2(z) << 2) + (part1By2(y) << 1) + part1By2(x);
-}
-
-// Inverse of part1By1 - "delete" all odd-indexed bits
-inline uint32 compact1By1(uint32 x)
-{
-	x &= 0x55555555;                  // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
-	x = (x ^ (x >>  1)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
-	x = (x ^ (x >>  2)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
-	x = (x ^ (x >>  4)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
-	x = (x ^ (x >>  8)) & 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
-	return x;
-}
-
-// Inverse of part1By2 - "delete" all bits not at positions divisible by 3
-inline uint32 compact1By2(uint32 x)
-{
-	x &= 0x09249249;                  // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
-	x = (x ^ (x >>  2)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
-	x = (x ^ (x >>  4)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
-	x = (x ^ (x >>  8)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
-	x = (x ^ (x >> 16)) & 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
-	return x;
-}
-
-inline uint32 decodeMorton2X(uint32 code)
-{
-	return compact1By1(code >> 0);
-}
-
-inline uint32 decodeMorton2Y(uint32 code)
-{
-	return compact1By1(code >> 1);
-}
-
-inline uint32 decodeMorton3X(uint32 code)
-{
-	return compact1By2(code >> 0);
-}
-
-inline uint32 decodeMorton3Y(uint32 code)
-{
-	return compact1By2(code >> 1);
-}
-
-inline uint32 decodeMorton3Z(uint32 code)
-{
-	return compact1By2(code >> 2);
-}
-\ No newline at end of file
diff --git a/thirdparty/thekla_atlas/nvmath/Plane.cpp b/thirdparty/thekla_atlas/nvmath/Plane.cpp
deleted file mode 100644
index 8b54f829ad..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Plane.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include "Plane.h"
-#include "Plane.inl"
-#include "Matrix.inl"
-
-namespace nv
-{
-    Plane transformPlane(const Matrix & m, const Plane & p)
-    {
-        Vector3 newVec = transformVector(m, p.vector());
-
-        Vector3 ptInPlane = p.offset() * p.vector();
-        ptInPlane = transformPoint(m, ptInPlane);
-
-        return Plane(newVec, ptInPlane);
-    }
-
-    Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c)
-    {
-        return dot(a.vector(), cross(b.vector(), c.vector())) * (
-            a.offset() * cross(b.vector(), c.vector()) + 
-            c.offset() * cross(a.vector(), b.vector()) +
-            b.offset() * cross(c.vector(), a.vector()));
-    }
-
-} // nv namespace
diff --git a/thirdparty/thekla_atlas/nvmath/Plane.h b/thirdparty/thekla_atlas/nvmath/Plane.h
deleted file mode 100644
index dc468b28e2..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Plane.h
+++ /dev/null
@@ -1,42 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_PLANE_H
-#define NV_MATH_PLANE_H
-
-#include "nvmath.h"
-#include "Vector.h"
-
-namespace nv
-{
-    class Matrix;
-
-    class NVMATH_CLASS Plane
-    {
-    public:
-        Plane();
-        Plane(float x, float y, float z, float w);
-        Plane(const Vector4 & v);
-        Plane(const Vector3 & v, float d);
-        Plane(const Vector3 & normal, const Vector3 & point);
-        Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2);
-
-        const Plane & operator=(const Plane & v);
-
-        Vector3 vector() const;
-        float offset() const;
-        Vector3 normal() const;
-
-        void operator*=(float s);
-
-        Vector4 v;
-    };
-
-    Plane transformPlane(const Matrix &, const Plane &);
-
-    Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c);
-
-
-} // nv namespace
-
-#endif // NV_MATH_PLANE_H
diff --git a/thirdparty/thekla_atlas/nvmath/Plane.inl b/thirdparty/thekla_atlas/nvmath/Plane.inl
deleted file mode 100644
index 2277e38cd5..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Plane.inl
+++ /dev/null
@@ -1,50 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_PLANE_INL
-#define NV_MATH_PLANE_INL
-
-#include "Plane.h"
-#include "Vector.inl"
-
-namespace nv
-{
-    inline Plane::Plane() {}
-    inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {}
-    inline Plane::Plane(const Vector4 & v) : v(v) {}
-    inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {}
-    inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {}
-    inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) {
-        Vector3 n = cross(v1-v0, v2-v0);
-        float d = -dot(n, v0);
-        v = Vector4(n, d);
-    }
-
-    inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; }
-
-    inline Vector3 Plane::vector() const { return v.xyz(); }
-    inline float Plane::offset() const { return v.w; }
-    inline Vector3 Plane::normal() const { return normalize(vector(), 0.0f); }
-
-    // Normalize plane.
-    inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)
-    {
-        const float len = length(plane.vector());
-        const float inv = isZero(len, epsilon) ? 0 : 1.0f / len;
-        return Plane(plane.v * inv);
-    }
-
-    // Get the signed distance from the given point to this plane.
-    inline float distance(const Plane & plane, const Vector3 & point)
-    {
-        return dot(plane.vector(), point) + plane.offset();
-    }
-
-    inline void Plane::operator*=(float s)
-    {
-        v *= s;
-    }
-
-} // nv namespace
-
-#endif // NV_MATH_PLANE_H
diff --git a/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp b/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp
deleted file mode 100644
index 3553e48f64..0000000000
--- a/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-#include "ProximityGrid.h"
-
-#include "Box.inl"
-#include "Morton.h"
-
-
-using namespace nv;
-
-ProximityGrid::ProximityGrid() {
-}
-
-void ProximityGrid::reset() {
-    cellArray.clear();
-}
-
-void ProximityGrid::init(const Array<Vector3> & pointArray) {
-
-	// Compute bounding box.
-    Box box;
-	box.clearBounds();
-	
-    const uint count = pointArray.count();
-
-    for (uint i = 0; i < count; i++) {
-		box.addPointToBounds(pointArray[i]);
-	}
-
-    init(box, count);
-
-	// Insert all points.
-	for (uint i = 0; i < count; i++) {
-        add(pointArray[i], i);
-    }
-}
-
-
-void ProximityGrid::init(const Box & box, uint count) {
-    reset();
- 
-    // Determine grid size.
-    float cellWidth;
-
-    Vector3 diagonal = box.extents() * 2.f;
-    float volume = box.volume();
-
-    if (equal(volume, 0)) {
-        // Degenerate box, treat like a quad.
-        Vector2 quad;
-        if (diagonal.x < diagonal.y && diagonal.x < diagonal.z) {
-            quad.x = diagonal.y;
-            quad.y = diagonal.z;
-        }
-        else if (diagonal.y < diagonal.x && diagonal.y < diagonal.z) {
-            quad.x = diagonal.x;
-            quad.y = diagonal.z;
-        }
-        else {
-            quad.x = diagonal.x;
-            quad.y = diagonal.y;
-        }
-
-        float cellArea = quad.x * quad.y / count;
-        cellWidth = sqrtf(cellArea); // pow(cellArea, 1.0f / 2.0f);
-    }
-    else {
-        // Ideally we want one cell per point.
-        float cellVolume = volume / count;
-        cellWidth = pow(cellVolume, 1.0f / 3.0f);
-    }
-
-    nvDebugCheck(cellWidth != 0);
-
-    sx = max(1, ftoi_ceil(diagonal.x / cellWidth));
-    sy = max(1, ftoi_ceil(diagonal.y / cellWidth));
-    sz = max(1, ftoi_ceil(diagonal.z / cellWidth));
-
-    invCellSize.x = float(sx) / diagonal.x;
-    invCellSize.y = float(sy) / diagonal.y;
-    invCellSize.z = float(sz) / diagonal.z;
-
-	cellArray.resize(sx * sy * sz);
-
-    corner = box.minCorner; // @@ Align grid better?
-}
-
-// Gather all points inside the given sphere.
-// Radius is assumed to be small, so we don't bother culling the cells.
-void ProximityGrid::gather(const Vector3 & position, float radius, Array<uint> & indexArray) {
-    int x0 = index_x(position.x - radius);
-    int x1 = index_x(position.x + radius);
-
-    int y0 = index_y(position.y - radius);
-    int y1 = index_y(position.y + radius);
-
-    int z0 = index_z(position.z - radius);
-    int z1 = index_z(position.z + radius);
-
-    for (int z = z0; z <= z1; z++) {
-        for (int y = y0; y <= y1; y++) {
-            for (int x = x0; x <= x1; x++) {
-                int idx = index(x, y, z);
-                indexArray.append(cellArray[idx].indexArray);
-            }
-        }
-    }
-}
-
-
-uint32 ProximityGrid::mortonCount() const {
-    uint64 s = U64(max3(sx, sy, sz));
-    s = nextPowerOfTwo(s);
-    
-    if (s > 1024) {
-        return U32(s * s * min3(sx, sy, sz));
-    }
-
-    return U32(s * s * s);
-}
-
-int ProximityGrid::mortonIndex(uint32 code) const {
-    uint32 x, y, z;
-
-    uint s = U32(max3(sx, sy, sz));
-    if (s > 1024) {
-        // Use layered two-dimensional morton order.
-        s = nextPowerOfTwo(s);
-        uint layer = code / (s * s);
-        code = code % (s * s);
-
-        uint layer_count = U32(min3(sx, sy, sz));
-        if (sx == layer_count) {
-            x = layer;
-            y = decodeMorton2X(code);
-            z = decodeMorton2Y(code);
-        }
-        else if (sy == layer_count) {
-            x = decodeMorton2Y(code); 
-            y = layer; 
-            z = decodeMorton2X(code);
-        }
-        else /*if (sz == layer_count)*/ {
-            x = decodeMorton2X(code);
-            y = decodeMorton2Y(code);
-            z = layer;
-        }
-    }
-    else {
-        x = decodeMorton3X(code);
-        y = decodeMorton3Y(code);
-        z = decodeMorton3Z(code);
-    }
-
-    if (x >= U32(sx) || y >= U32(sy) || z >= U32(sz)) {
-        return -1;
-    }
-
-    return index(x, y, z);
-}
diff --git a/thirdparty/thekla_atlas/nvmath/ProximityGrid.h b/thirdparty/thekla_atlas/nvmath/ProximityGrid.h
deleted file mode 100644
index a21bb3bd68..0000000000
--- a/thirdparty/thekla_atlas/nvmath/ProximityGrid.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#pragma once
-#ifndef NV_MATH_PROXIMITYGRID_H
-#define NV_MATH_PROXIMITYGRID_H
-
-#include "Vector.h"
-#include "ftoi.h"
-
-#include "nvcore/Array.inl"
-
-
-// A simple, dynamic proximity grid based on Jon's code.
-// Instead of storing pointers here I store indices.
-
-namespace nv {
-
-    class Box;
-
-    struct Cell {
-        Array<uint> indexArray;
-    };
-
-    struct ProximityGrid {
-        ProximityGrid();
-
-        void reset();
-        void init(const Array<Vector3> & pointArray);
-        void init(const Box & box, uint count);
-
-        int index_x(float x) const;
-        int index_y(float y) const;
-        int index_z(float z) const;
-        int index(int x, int y, int z) const;
-        int index(const Vector3 & pos) const;
-        
-        uint32 mortonCount() const;
-        int mortonIndex(uint32 code) const;
-
-        void add(const Vector3 & pos, uint key);
-        bool remove(const Vector3 & pos, uint key);
-
-        void gather(const Vector3 & pos, float radius, Array<uint> & indices);
-
-        Array<Cell> cellArray;
-
-        Vector3 corner;
-        Vector3 invCellSize;
-        int sx, sy, sz;
-    };
-
-    // For morton traversal, do:
-    // for (int code = 0; code < mortonCount(); code++) {
-    //   int idx = mortonIndex(code);
-    //   if (idx < 0) continue;
-    // }
-
-
-
-    inline int ProximityGrid::index_x(float x) const {
-        return clamp(ftoi_floor((x - corner.x) * invCellSize.x),  0, sx-1);
-    }
-
-    inline int ProximityGrid::index_y(float y) const {
-        return clamp(ftoi_floor((y - corner.y) * invCellSize.y),  0, sy-1);
-    }
-
-    inline int ProximityGrid::index_z(float z) const {
-        return clamp(ftoi_floor((z - corner.z) * invCellSize.z),  0, sz-1);
-    }
-
-    inline int ProximityGrid::index(int x, int y, int z) const {
-        nvDebugCheck(x >= 0 && x < sx);
-        nvDebugCheck(y >= 0 && y < sy);
-        nvDebugCheck(z >= 0 && z < sz);
-        int idx = (z * sy + y) * sx + x;
-        nvDebugCheck(idx >= 0 && uint(idx) < cellArray.count());
-        return idx;
-    }
-
-    inline int ProximityGrid::index(const Vector3 & pos) const {
-        int x = index_x(pos.x);
-        int y = index_y(pos.y);
-        int z = index_z(pos.z);
-        return index(x, y, z);
-    }
-
-
-    inline void ProximityGrid::add(const Vector3 & pos, uint key) {
-        uint idx = index(pos);
-        cellArray[idx].indexArray.append(key);
-    }
-
-    inline bool ProximityGrid::remove(const Vector3 & pos, uint key) {
-        uint idx = index(pos);
-        return cellArray[idx].indexArray.remove(key);
-    }
-
-} // nv namespace
-
-#endif // NV_MATH_PROXIMITYGRID_H
diff --git a/thirdparty/thekla_atlas/nvmath/Quaternion.h b/thirdparty/thekla_atlas/nvmath/Quaternion.h
deleted file mode 100644
index dc5219e5e4..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Quaternion.h
+++ /dev/null
@@ -1,213 +0,0 @@
-// This code is in the public domain -- castano@gmail.com
-
-#pragma once
-#ifndef NV_MATH_QUATERNION_H
-#define NV_MATH_QUATERNION_H
-
-#include "nvmath/nvmath.h"
-#include "nvmath/Vector.inl" // @@ Do not include inl files from header files.
-#include "nvmath/Matrix.h"
-
-namespace nv
-{
-
-    class NVMATH_CLASS Quaternion
-    {
-    public:
-        typedef Quaternion const & Arg;
-
-        Quaternion();
-        explicit Quaternion(float f);
-        Quaternion(float x, float y, float z, float w);
-        Quaternion(Vector4::Arg v);
-
-        const Quaternion & operator=(Quaternion::Arg v);
-
-        Vector4 asVector() const;
-
-        union {
-            struct {
-                float x, y, z, w;
-            };
-            float component[4];
-        };
-    };
-
-    inline Quaternion::Quaternion() {}
-    inline Quaternion::Quaternion(float f) : x(f), y(f), z(f), w(f) {}
-    inline Quaternion::Quaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
-    inline Quaternion::Quaternion(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
-
-    // @@ Move all these to Quaternion.inl!
-
-    inline const Quaternion & Quaternion::operator=(Quaternion::Arg v) { 
-        x = v.x;
-        y = v.y;
-        z = v.z;
-        w = v.w;
-        return *this;
-    }
-
-    inline Vector4 Quaternion::asVector() const { return Vector4(x, y, z, w); }
-
-    inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
-    {
-        return Quaternion(
-            + a.x*b.w + a.y*b.z - a.z*b.y + a.w*b.x,
-            - a.x*b.z + a.y*b.w + a.z*b.x + a.w*b.y,
-            + a.x*b.y - a.y*b.x + a.z*b.w + a.w*b.z,
-            - a.x*b.x - a.y*b.y - a.z*b.z + a.w*b.w);
-    }
-
-    inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
-    {
-        return Quaternion(
-            + a.y*b.z - a.z*b.y + a.w*b.x,
-            - a.x*b.z           + a.z*b.x + a.w*b.y,
-            + a.x*b.y - a.y*b.x           + a.w*b.z,
-            - a.x*b.x - a.y*b.y - a.z*b.z );
-    }
-
-    inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
-    {
-        return Quaternion(
-            + a.x*b.w + a.y*b.z - a.z*b.y,
-            - a.x*b.z + a.y*b.w + a.z*b.x,
-            + a.x*b.y - a.y*b.x + a.z*b.w,
-            - a.x*b.x - a.y*b.y - a.z*b.z);
-    }
-
-    inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
-    {
-        return mul(a, b);
-    }
-
-    inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
-    {
-        return mul(a, b);
-    }
-
-    inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
-    {
-        return mul(a, b);
-    }
-
-
-    inline Quaternion scale(Quaternion::Arg q, float s)
-    {
-        return scale(q.asVector(), s);
-    }
-    inline Quaternion operator *(Quaternion::Arg q, float s)
-    {
-        return scale(q, s);
-    }
-    inline Quaternion operator *(float s, Quaternion::Arg q)
-    {
-        return scale(q, s);
-    }
-
-    inline Quaternion scale(Quaternion::Arg q, Vector4::Arg s)
-    {
-        return scale(q.asVector(), s);
-    }
-    /*inline Quaternion operator *(Quaternion::Arg q, Vector4::Arg s)
-    {
-    return scale(q, s);
-    }
-    inline Quaternion operator *(Vector4::Arg s, Quaternion::Arg q)
-    {
-    return scale(q, s);
-    }*/
-
-    inline Quaternion conjugate(Quaternion::Arg q)
-    {
-        return scale(q, Vector4(-1, -1, -1, 1));
-    }
-
-    inline float length(Quaternion::Arg q)
-    {
-        return length(q.asVector());
-    }
-
-    inline bool isNormalized(Quaternion::Arg q, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(q), 1, epsilon);
-    }
-
-    inline Quaternion normalize(Quaternion::Arg q, float epsilon = NV_EPSILON)
-    {
-        float l = length(q);
-        nvDebugCheck(!isZero(l, epsilon));
-        Quaternion n = scale(q, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Quaternion inverse(Quaternion::Arg q)
-    {
-        return conjugate(normalize(q));
-    }
-
-    /// Create a rotation quaternion for @a angle alpha around normal vector @a v.
-    inline Quaternion axisAngle(Vector3::Arg v, float alpha)
-    {
-        float s = sinf(alpha * 0.5f);
-        float c = cosf(alpha * 0.5f);
-        return Quaternion(Vector4(v * s, c));
-    }
-
-    inline Vector3 imag(Quaternion::Arg q)
-    {
-        return q.asVector().xyz();
-    }
-
-    inline float real(Quaternion::Arg q)
-    {
-        return q.w;
-    }
-
-
-    /// Transform vector.
-    inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
-    {
-        //Quaternion t = q * v * conjugate(q);
-        //return imag(t);
-
-        // Faster method by Fabian Giesen and others:
-        // http://molecularmusings.wordpress.com/2013/05/24/a-faster-quaternion-vector-multiplication/
-        // http://mollyrocket.com/forums/viewtopic.php?t=833&sid=3a84e00a70ccb046cfc87ac39881a3d0
-        
-        Vector3 t = 2 * cross(imag(q), v);
-        return v + q.w * t + cross(imag(q), t);
-    }
-
-    // @@ Not tested.
-    // From Insomniac's Mike Day:
-    // http://www.insomniacgames.com/converting-a-rotation-matrix-to-a-quaternion/
-    inline Quaternion fromMatrix(const Matrix & m) {
-        if (m(2, 2) < 0) {
-            if (m(0, 0) < m(1,1)) {
-                float t = 1 - m(0, 0) - m(1, 1) - m(2, 2);
-                return Quaternion(t, m(0,1)+m(1,0), m(2,0)+m(0,2), m(1,2)-m(2,1));
-            }
-            else {
-                float t = 1 - m(0, 0) + m(1, 1) - m(2, 2);
-                return Quaternion(t, m(0,1) + m(1,0), m(1,2) + m(2,1), m(2,0) - m(0,2));
-            }
-        }
-        else {
-            if (m(0, 0) < -m(1, 1)) {
-                float t = 1 - m(0, 0) - m(1, 1) + m(2, 2);
-                return Quaternion(t, m(2,0) + m(0,2), m(1,2) + m(2,1), m(0,1) - m(1,0));
-            }
-            else {
-                float t = 1 + m(0, 0) + m(1, 1) + m(2, 2);
-                return Quaternion(t, m(1,2) - m(2,1), m(2,0) - m(0,2), m(0,1) - m(1,0));
-            }
-        }
-    }
-
-
-} // nv namespace
-
-#endif // NV_MATH_QUATERNION_H
diff --git a/thirdparty/thekla_atlas/nvmath/Random.cpp b/thirdparty/thekla_atlas/nvmath/Random.cpp
deleted file mode 100644
index 1a60e7f5e7..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Random.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include <nvmath/Random.h>
-#include <time.h>
-
-using namespace nv;
-
-// Statics
-const uint16 Rand48::a0 = 0xE66D; 
-const uint16 Rand48::a1 = 0xDEEC; 
-const uint16 Rand48::a2 = 0x0005;
-const uint16 Rand48::c0 = 0x000B;
-
-
-/// Get a random seed based on the current time.
-uint Rand::randomSeed()
-{
-    return (uint)time(NULL);
-}
-
-
-void MTRand::initialize( uint32 seed )
-{
-    // Initialize generator state with seed
-    // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier.
-    // In previous versions, most significant bits (MSBs) of the seed affect
-    // only MSBs of the state array.  Modified 9 Jan 2002 by Makoto Matsumoto.
-    uint32 *s = state;
-    uint32 *r = state;
-    int i = 1;
-    *s++ = seed & 0xffffffffUL;
-    for( ; i < N; ++i )
-    {
-        *s++ = ( 1812433253UL * ( *r ^ (*r >> 30) ) + i ) & 0xffffffffUL;
-        r++;
-    }
-}
-
-
-void MTRand::reload()
-{
-    // Generate N new values in state
-    // Made clearer and faster by Matthew Bellew (matthew.bellew@home.com)
-    uint32 *p = state;
-    int i;
-    for( i = N - M; i--; ++p )
-        *p = twist( p[M], p[0], p[1] );
-    for( i = M; --i; ++p )
-        *p = twist( p[M-N], p[0], p[1] );
-    *p = twist( p[M-N], p[0], state[0] );
-
-    left = N, next = state;
-}
-
diff --git a/thirdparty/thekla_atlas/nvmath/Random.h b/thirdparty/thekla_atlas/nvmath/Random.h
deleted file mode 100644
index 223292706a..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Random.h
+++ /dev/null
@@ -1,376 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_RANDOM_H
-#define NV_MATH_RANDOM_H
-
-#include "nvmath.h"
-#include "nvcore/Utils.h" // nextPowerOfTwo
-
-
-namespace nv
-{
-
-    /// Interface of the random number generators.
-    class Rand
-    {
-    public:
-
-        virtual ~Rand() {}
-
-        enum time_e { Time };
-
-        /// Provide a new seed.
-        virtual void seed( uint s ) { /* empty */ };
-
-        /// Get an integer random number.
-        virtual uint get() = 0;
-
-        /// Get a random number on [0, max] interval.
-        uint getRange( uint max )
-        {
-            if (max == 0) return 0;
-            if (max == NV_UINT32_MAX) return get();
-
-            const uint np2 = nextPowerOfTwo( max+1 ); // @@ This fails if max == NV_UINT32_MAX
-            const uint mask = np2 - 1;
-            uint n;
-            do { n = get() & mask; } while( n > max );
-            return n;
-        }
-
-        /// Random number on [0.0, 1.0] interval.
-        float getFloat()
-        {
-            union
-            {
-                uint32 i;
-                float f;
-            } pun;
-
-            pun.i = 0x3f800000UL | (get() & 0x007fffffUL);
-            return pun.f - 1.0f;
-        }
-
-        float getFloatRange(float min, float max) {
-            return getFloat() * (max - min) + min;
-        }
-
-        /*
-        /// Random number on [0.0, 1.0] interval.
-        double getReal()
-        {
-        return double(get()) * (1.0/4294967295.0); // 2^32-1
-        }
-
-        /// Random number on [0.0, 1.0) interval.
-        double getRealExclusive()
-        {
-        return double(get()) * (1.0/4294967296.0); // 2^32
-        }
-        */
-
-        /// Get the max value of the random number.
-        uint max() const { return NV_UINT32_MAX; }
-
-        // Get a random seed.
-        static uint randomSeed();
-
-    };
-
-
-    /// Very simple random number generator with low storage requirements.
-    class SimpleRand : public Rand
-    {
-    public:
-
-        /// Constructor that uses the current time as the seed.
-        SimpleRand( time_e )
-        {
-            seed(randomSeed());
-        }
-
-        /// Constructor that uses the given seed.
-        SimpleRand( uint s = 0 )
-        {
-            seed(s);
-        }
-
-        /// Set the given seed.
-        virtual void seed( uint s )
-        {
-            current = s;
-        }
-
-        /// Get a random number.
-        virtual uint get()
-        {
-            return current = current * 1103515245 + 12345;
-        }
-
-    private:
-
-        uint current;
-
-    };
-
-
-    /// Mersenne twister random number generator.
-    class MTRand : public Rand
-    {
-    public:
-
-        enum { N = 624 };       // length of state vector
-        enum { M = 397 };
-
-        /// Constructor that uses the current time as the seed.
-        MTRand( time_e )
-        {
-            seed(randomSeed());
-        }
-
-        /// Constructor that uses the given seed.
-        MTRand( uint s = 0 )
-        {
-            seed(s);
-        }
-
-        /// Constructor that uses the given seeds.
-        NVMATH_API MTRand( const uint * seed_array, uint length );
-
-
-        /// Provide a new seed.
-        virtual void seed( uint s )
-        {
-            initialize(s);
-            reload();
-        }	
-
-        /// Get a random number between 0 - 65536.
-        virtual uint get()
-        {
-            // Pull a 32-bit integer from the generator state
-            // Every other access function simply transforms the numbers extracted here
-            if( left == 0 ) { 
-                reload(); 
-            }
-            left--;
-
-            uint s1;
-            s1 = *next++;
-            s1 ^= (s1 >> 11);
-            s1 ^= (s1 <<  7) & 0x9d2c5680U;
-            s1 ^= (s1 << 15) & 0xefc60000U;
-            return ( s1 ^ (s1 >> 18) );		
-        };
-
-
-    private:
-
-        NVMATH_API void initialize( uint32 seed );
-        NVMATH_API void reload();
-
-        uint hiBit( uint u ) const { return u & 0x80000000U; }
-        uint loBit( uint u ) const { return u & 0x00000001U; }
-        uint loBits( uint u ) const { return u & 0x7fffffffU; }
-        uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); }
-        uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); }
-
-    private:
-
-        uint state[N];	// internal state
-        uint * next;	// next value to get from state
-        int left;		// number of values left before reload needed		
-
-    };
-
-
-
-    /** George Marsaglia's random number generator. 
-    * Code based on Thatcher Ulrich public domain source code:
-    * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto
-    *
-    * PRNG code adapted from the complimentary-multiply-with-carry
-    * code in the article: George Marsaglia, "Seeds for Random Number
-    * Generators", Communications of the ACM, May 2003, Vol 46 No 5,
-    * pp90-93.
-    * 
-    * The article says:
-    * 
-    * "Any one of the choices for seed table size and multiplier will
-    * provide a RNG that has passed extensive tests of randomness,
-    * particularly those in [3], yet is simple and fast --
-    * approximately 30 million random 32-bit integers per second on a
-    * 850MHz PC.  The period is a*b^n, where a is the multiplier, n
-    * the size of the seed table and b=2^32-1.  (a is chosen so that
-    * b is a primitive root of the prime a*b^n + 1.)"
-    * 
-    * [3] Marsaglia, G., Zaman, A., and Tsang, W.  Toward a universal
-    * random number generator.  _Statistics and Probability Letters
-    * 8_ (1990), 35-39.
-    */
-    class GMRand : public Rand
-    {
-    public:
-
-        enum { SEED_COUNT = 8 };
-
-        //	const uint64 a = 123471786;		// for SEED_COUNT=1024
-        //	const uint64 a = 123554632;		// for SEED_COUNT=512
-        //	const uint64 a = 8001634;		// for SEED_COUNT=255
-        //	const uint64 a = 8007626;		// for SEED_COUNT=128
-        //	const uint64 a = 647535442;		// for SEED_COUNT=64
-        //	const uint64 a = 547416522;		// for SEED_COUNT=32
-        //	const uint64 a = 487198574;		// for SEED_COUNT=16
-        //	const uint64 a = 716514398U;	// for SEED_COUNT=8
-        enum { a = 716514398U };
-
-
-        GMRand( time_e )
-        {
-            seed(randomSeed());
-        }
-
-        GMRand(uint s = 987654321)
-        {
-            seed(s);
-        }
-
-
-        /// Provide a new seed.
-        virtual void seed( uint s )
-        {
-            c = 362436;
-            i = SEED_COUNT - 1;
-
-            for(int i = 0; i < SEED_COUNT; i++) {
-                s = s ^ (s << 13);
-                s = s ^ (s >> 17);
-                s = s ^ (s << 5);
-                Q[i] = s;
-            }
-        }
-
-        /// Get a random number between 0 - 65536.
-        virtual uint get()
-        {
-            const uint32 r = 0xFFFFFFFE;		
-
-            uint64 t;
-            uint32 x;
-
-            i = (i + 1) & (SEED_COUNT - 1);
-            t = a * Q[i] + c;
-            c = uint32(t >> 32);
-            x = uint32(t + c);
-
-            if( x < c ) {
-                x++;
-                c++;
-            }
-
-            uint32  val = r - x;
-            Q[i] = val;
-            return val;
-        };
-
-
-    private:
-
-        uint32 c;
-        uint32 i;
-        uint32 Q[8];
-
-    };
-
-
-    /** Random number implementation from the GNU Sci. Lib. (GSL).
-    * Adapted from Nicholas Chapman version:
-    * 
-    * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
-    * This is the Unix rand48() generator. The generator returns the
-    * upper 32 bits from each term of the sequence,
-    * 
-    * x_{n+1} = (a x_n + c) mod m 
-    * 
-    * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB
-    * and m = 2^48. The seed specifies the upper 32 bits of the initial
-    * value, x_1, with the lower 16 bits set to 0x330E.
-    * 
-    * The theoretical value of x_{10001} is 244131582646046.
-    * 
-    * The period of this generator is ? FIXME (probably around 2^48). 
-    */
-    class Rand48 : public Rand
-    {
-    public:
-
-        Rand48( time_e )
-        {
-            seed(randomSeed());
-        }
-
-        Rand48( uint s = 0x1234ABCD )
-        {
-            seed(s);
-        }	
-
-
-        /** Set the given seed. */
-        virtual void seed( uint s ) {
-            vstate.x0 = 0x330E;
-            vstate.x1 = uint16(s & 0xFFFF);
-            vstate.x2 = uint16((s >> 16) & 0xFFFF);
-        }
-
-        /** Get a random number. */
-        virtual uint get() {
-
-            advance();
-
-            uint x1 = vstate.x1;
-            uint x2 = vstate.x2;
-            return (x2 << 16) + x1;
-        }
-
-
-    private:
-
-        void advance()
-        {
-            /* work with unsigned long ints throughout to get correct integer
-            promotions of any unsigned short ints */
-            const uint32 x0 = vstate.x0;
-            const uint32 x1 = vstate.x1;
-            const uint32 x2 = vstate.x2;
-
-            uint32 a;
-            a = a0 * x0 + c0;
-
-            vstate.x0 = uint16(a & 0xFFFF);
-            a >>= 16;
-
-            /* although the next line may overflow we only need the top 16 bits
-            in the following stage, so it does not matter */
-
-            a += a0 * x1 + a1 * x0; 
-            vstate.x1 = uint16(a & 0xFFFF);
-
-            a >>= 16;
-            a += a0 * x2 + a1 * x1 + a2 * x0;
-            vstate.x2 = uint16(a & 0xFFFF);
-        }
-
-
-    private:	
-        NVMATH_API static const uint16 a0, a1, a2, c0;
-
-        struct rand48_state_t { 
-            uint16 x0, x1, x2; 
-        } vstate;
-
-    };
-
-} // nv namespace
-
-#endif // NV_MATH_RANDOM_H
diff --git a/thirdparty/thekla_atlas/nvmath/Solver.cpp b/thirdparty/thekla_atlas/nvmath/Solver.cpp
deleted file mode 100644
index 191793ee29..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Solver.cpp
+++ /dev/null
@@ -1,744 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include "Solver.h"
-#include "Sparse.h"
-
-#include "nvcore/Array.inl"
-
-using namespace nv;
-
-namespace
-{
-    class Preconditioner
-    {
-    public:
-        // Virtual dtor.
-        virtual ~Preconditioner() { }
-
-        // Apply preconditioning step.
-        virtual void apply(const FullVector & x, FullVector & y) const = 0;
-    };
-
-
-    // Jacobi preconditioner.
-    class JacobiPreconditioner : public Preconditioner
-    {
-    public:
-
-        JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
-        {
-            nvCheck(M.isSquare());
-
-            for(uint x = 0; x < M.width(); x++)
-            {
-                float elem = M.getCoefficient(x, x);
-                //nvDebugCheck( elem != 0.0f ); // This can be zero in the presence of zero area triangles.
-
-                if (symmetric) 
-                {
-                    m_inverseDiagonal[x] = (elem != 0) ? 1.0f / sqrtf(fabsf(elem)) : 1.0f;
-                }
-                else 
-                {
-                    m_inverseDiagonal[x] = (elem != 0) ? 1.0f / elem : 1.0f;
-                }
-            }
-        }
-
-        void apply(const FullVector & x, FullVector & y) const
-        {
-            nvDebugCheck(x.dimension() == m_inverseDiagonal.dimension());
-            nvDebugCheck(y.dimension() == m_inverseDiagonal.dimension());
-
-            // @@ Wrap vector component-wise product into a separate function.
-            const uint D = x.dimension();
-            for (uint i = 0; i < D; i++)
-            {
-                y[i] = m_inverseDiagonal[i] * x[i];
-            }
-        }
-
-    private:
-
-        FullVector m_inverseDiagonal;
-
-    };
-
-} // namespace
-
-
-static bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
-static bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
-
-
-// Solve the symmetric system: At�A�x = At�b
-bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
-{
-    nvDebugCheck(A.width() == x.dimension());
-    nvDebugCheck(A.height() == b.dimension());
-    nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
-
-    const uint D = A.width();
-
-    SparseMatrix At(A.height(), A.width());
-    transpose(A, At);
-
-    FullVector Atb(D);
-    //mult(Transposed, A, b, Atb);
-    mult(At, b, Atb);
-
-    SparseMatrix AtA(D);
-    //mult(Transposed, A, NoTransposed, A, AtA);
-    mult(At, A, AtA);
-
-    return SymmetricSolver(AtA, Atb, x, epsilon);
-}
-
-
-// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
-bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
-{
-    nvDebugCheck(A.width() == x.dimension());
-    nvDebugCheck(A.height() == b.dimension());
-    nvDebugCheck(A.height() >= A.width() - lockedCount);
-
-    // @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
-
-    const uint D = A.width() - lockedCount;
-    nvDebugCheck(D > 0);
-
-    // Compute: b - Al * xl
-    FullVector b_Alxl(b);
-
-    for (uint y = 0; y < A.height(); y++)
-    {
-        const uint count = A.getRow(y).count();
-        for (uint e = 0; e < count; e++)
-        {
-            uint column = A.getRow(y)[e].x;
-
-            bool isFree = true;
-            for (uint i = 0; i < lockedCount; i++) 
-            {
-                isFree &= (lockedParameters[i] != column);
-            }
-
-            if (!isFree)
-            {
-                b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
-            }
-        }
-    }
-
-    // Remove locked columns from A.
-    SparseMatrix Af(D, A.height());
-
-    for (uint y = 0; y < A.height(); y++)
-    {
-        const uint count = A.getRow(y).count();
-        for (uint e = 0; e < count; e++)
-        {
-            uint column = A.getRow(y)[e].x;
-            uint ix = column;
-
-            bool isFree = true;
-            for (uint i = 0; i < lockedCount; i++) 
-            {
-                isFree &= (lockedParameters[i] != column);
-                if (column > lockedParameters[i]) ix--; // shift columns
-            }
-
-            if (isFree)
-            {
-                Af.setCoefficient(ix, y, A.getRow(y)[e].v);
-            }
-        }
-    }
-
-    // Remove elements from x
-    FullVector xf(D);
-
-    for (uint i = 0, j = 0; i < A.width(); i++)
-    {
-        bool isFree = true;
-        for (uint l = 0; l < lockedCount; l++) 
-        {
-            isFree &= (lockedParameters[l] != i);
-        }
-
-        if (isFree)
-        {
-            xf[j++] = x[i];
-        }
-    }
-
-    // Solve reduced system.
-    bool result = LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
-
-    // Copy results back to x.
-    for (uint i = 0, j = 0; i < A.width(); i++)
-    {
-        bool isFree = true;
-        for (uint l = 0; l < lockedCount; l++) 
-        {
-            isFree &= (lockedParameters[l] != i);
-        }
-
-        if (isFree)
-        {
-            x[i] = xf[j++];
-        }
-    }
-
-    return result;
-}
-
-
-bool nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
-{
-    nvDebugCheck(A.height() == A.width());
-    nvDebugCheck(A.height() == b.dimension());
-    nvDebugCheck(b.dimension() == x.dimension());
-
-    JacobiPreconditioner jacobi(A, true);
-    return ConjugateGradientSolver(jacobi, A, b, x, epsilon);
-
-    //return ConjugateGradientSolver(A, b, x, epsilon);
-}
-
-
-/**
-* Compute the solution of the sparse linear system Ab=x using the Conjugate
-* Gradient method.
-*
-* Solving sparse linear systems:
-* (1)		A�x = b
-* 
-* The conjugate gradient algorithm solves (1) only in the case that A is 
-* symmetric and positive definite. It is based on the idea of minimizing the 
-* function
-* 
-* (2)		f(x) = 1/2�x�A�x - b�x
-* 
-* This function is minimized when its gradient
-* 
-* (3)		df = A�x - b
-* 
-* is zero, which is equivalent to (1). The minimization is carried out by 
-* generating a succession of search directions p.k and improved minimizers x.k. 
-* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k�p.k), 
-* and x.k+1 is set equal to the new point x.k + alfa.k�p.k. The p.k and x.k are 
-* built up in such a way that x.k+1 is also the minimizer of f over the whole
-* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N 
-* iterations you arrive at the minimizer over the entire vector space, i.e., the 
-* solution to (1).
-*
-* For a really good explanation of the method see:
-*
-* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
-* Jonhathan Richard Shewchuk.
-*
-**/
-/*static*/ bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
-{
-    nvDebugCheck( A.isSquare() );
-    nvDebugCheck( A.width() == b.dimension() );
-    nvDebugCheck( A.width() == x.dimension() );
-
-    int i = 0;
-    const int D = A.width();
-    const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
-
-    FullVector r(D);   // residual
-    FullVector p(D);   // search direction
-    FullVector q(D);   // 
-    float delta_0;
-    float delta_old;
-    float delta_new;
-    float alpha;
-    float beta;
-
-    // r = b - A�x;
-    copy(b, r);
-    sgemv(-1, A, x, 1, r);
-
-    // p = r;
-    copy(r, p);
-
-    delta_new = dot( r, r );
-    delta_0 = delta_new;
-
-    while (i < i_max && delta_new > epsilon*epsilon*delta_0)
-    {
-        i++;
-
-        // q = A�p
-        mult(A, p, q);
-
-        // alpha = delta_new / p�q
-        alpha = delta_new / dot( p, q );
-
-        // x = alfa�p + x
-        saxpy(alpha, p, x);
-
-        if ((i & 31) == 0) // recompute r after 32 steps
-        {
-            // r = b - A�x
-            copy(b, r);
-            sgemv(-1, A, x, 1, r);
-        }
-        else
-        {
-            // r = r - alpha�q
-            saxpy(-alpha, q, r);
-        }
-
-        delta_old = delta_new;
-        delta_new = dot( r, r );
-
-        beta = delta_new / delta_old;
-
-        // p = beta�p + r
-        scal(beta, p);
-        saxpy(1, r, p);
-    }
-
-    return delta_new <= epsilon*epsilon*delta_0;
-}
-
-
-// Conjugate gradient with preconditioner.
-/*static*/ bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
-{
-    nvDebugCheck( A.isSquare() );
-    nvDebugCheck( A.width() == b.dimension() );
-    nvDebugCheck( A.width() == x.dimension() );
-
-    int i = 0;
-    const int D = A.width();
-    const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
-
-    FullVector r(D);    // residual
-    FullVector p(D);    // search direction
-    FullVector q(D);    // 
-    FullVector s(D);    // preconditioned
-    float delta_0;
-    float delta_old;
-    float delta_new;
-    float alpha;
-    float beta;
-
-    // r = b - A�x
-    copy(b, r);
-    sgemv(-1, A, x, 1, r);
-
-
-    // p = M^-1 � r
-    preconditioner.apply(r, p);
-    //copy(r, p);
-
-
-    delta_new = dot(r, p);
-    delta_0 = delta_new;
-
-    while (i < i_max && delta_new > epsilon*epsilon*delta_0)
-    {
-        i++;
-
-        // q = A�p
-        mult(A, p, q);
-
-        // alpha = delta_new / p�q
-        alpha = delta_new / dot(p, q);
-
-        // x = alfa�p + x
-        saxpy(alpha, p, x);
-
-        if ((i & 31) == 0)  // recompute r after 32 steps
-        {			
-            // r = b - A�x
-            copy(b, r);
-            sgemv(-1, A, x, 1, r);
-        }
-        else
-        {
-            // r = r - alfa�q
-            saxpy(-alpha, q, r);
-        }
-
-        // s = M^-1 � r
-        preconditioner.apply(r, s);
-        //copy(r, s);
-
-        delta_old = delta_new;
-        delta_new = dot( r, s );
-
-        beta = delta_new / delta_old;
-
-        // p = s + beta�p
-        scal(beta, p);
-        saxpy(1, s, p);
-    }
-
-    return delta_new <= epsilon*epsilon*delta_0;
-}
-
-
-#if 0 // Nonsymmetric solvers
-
-/** Bi-conjugate gradient method.  */
-MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
-    piDebugCheck( A.IsSquare() );
-    piDebugCheck( A.Width() == b.Dim() );
-    piDebugCheck( A.Width() == x.Dim() );
-
-    int i = 0;
-    const int D = A.Width();
-    const int i_max = 4 * D;
-
-    float resid;
-    float rho_1 = 0;
-    float rho_2 = 0;
-    float alpha;
-    float beta;
-
-    DenseVector r(D);
-    DenseVector rtilde(D);
-    DenseVector p(D);
-    DenseVector ptilde(D);
-    DenseVector q(D);
-    DenseVector qtilde(D);
-    DenseVector tmp(D);	// temporal vector.
-
-    // r = b - A�x;
-    A.Product( x, tmp );
-    r.Sub( b, tmp );
-
-    // rtilde = r
-    rtilde.Set( r );
-
-    // p = r;
-    p.Set( r );
-
-    // ptilde = rtilde
-    ptilde.Set( rtilde );
-
-
-
-    float normb = b.Norm();
-    if( normb == 0.0 ) normb = 1;
-
-    // test convergence
-    resid = r.Norm() / normb;
-    if( resid < epsilon ) {
-        // method converges?
-        return 0;
-    }
-
-
-    while( i < i_max ) {
-
-        i++;
-
-        rho_1 = DenseVectorDotProduct( r, rtilde );
-
-        if( rho_1 == 0 ) {
-            // method fails.
-            return -i;
-        }
-
-        if (i == 1) {
-            p.Set( r );
-            ptilde.Set( rtilde );
-        } 
-        else {
-            beta = rho_1 / rho_2;
-
-            // p = r + beta * p;
-            p.Mad( r, p, beta );
-
-            // ptilde = ztilde + beta * ptilde;
-            ptilde.Mad( rtilde, ptilde, beta );
-        }
-
-        // q = A * p;
-        A.Product( p, q );
-
-        // qtilde = A^t * ptilde;
-        A.TransProduct( ptilde, qtilde );
-
-        alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
-
-        // x += alpha * p;
-        x.Mad( x, p, alpha );
-
-        // r -= alpha * q;
-        r.Mad( r, q, -alpha );
-
-        // rtilde -= alpha * qtilde;
-        rtilde.Mad( rtilde, qtilde, -alpha );
-
-        rho_2 = rho_1;
-
-        // test convergence
-        resid = r.Norm() / normb;
-        if( resid < epsilon ) {
-            // method converges
-            return i;
-        }
-    }
-
-    return i;
-}
-
-
-/** Bi-conjugate gradient stabilized method. */
-int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
-    piDebugCheck( A.IsSquare() );
-    piDebugCheck( A.Width() == b.Dim() );
-    piDebugCheck( A.Width() == x.Dim() );
-
-    int i = 0;
-    const int D = A.Width();
-    const int i_max = 2 * D;
-
-
-    float resid;
-    float rho_1 = 0;
-    float rho_2 = 0;
-    float alpha = 0;
-    float beta = 0;
-    float omega = 0;
-
-    DenseVector p(D);
-    DenseVector phat(D);
-    DenseVector s(D);
-    DenseVector shat(D);
-    DenseVector t(D);
-    DenseVector v(D);
-
-    DenseVector r(D);
-    DenseVector rtilde(D);
-
-    DenseVector tmp(D);
-
-    // r = b - A�x;
-    A.Product( x, tmp );
-    r.Sub( b, tmp );
-
-    // rtilde = r
-    rtilde.Set( r );
-
-
-    float normb = b.Norm();
-    if( normb == 0.0 ) normb = 1;
-
-    // test convergence
-    resid = r.Norm() / normb;
-    if( resid < epsilon ) {
-        // method converges?
-        return 0;
-    }
-
-
-    while( i<i_max ) {
-
-        i++;
-
-        rho_1 = DenseVectorDotProduct( rtilde, r );
-        if( rho_1 == 0 ) {
-            // method fails
-            return -i;
-        }
-
-
-        if( i == 1 ) {
-            p.Set( r );
-        }
-        else {
-            beta = (rho_1 / rho_2) * (alpha / omega);
-
-            // p = r + beta * (p - omega * v);
-            p.Mad( p, v, -omega );
-            p.Mad( r, p, beta );
-        }
-
-        //phat = M.solve(p);
-        phat.Set( p );
-        //Precond( &phat, p );
-
-        //v = A * phat;
-        A.Product( phat, v );
-
-        alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
-
-        // s = r - alpha * v;
-        s.Mad( r, v, -alpha );
-
-
-        resid = s.Norm() / normb;
-        if( resid < epsilon ) {
-            // x += alpha * phat;
-            x.Mad( x, phat, alpha );
-            return i;
-        }
-
-        //shat = M.solve(s);
-        shat.Set( s );
-        //Precond( &shat, s );
-
-        //t = A * shat;
-        A.Product( shat, t );
-
-        omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
-
-        // x += alpha * phat + omega * shat;
-        x.Mad( x, shat, omega );
-        x.Mad( x, phat, alpha );
-
-        //r = s - omega * t;
-        r.Mad( s, t, -omega );
-
-        rho_2 = rho_1;
-
-        resid = r.Norm() / normb;
-        if( resid < epsilon ) {
-            return i;
-        }
-
-        if( omega == 0 ) {
-            return -i;	// ???
-        }
-    }
-
-    return i;
-}
-
-
-/** Bi-conjugate gradient stabilized method. */
-int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
-    piDebugCheck( A.IsSquare() );
-    piDebugCheck( A.Width() == b.Dim() );
-    piDebugCheck( A.Width() == x.Dim() );
-
-    int i = 0;
-    const int D = A.Width();
-    const int i_max = D;
-    //	const int i_max = 1000;
-
-
-    float resid;
-    float rho_1 = 0;
-    float rho_2 = 0;
-    float alpha = 0;
-    float beta = 0;
-    float omega = 0;
-
-    DenseVector p(D);
-    DenseVector phat(D);
-    DenseVector s(D);
-    DenseVector shat(D);
-    DenseVector t(D);
-    DenseVector v(D);
-
-    DenseVector r(D);
-    DenseVector rtilde(D);
-
-    DenseVector tmp(D);
-
-    // r = b - A�x;
-    A.Product( x, tmp );
-    r.Sub( b, tmp );
-
-    // rtilde = r
-    rtilde.Set( r );
-
-
-    float normb = b.Norm();
-    if( normb == 0.0 ) normb = 1;
-
-    // test convergence
-    resid = r.Norm() / normb;
-    if( resid < epsilon ) {
-        // method converges?
-        return 0;
-    }
-
-
-    while( i<i_max ) {
-
-        i++;
-
-        rho_1 = DenseVectorDotProduct( rtilde, r );
-        if( rho_1 == 0 ) {
-            // method fails
-            return -i;
-        }
-
-
-        if( i == 1 ) {
-            p.Set( r );
-        }
-        else {
-            beta = (rho_1 / rho_2) * (alpha / omega);
-
-            // p = r + beta * (p - omega * v);
-            p.Mad( p, v, -omega );
-            p.Mad( r, p, beta );
-        }
-
-        //phat = M.solve(p);
-        //phat.Set( p );
-        M.Precond( &phat, p );
-
-        //v = A * phat;
-        A.Product( phat, v );
-
-        alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
-
-        // s = r - alpha * v;
-        s.Mad( r, v, -alpha );
-
-
-        resid = s.Norm() / normb;
-
-        //printf( "--- Iteration %d: residual = %f\n", i, resid );
-
-        if( resid < epsilon ) {
-            // x += alpha * phat;
-            x.Mad( x, phat, alpha );
-            return i;
-        }
-
-        //shat = M.solve(s);
-        //shat.Set( s );
-        M.Precond( &shat, s );
-
-        //t = A * shat;
-        A.Product( shat, t );
-
-        omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
-
-        // x += alpha * phat + omega * shat;
-        x.Mad( x, shat, omega );
-        x.Mad( x, phat, alpha );
-
-        //r = s - omega * t;
-        r.Mad( s, t, -omega );
-
-        rho_2 = rho_1;
-
-        resid = r.Norm() / normb;
-        if( resid < epsilon ) {
-            return i;
-        }
-
-        if( omega == 0 ) {
-            return -i;	// ???
-        }
-    }
-
-    return i;
-}
-
-#endif
diff --git a/thirdparty/thekla_atlas/nvmath/Solver.h b/thirdparty/thekla_atlas/nvmath/Solver.h
deleted file mode 100644
index 2bbf92736a..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Solver.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_SOLVER_H
-#define NV_MATH_SOLVER_H
-
-#include "nvmath.h"
-
-namespace nv
-{
-    class SparseMatrix;
-    class FullVector;
-
-
-    // Linear solvers.
-    NVMATH_API bool LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
-    NVMATH_API bool LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
-    NVMATH_API bool SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
-    //NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
-
-} // nv namespace
-
-
-#endif // NV_MATH_SOLVER_H
diff --git a/thirdparty/thekla_atlas/nvmath/Sparse.cpp b/thirdparty/thekla_atlas/nvmath/Sparse.cpp
deleted file mode 100644
index 421e7ee022..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Sparse.cpp
+++ /dev/null
@@ -1,889 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castanyo@yahoo.es>
-
-#include "Sparse.h"
-#include "KahanSum.h"
-
-#include "nvcore/Array.inl"
-
-#define USE_KAHAN_SUM 0
-
-
-using namespace nv;
-
-
-FullVector::FullVector(uint dim)
-{ 
-    m_array.resize(dim); 
-}
-
-FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
-{
-}
-
-const FullVector & FullVector::operator=(const FullVector & v)
-{
-    nvCheck(dimension() == v.dimension());
-
-    m_array = v.m_array;
-
-    return *this;
-}
-
-
-void FullVector::fill(float f)
-{
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] = f;
-    }
-}
-
-void FullVector::operator+= (const FullVector & v)
-{
-    nvDebugCheck(dimension() == v.dimension());
-
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] += v.m_array[i];
-    }
-}
-
-void FullVector::operator-= (const FullVector & v)
-{
-    nvDebugCheck(dimension() == v.dimension());
-
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] -= v.m_array[i];
-    }
-}
-
-void FullVector::operator*= (const FullVector & v)
-{
-    nvDebugCheck(dimension() == v.dimension());
-
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] *= v.m_array[i];
-    }
-}
-
-void FullVector::operator+= (float f)
-{
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] += f;
-    }
-}
-
-void FullVector::operator-= (float f)
-{
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] -= f;
-    }
-}
-
-void FullVector::operator*= (float f)
-{
-    const uint dim = dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        m_array[i] *= f;
-    }
-}
-
-
-void nv::saxpy(float a, const FullVector & x, FullVector & y)
-{
-    nvDebugCheck(x.dimension() == y.dimension());
-
-    const uint dim = x.dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        y[i] += a * x[i];
-    }
-}
-
-void nv::copy(const FullVector & x, FullVector & y)
-{
-    nvDebugCheck(x.dimension() == y.dimension());
-
-    const uint dim = x.dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        y[i] = x[i];
-    }
-}
-
-void nv::scal(float a, FullVector & x)
-{
-    const uint dim = x.dimension();
-    for (uint i = 0; i < dim; i++)
-    {
-        x[i] *= a;
-    }
-}
-
-float nv::dot(const FullVector & x, const FullVector & y)
-{
-    nvDebugCheck(x.dimension() == y.dimension());
-
-    const uint dim = x.dimension();
-
-#if USE_KAHAN_SUM
-    KahanSum kahan;
-    for (uint i = 0; i < dim; i++)
-    {
-        kahan.add(x[i] * y[i]);
-    }
-    return kahan.sum();
-#else
-    float sum = 0;
-    for (uint i = 0; i < dim; i++)
-    {
-        sum += x[i] * y[i];
-    }
-    return sum;
-#endif
-}
-
-
-FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
-{
-    m_array.resize(d*d, 0.0f);
-}
-
-FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
-{
-    m_array.resize(w*h, 0.0f);
-}
-
-FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
-{
-    m_array = m.m_array;
-}
-
-const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
-{
-    nvCheck(width() == m.width());
-    nvCheck(height() == m.height());
-
-    m_array = m.m_array;
-
-    return *this;
-}
-
-
-float FullMatrix::getCoefficient(uint x, uint y) const
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    return m_array[y * width() + x];
-}
-
-void FullMatrix::setCoefficient(uint x, uint y, float f)
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    m_array[y * width() + x] = f;
-}
-
-void FullMatrix::addCoefficient(uint x, uint y, float f)
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    m_array[y * width() + x] += f;
-}
-
-void FullMatrix::mulCoefficient(uint x, uint y, float f)
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    m_array[y * width() + x] *= f;
-}
-
-float FullMatrix::dotRow(uint y, const FullVector & v) const
-{
-    nvDebugCheck( v.dimension() == width() );
-    nvDebugCheck( y < height() );
-
-    float sum = 0;
-
-    const uint count = v.dimension();
-    for (uint i = 0; i < count; i++)
-    {
-        sum += m_array[y * count + i] * v[i];
-    }
-
-    return sum;
-}
-
-void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
-{
-    nvDebugCheck( v.dimension() == width() );
-    nvDebugCheck( y < height() );
-
-    const uint count = v.dimension();
-    for (uint i = 0; i < count; i++)
-    {
-        v[i] += m_array[y * count + i];
-    }
-}
-
-
-// y = M * x
-void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
-{
-    mult(NoTransposed, M, x, y);
-}
-
-void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
-{
-    const uint w = M.width();
-    const uint h = M.height();
-
-    if (TM == Transposed)
-    {
-        nvDebugCheck( h == x.dimension() );
-        nvDebugCheck( w == y.dimension() );
-
-        y.fill(0.0f);
-
-        for (uint i = 0; i < h; i++)
-        {
-            M.madRow(i, x[i], y);
-        }
-    }
-    else
-    {
-        nvDebugCheck( w == x.dimension() );
-        nvDebugCheck( h == y.dimension() );
-
-        for (uint i = 0; i < h; i++)
-        {
-            y[i] = M.dotRow(i, x);
-        }
-    }
-}
-
-// y = alpha*A*x + beta*y
-void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-    sgemv(alpha, NoTransposed, A, x, beta, y);
-}
-
-void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-    const uint w = A.width();
-    const uint h = A.height();
-
-    if (TA == Transposed)
-    {
-        nvDebugCheck( h == x.dimension() );
-        nvDebugCheck( w == y.dimension() );
-
-        for (uint i = 0; i < h; i++)
-        {
-            A.madRow(i, alpha * x[i], y);
-        }
-    }
-    else
-    {
-        nvDebugCheck( w == x.dimension() );
-        nvDebugCheck( h == y.dimension() );
-
-        for (uint i = 0; i < h; i++)
-        {
-            y[i] = alpha * A.dotRow(i, x) + beta * y[i];
-        }
-    }
-}
-
-
-// Multiply a row of A by a column of B.
-static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
-{
-    const uint w = (TA == NoTransposed) ? A.width() : A.height();
-    nvDebugCheck(w == ((TB == NoTransposed) ? B.height() : A.width()));
-
-    float sum = 0.0f;
-
-    for (uint k = 0; k < w; k++)
-    {
-        const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
-        const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
-        sum += a * b;
-    }
-
-    return sum;
-}
-
-
-// C = A * B
-void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
-{
-    mult(NoTransposed, A, NoTransposed, B, C);
-}
-
-void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
-{
-    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
-}
-
-// C = alpha*A*B + beta*C
-void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
-{
-    sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
-}
-
-void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
-{
-    const uint w = C.width();
-    const uint h = C.height();
-
-    uint aw = (TA == NoTransposed) ? A.width() : A.height();
-    uint ah = (TA == NoTransposed) ? A.height() : A.width();
-    uint bw = (TB == NoTransposed) ? B.width() : B.height();
-    uint bh = (TB == NoTransposed) ? B.height() : B.width();
-
-    nvDebugCheck(aw == bh);
-    nvDebugCheck(bw == ah);
-    nvDebugCheck(w == bw);
-    nvDebugCheck(h == ah);
-
-    for (uint y = 0; y < h; y++)
-    {
-        for (uint x = 0; x < w; x++)
-        {
-            float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
-            C.setCoefficient(x, y, c);
-        }
-    }
-}
-
-
-
-
-
-/// Ctor. Init the size of the sparse matrix.
-SparseMatrix::SparseMatrix(uint d) : m_width(d)
-{
-    m_array.resize(d);
-}
-
-/// Ctor. Init the size of the sparse matrix.
-SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
-{
-    m_array.resize(h);
-}
-
-SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
-{
-    m_array = m.m_array;
-}
-
-const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
-{
-    nvCheck(width() == m.width());
-    nvCheck(height() == m.height());
-
-    m_array = m.m_array;
-
-    return *this;
-}
-
-
-// x is column, y is row
-float SparseMatrix::getCoefficient(uint x, uint y) const
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    const uint count = m_array[y].count();
-    for (uint i = 0; i < count; i++)
-    {
-        if (m_array[y][i].x == x) return m_array[y][i].v;
-    }
-
-    return 0.0f;
-}
-
-void SparseMatrix::setCoefficient(uint x, uint y, float f)
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    const uint count = m_array[y].count();
-    for (uint i = 0; i < count; i++)
-    {
-        if (m_array[y][i].x == x) 
-        {
-            m_array[y][i].v = f;
-            return;
-        }
-    }
-
-    if (f != 0.0f)
-    {
-        Coefficient c = { x, f };
-        m_array[y].append( c );
-    }
-}
-
-void SparseMatrix::addCoefficient(uint x, uint y, float f)
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    if (f != 0.0f)
-    {
-        const uint count = m_array[y].count();
-        for (uint i = 0; i < count; i++)
-        {
-            if (m_array[y][i].x == x) 
-            {
-                m_array[y][i].v += f;
-                return;
-            }
-        }
-
-        Coefficient c = { x, f };
-        m_array[y].append( c );
-    }
-}
-
-void SparseMatrix::mulCoefficient(uint x, uint y, float f)
-{
-    nvDebugCheck( x < width() );
-    nvDebugCheck( y < height() );
-
-    const uint count = m_array[y].count();
-    for (uint i = 0; i < count; i++)
-    {
-        if (m_array[y][i].x == x) 
-        {
-            m_array[y][i].v *= f;
-            return;
-        }
-    }
-
-    if (f != 0.0f)
-    {
-        Coefficient c = { x, f };
-        m_array[y].append( c );
-    }
-}
-
-
-float SparseMatrix::sumRow(uint y) const
-{
-    nvDebugCheck( y < height() );
-
-    const uint count = m_array[y].count();
-
-#if USE_KAHAN_SUM
-    KahanSum kahan;
-    for (uint i = 0; i < count; i++)
-    {
-        kahan.add(m_array[y][i].v);
-    }
-    return kahan.sum();
-#else
-    float sum = 0;
-    for (uint i = 0; i < count; i++)
-    {
-        sum += m_array[y][i].v;
-    }
-    return sum;
-#endif
-}
-
-float SparseMatrix::dotRow(uint y, const FullVector & v) const
-{
-    nvDebugCheck( y < height() );
-
-    const uint count = m_array[y].count();
-
-#if USE_KAHAN_SUM
-    KahanSum kahan;
-    for (uint i = 0; i < count; i++)
-    {
-        kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
-    }
-    return kahan.sum();
-#else
-    float sum = 0;
-    for (uint i = 0; i < count; i++)
-    {
-        sum += m_array[y][i].v * v[m_array[y][i].x];
-    }
-    return sum;
-#endif
-}
-
-void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
-{
-    nvDebugCheck(y < height());
-
-    const uint count = m_array[y].count();
-    for (uint i = 0; i < count; i++)
-    {
-        v[m_array[y][i].x] += alpha * m_array[y][i].v;
-    }
-}
-
-
-void SparseMatrix::clearRow(uint y)
-{
-    nvDebugCheck( y < height() );
-
-    m_array[y].clear();
-}
-
-void SparseMatrix::scaleRow(uint y, float f)
-{
-    nvDebugCheck( y < height() );
-
-    const uint count = m_array[y].count();
-    for (uint i = 0; i < count; i++)
-    {
-        m_array[y][i].v *= f;
-    }
-}
-
-void SparseMatrix::normalizeRow(uint y)
-{
-    nvDebugCheck( y < height() );
-
-    float norm = 0.0f;
-
-    const uint count = m_array[y].count();
-    for (uint i = 0; i < count; i++)
-    {
-        float f = m_array[y][i].v;
-        norm += f * f;
-    }
-
-    scaleRow(y, 1.0f / sqrtf(norm));
-}
-
-
-void SparseMatrix::clearColumn(uint x)
-{
-    nvDebugCheck(x < width());
-
-    for (uint y = 0; y < height(); y++)
-    {
-        const uint count = m_array[y].count();
-        for (uint e = 0; e < count; e++)
-        {
-            if (m_array[y][e].x == x)
-            {
-                m_array[y][e].v = 0.0f;
-                break;
-            }
-        }
-    }
-}
-
-void SparseMatrix::scaleColumn(uint x, float f)
-{
-    nvDebugCheck(x < width());
-
-    for (uint y = 0; y < height(); y++)
-    {
-        const uint count = m_array[y].count();
-        for (uint e = 0; e < count; e++)
-        {
-            if (m_array[y][e].x == x)
-            {
-                m_array[y][e].v *= f;
-                break;
-            }
-        }
-    }
-}
-
-const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
-{
-    return m_array[y];
-}
-
-
-bool SparseMatrix::isSymmetric() const
-{
-    for (uint y = 0; y < height(); y++)
-    {
-        const uint count = m_array[y].count();
-        for (uint e = 0; e < count; e++)
-        {
-            const uint x = m_array[y][e].x;
-            if (x > y) {
-                float v = m_array[y][e].v;
-
-                if (!equal(getCoefficient(y, x), v)) {  // @@ epsilon
-                    return false;
-                }
-            }
-        }
-    }
-
-    return true;
-}
-
-
-// y = M * x
-void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
-{
-    mult(NoTransposed, M, x, y);
-}
-
-void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
-{
-    const uint w = M.width();
-    const uint h = M.height();
-
-    if (TM == Transposed)
-    {
-        nvDebugCheck( h == x.dimension() );
-        nvDebugCheck( w == y.dimension() );
-
-        y.fill(0.0f);
-
-        for (uint i = 0; i < h; i++)
-        {
-            M.madRow(i, x[i], y);
-        }
-    }
-    else
-    {
-        nvDebugCheck( w == x.dimension() );
-        nvDebugCheck( h == y.dimension() );
-
-        for (uint i = 0; i < h; i++)
-        {
-            y[i] = M.dotRow(i, x);
-        }
-    }
-}
-
-// y = alpha*A*x + beta*y
-void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-    sgemv(alpha, NoTransposed, A, x, beta, y);
-}
-
-void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-    const uint w = A.width();
-    const uint h = A.height();
-
-    if (TA == Transposed)
-    {
-        nvDebugCheck( h == x.dimension() );
-        nvDebugCheck( w == y.dimension() );
-
-        for (uint i = 0; i < h; i++)
-        {
-            A.madRow(i, alpha * x[i], y);
-        }
-    }
-    else
-    {
-        nvDebugCheck( w == x.dimension() );
-        nvDebugCheck( h == y.dimension() );
-
-        for (uint i = 0; i < h; i++)
-        {
-            y[i] = alpha * A.dotRow(i, x) + beta * y[i];
-        }
-    }
-}
-
-
-// dot y-row of A by x-column of B
-static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
-{
-    const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
-
-    const uint count = row.count();
-
-#if USE_KAHAN_SUM
-    KahanSum kahan;
-    for (uint i = 0; i < count; i++)
-    {
-        const SparseMatrix::Coefficient & c = row[i];
-        kahan.add(c.v * B.getCoefficient(x, c.x));
-    }
-    return kahan.sum();
-#else
-    float sum = 0.0f;
-    for (uint i = 0; i < count; i++)
-    {
-        const SparseMatrix::Coefficient & c = row[i];
-        sum += c.v * B.getCoefficient(x, c.x);
-    }
-    return sum;
-#endif
-}
-
-// dot y-row of A by x-row of B
-static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
-{
-    const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
-
-    const uint count = row.count();
-
-#if USE_KAHAN_SUM
-    KahanSum kahan;
-    for (uint i = 0; i < count; i++)
-    {
-        const SparseMatrix::Coefficient & c = row[i];
-        kahan.add(c.v * B.getCoefficient(c.x, x));
-    }
-    return kahan.sum();
-#else
-    float sum = 0.0f;
-    for (uint i = 0; i < count; i++)
-    {
-        const SparseMatrix::Coefficient & c = row[i];
-        sum += c.v * B.getCoefficient(c.x, x);
-    }
-    return sum;
-#endif
-}
-
-// dot y-column of A by x-column of B
-static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
-{
-    nvDebugCheck(A.height() == B.height());
-
-    const uint h = A.height();
-
-#if USE_KAHAN_SUM
-    KahanSum kahan;
-    for (uint i = 0; i < h; i++)
-    {
-        kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
-    }
-    return kahan.sum();
-#else
-    float sum = 0.0f;
-    for (uint i = 0; i < h; i++)
-    {
-        sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
-    }
-    return sum;
-#endif
-}
-
-
-void nv::transpose(const SparseMatrix & A, SparseMatrix & B)
-{
-    nvDebugCheck(A.width() == B.height());
-    nvDebugCheck(B.width() == A.height());
-
-    const uint w = A.width();
-    for (uint x = 0; x < w; x++)
-    {
-        B.clearRow(x);
-    }
-
-    const uint h = A.height();
-    for (uint y = 0; y < h; y++)
-    {
-        const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
-
-        const uint count = row.count();
-        for (uint i = 0; i < count; i++)
-        {
-            const SparseMatrix::Coefficient & c = row[i];
-            nvDebugCheck(c.x < w);
-
-            B.setCoefficient(y, c.x, c.v);
-        }
-    }
-}
-
-// C = A * B
-void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
-{
-    mult(NoTransposed, A, NoTransposed, B, C);
-}
-
-void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
-{
-    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
-}
-
-// C = alpha*A*B + beta*C
-void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
-{
-    sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
-}
-
-void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
-{
-    const uint w = C.width();
-    const uint h = C.height();
-
-    uint aw = (TA == NoTransposed) ? A.width() : A.height();
-    uint ah = (TA == NoTransposed) ? A.height() : A.width();
-    uint bw = (TB == NoTransposed) ? B.width() : B.height();
-    uint bh = (TB == NoTransposed) ? B.height() : B.width();
-
-    nvDebugCheck(aw == bh);
-    nvDebugCheck(bw == ah);
-    nvDebugCheck(w == bw);
-    nvDebugCheck(h == ah);
-
-
-    for (uint y = 0; y < h; y++)
-    {
-        for (uint x = 0; x < w; x++)
-        {
-            float c = beta * C.getCoefficient(x, y);
-
-            if (TA == NoTransposed && TB == NoTransposed)
-            {
-                // dot y-row of A by x-column of B.
-                c += alpha * dotRowColumn(y, A, x, B);
-            }
-            else if (TA == Transposed && TB == Transposed)
-            {
-                // dot y-column of A by x-row of B.
-                c += alpha * dotRowColumn(x, B, y, A);
-            }
-            else if (TA == Transposed && TB == NoTransposed)
-            {
-                // dot y-column of A by x-column of B.
-                c += alpha * dotColumnColumn(y, A, x, B);
-            }
-            else if (TA == NoTransposed && TB == Transposed)
-            {
-                // dot y-row of A by x-row of B.
-                c += alpha * dotRowRow(y, A, x, B);
-            }
-
-            C.setCoefficient(x, y, c);
-        }
-    }
-}
-
-// C = At * A
-void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
-{
-    // This is quite expensive...
-    mult(Transposed, A, NoTransposed, A, C);
-}
diff --git a/thirdparty/thekla_atlas/nvmath/Sparse.h b/thirdparty/thekla_atlas/nvmath/Sparse.h
deleted file mode 100644
index 6b03ed51f3..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Sparse.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_SPARSE_H
-#define NV_MATH_SPARSE_H
-
-#include "nvmath.h"
-#include "nvcore/Array.h"
-
-
-// Full and sparse vector and matrix classes. BLAS subset.
-
-namespace nv
-{
-    class FullVector;
-    class FullMatrix;
-    class SparseMatrix;
-
-
-    /// Fixed size vector class.
-    class FullVector
-    {
-    public:
-
-        FullVector(uint dim);
-        FullVector(const FullVector & v);
-
-        const FullVector & operator=(const FullVector & v);
-
-        uint dimension() const { return m_array.count(); }
-
-        const float & operator[]( uint index ) const { return m_array[index]; }
-        float & operator[] ( uint index ) { return m_array[index]; }
-
-        void fill(float f);
-
-        void operator+= (const FullVector & v);
-        void operator-= (const FullVector & v);
-        void operator*= (const FullVector & v);
-
-        void operator+= (float f);
-        void operator-= (float f);
-        void operator*= (float f);
-
-
-    private:
-
-        Array<float> m_array;
-
-    };
-
-    // Pseudo-BLAS interface.
-    NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
-    NVMATH_API void copy(const FullVector & x, FullVector & y);
-    NVMATH_API void scal(float a, FullVector & x);
-    NVMATH_API float dot(const FullVector & x, const FullVector & y);
-
-
-    enum Transpose
-    {
-        NoTransposed = 0,
-        Transposed = 1
-    };
-
-    /// Full matrix class.
-    class FullMatrix
-    {
-    public:
-
-        FullMatrix(uint d);
-        FullMatrix(uint w, uint h);
-        FullMatrix(const FullMatrix & m);
-
-        const FullMatrix & operator=(const FullMatrix & m);
-
-        uint width() const { return m_width; }
-        uint height() const { return m_height; }
-        bool isSquare() const { return m_width == m_height; }
-
-        float getCoefficient(uint x, uint y) const;
-
-        void setCoefficient(uint x, uint y, float f);
-        void addCoefficient(uint x, uint y, float f);
-        void mulCoefficient(uint x, uint y, float f);
-
-        float dotRow(uint y, const FullVector & v) const;
-        void madRow(uint y, float alpha, FullVector & v) const;
-
-    protected:
-
-        bool isValid() const {
-            return m_array.size() == (m_width * m_height);
-        }
-
-    private:
-
-        const uint m_width;
-        const uint m_height;
-        Array<float> m_array;
-
-    };
-
-    NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
-    NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
-
-    // y = alpha*A*x + beta*y
-    NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
-    NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
-
-    NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
-    NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
-
-    // C = alpha*A*B + beta*C
-    NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
-    NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
-
-
-    /**
-    * Sparse matrix class. The matrix is assumed to be sparse and to have
-    * very few non-zero elements, for this reason it's stored in indexed 
-    * format. To multiply column vectors efficiently, the matrix stores 
-    * the elements in indexed-column order, there is a list of indexed 
-    * elements for each row of the matrix. As with the FullVector the 
-    * dimension of the matrix is constant.
-    **/
-    class SparseMatrix
-    {
-        friend class FullMatrix;
-    public:
-
-        // An element of the sparse array.
-        struct Coefficient {
-            uint x;  // column
-            float v; // value
-        };
-
-
-    public:
-
-        SparseMatrix(uint d);
-        SparseMatrix(uint w, uint h);
-        SparseMatrix(const SparseMatrix & m);
-
-        const SparseMatrix & operator=(const SparseMatrix & m);
-
-
-        uint width() const { return m_width; }
-        uint height() const { return m_array.count(); }
-        bool isSquare() const { return width() == height(); }
-
-        float getCoefficient(uint x, uint y) const; // x is column, y is row
-
-        void setCoefficient(uint x, uint y, float f);
-        void addCoefficient(uint x, uint y, float f);
-        void mulCoefficient(uint x, uint y, float f);
-
-        float sumRow(uint y) const;
-        float dotRow(uint y, const FullVector & v) const;
-        void madRow(uint y, float alpha, FullVector & v) const;
-
-        void clearRow(uint y);
-        void scaleRow(uint y, float f);
-        void normalizeRow(uint y);
-
-        void clearColumn(uint x);
-        void scaleColumn(uint x, float f);
-
-        const Array<Coefficient> & getRow(uint y) const;
-
-        bool isSymmetric() const;
-
-    private:
-
-        /// Number of columns.
-        const uint m_width;
-
-        /// Array of matrix elements.
-        Array< Array<Coefficient> > m_array;
-
-    };
-
-    NVMATH_API void transpose(const SparseMatrix & A, SparseMatrix & B);
-
-    NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
-    NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
-
-    // y = alpha*A*x + beta*y
-    NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
-    NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
-
-    NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
-    NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
-
-    // C = alpha*A*B + beta*C
-    NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
-    NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
-
-    // C = At * A
-    NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
-
-} // nv namespace
-
-
-#endif // NV_MATH_SPARSE_H
diff --git a/thirdparty/thekla_atlas/nvmath/Sphere.cpp b/thirdparty/thekla_atlas/nvmath/Sphere.cpp
deleted file mode 100644
index e0c1ad652c..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Sphere.cpp
+++ /dev/null
@@ -1,431 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#include "Sphere.h"
-#include "Vector.inl"
-#include "Box.inl"
-
-#include <float.h> // FLT_MAX
-
-using namespace nv;
-
-const float radiusEpsilon = 1e-4f;
-
-Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1)
-{
-    if (p0 == p1) *this = Sphere(p0);
-    else {
-        center = (p0 + p1) * 0.5f;
-        radius = length(p0 - center) + radiusEpsilon;
-
-        float d0 = length(p0 - center);
-        float d1 = length(p1 - center);
-        nvDebugCheck(equal(d0, radius - radiusEpsilon));
-        nvDebugCheck(equal(d1, radius - radiusEpsilon));
-    }
-}
-
-Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2)
-{
-    if (p0 == p1 || p0 == p2) *this = Sphere(p1, p2);
-    else if (p1 == p2) *this = Sphere(p0, p2);
-    else {
-        Vector3 a = p1 - p0;
-        Vector3 b = p2 - p0;
-        Vector3 c = cross(a, b);
-
-        float denominator = 2.0f * lengthSquared(c);
-        
-        if (!isZero(denominator)) {
-	        Vector3 d = (lengthSquared(b) * cross(c, a) + lengthSquared(a) * cross(b, c)) / denominator;
-
-	        center = p0 + d;
-	        radius = length(d) + radiusEpsilon;
-
-            float d0 = length(p0 - center);
-            float d1 = length(p1 - center);
-            float d2 = length(p2 - center);
-            nvDebugCheck(equal(d0, radius - radiusEpsilon));
-            nvDebugCheck(equal(d1, radius - radiusEpsilon));
-            nvDebugCheck(equal(d2, radius - radiusEpsilon));
-        }
-        else {
-            // @@ This is a specialization of the code below, but really, the only thing we need to do here is to find the two most distant points.
-            // Compute all possible spheres, invalidate those that do not contain the four points, keep the smallest.
-            Sphere s0(p1, p2);
-            float d0 = distanceSquared(s0, p0);
-            if (d0 > 0) s0.radius = NV_FLOAT_MAX;
-
-            Sphere s1(p0, p2);
-            float d1 = distanceSquared(s1, p1);
-            if (d1 > 0) s1.radius = NV_FLOAT_MAX;
-
-            Sphere s2(p0, p1);
-            float d2 = distanceSquared(s2, p2);
-            if (d2 > 0) s1.radius = NV_FLOAT_MAX;
-
-            if (s0.radius < s1.radius && s0.radius < s2.radius) {
-                center = s0.center;
-                radius = s0.radius;
-            }
-            else if (s1.radius < s2.radius) {
-                center = s1.center;
-                radius = s1.radius;
-            }
-            else {
-                center = s2.center;
-                radius = s2.radius;
-            }
-        }
-    }
-}
-
-Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector3::Arg p3)
-{
-    if (p0 == p1 || p0 == p2 || p0 == p3) *this = Sphere(p1, p2, p3);
-    else if (p1 == p2 || p1 == p3) *this = Sphere(p0, p2, p3);
-    else if (p2 == p3) *this = Sphere(p0, p1, p2);
-    else {
-        // @@ This only works if the points are not coplanar!
-	    Vector3 a = p1 - p0;
-	    Vector3 b = p2 - p0;
-	    Vector3 c = p3 - p0;
-
-        float denominator = 2.0f * dot(c, cross(a, b)); // triple product.
-
-        if (!isZero(denominator)) {
-	        Vector3 d = (lengthSquared(c) * cross(a, b) + lengthSquared(b) * cross(c, a) + lengthSquared(a) * cross(b, c)) / denominator;
-
-	        center = p0 + d;
-            radius = length(d) + radiusEpsilon;
-
-            float d0 = length(p0 - center);
-            float d1 = length(p1 - center);
-            float d2 = length(p2 - center);
-            float d3 = length(p3 - center);
-            nvDebugCheck(equal(d0, radius - radiusEpsilon));
-            nvDebugCheck(equal(d1, radius - radiusEpsilon));
-            nvDebugCheck(equal(d2, radius - radiusEpsilon));
-            nvDebugCheck(equal(d3, radius - radiusEpsilon));
-        }
-        else {
-            // Compute all possible spheres, invalidate those that do not contain the four points, keep the smallest.
-            Sphere s0(p1, p2, p3);
-            float d0 = distanceSquared(s0, p0);
-            if (d0 > 0) s0.radius = NV_FLOAT_MAX;
-
-            Sphere s1(p0, p2, p3);
-            float d1 = distanceSquared(s1, p1);
-            if (d1 > 0) s1.radius = NV_FLOAT_MAX;
-
-            Sphere s2(p0, p1, p3);
-            float d2 = distanceSquared(s2, p2);
-            if (d2 > 0) s2.radius = NV_FLOAT_MAX;
-
-            Sphere s3(p0, p1, p2);
-            float d3 = distanceSquared(s3, p3);
-            if (d3 > 0) s2.radius = NV_FLOAT_MAX;
-
-            if (s0.radius < s1.radius && s0.radius < s2.radius && s0.radius < s3.radius) {
-                center = s0.center;
-                radius = s0.radius;
-            }
-            else if (s1.radius < s2.radius && s1.radius < s3.radius) {
-                center = s1.center;
-                radius = s1.radius;
-            }
-            else if (s1.radius < s3.radius) {
-                center = s2.center;
-                radius = s2.radius;
-            }
-            else {
-                center = s3.center;
-                radius = s3.radius;
-            }
-        }
-    }
-}
-
-
-float nv::distanceSquared(const Sphere & sphere, const Vector3 & point)
-{
-    return lengthSquared(sphere.center - point) - square(sphere.radius);
-}
-
-
-
-// Implementation of "MiniBall" based on:
-// http://www.flipcode.com/archives/Smallest_Enclosing_Spheres.shtml
-
-static Sphere recurseMini(const Vector3 *P[], uint p, uint b = 0)
-{
-	Sphere MB;
-
-	switch(b)
-	{
-	case 0:
-		MB = Sphere(*P[0]);
-		break;
-	case 1:
-		MB = Sphere(*P[-1]);
-		break;
-	case 2:
-		MB = Sphere(*P[-1], *P[-2]);
-		break;
-	case 3:
-		MB = Sphere(*P[-1], *P[-2], *P[-3]);
-		break;
-	case 4:
-		MB = Sphere(*P[-1], *P[-2], *P[-3], *P[-4]);
-		return MB;
-	}
-
-	for (uint i = 0; i < p; i++)
-    {
-        if (distanceSquared(MB, *P[i]) > 0)   // Signed square distance to sphere
-		{
-			for (uint j = i; j > 0; j--)
-			{
-                swap(P[j], P[j-1]);
-			}
-
-			MB = recurseMini(P + 1, i, b + 1);
-		}
-    }
-
-	return MB;
-}
-
-static bool allInside(const Sphere & sphere, const Vector3 * pointArray, const uint pointCount) {
-    for (uint i = 0; i < pointCount; i++) {
-        if (distanceSquared(sphere, pointArray[i]) >= NV_EPSILON) {
-            return false;
-        }
-    }
-    return true;
-}
-
-
-Sphere nv::miniBall(const Vector3 * pointArray, const uint pointCount)
-{
-    nvDebugCheck(pointArray != NULL);
-    nvDebugCheck(pointCount > 0);
-
-	const Vector3 **L = new const Vector3*[pointCount];
-
-    for (uint i = 0; i < pointCount; i++) {
-		L[i] = &pointArray[i];
-    }
-
-	Sphere sphere = recurseMini(L, pointCount);
-
-	delete [] L;
-
-    nvDebugCheck(allInside(sphere, pointArray, pointCount));
-
-	return sphere;
-}
-
-
-// Approximate bounding sphere, based on "An Efficient Bounding Sphere" by Jack Ritter, from "Graphics Gems"
-Sphere nv::approximateSphere_Ritter(const Vector3 * pointArray, const uint pointCount)
-{
-    nvDebugCheck(pointArray != NULL);
-    nvDebugCheck(pointCount > 0);
-
-    Vector3 xmin, xmax, ymin, ymax, zmin, zmax;
-
-    xmin = xmax = ymin = ymax = zmin = zmax = pointArray[0];
-
-    // FIRST PASS: find 6 minima/maxima points
-    xmin.x = ymin.y = zmin.z = FLT_MAX;
-    xmax.x = ymax.y = zmax.z = -FLT_MAX;
-
-    for (uint i = 0; i < pointCount; i++)
-	{
-        const Vector3 & p = pointArray[i];
-        if (p.x < xmin.x) xmin = p;
-	    if (p.x > xmax.x) xmax = p;
-	    if (p.y < ymin.y) ymin = p;
-	    if (p.y > ymax.y) ymax = p;
-	    if (p.z < zmin.z) zmin = p;
-	    if (p.z > zmax.z) zmax = p;
-	}
-
-    float xspan = lengthSquared(xmax - xmin);
-    float yspan = lengthSquared(ymax - ymin);
-    float zspan = lengthSquared(zmax - zmin);
-
-    // Set points dia1 & dia2 to the maximally separated pair.
-    Vector3 dia1 = xmin; 
-    Vector3 dia2 = xmax;
-    float maxspan = xspan;
-    if (yspan > maxspan) {
-	    maxspan = yspan;
-	    dia1 = ymin;
-        dia2 = ymax;
-	}
-    if (zspan > maxspan) {
-	    dia1 = zmin;
-        dia2 = zmax;
-	}
-
-    // |dia1-dia2| is a diameter of initial sphere
-    
-    // calc initial center
-    Sphere sphere;
-    sphere.center = (dia1 + dia2) / 2.0f;
-
-    // calculate initial radius**2 and radius
-    float rad_sq = lengthSquared(dia2 - sphere.center);
-    sphere.radius = sqrtf(rad_sq);
-
-
-    // SECOND PASS: increment current sphere
-    for (uint i = 0; i < pointCount; i++)
-	{
-        const Vector3 & p = pointArray[i];
-
-        float old_to_p_sq = lengthSquared(p - sphere.center);
-
-	    if (old_to_p_sq > rad_sq) 	// do r**2 test first
-		{ 	
-            // this point is outside of current sphere
-		    float old_to_p = sqrtf(old_to_p_sq);
-
-		    // calc radius of new sphere
-            sphere.radius = (sphere.radius + old_to_p) / 2.0f;
-		    rad_sq = sphere.radius * sphere.radius; 	// for next r**2 compare
-    		
-            float old_to_new = old_to_p - sphere.radius;
-
-		    // calc center of new sphere
-            sphere.center = (sphere.radius * sphere.center + old_to_new * p) / old_to_p;
-		}	
-	}
-
-    nvDebugCheck(allInside(sphere, pointArray, pointCount));
-
-    return sphere;
-}
-
-
-static float computeSphereRadius(const Vector3 & center, const Vector3 * pointArray, const uint pointCount) {
-
-    float maxRadius2 = 0;
-
-    for (uint i = 0; i < pointCount; i++)
-	{
-        const Vector3 & p = pointArray[i];
-
-        float r2 = lengthSquared(center - p);
-
-        if (r2 > maxRadius2) {
-            maxRadius2 = r2;
-        }
-    }
-
-    return sqrtf(maxRadius2) + radiusEpsilon;
-}
-
-
-Sphere nv::approximateSphere_AABB(const Vector3 * pointArray, const uint pointCount)
-{
-    nvDebugCheck(pointArray != NULL);
-    nvDebugCheck(pointCount > 0);
-
-    Box box;
-    box.clearBounds();
-
-    for (uint i = 0; i < pointCount; i++) {
-        box.addPointToBounds(pointArray[i]);
-    }
-
-    Sphere sphere;
-    sphere.center = box.center();
-    sphere.radius = computeSphereRadius(sphere.center, pointArray, pointCount);
-
-    nvDebugCheck(allInside(sphere, pointArray, pointCount));
-
-    return sphere;
-}
-
-
-static void computeExtremalPoints(const Vector3 & dir, const Vector3 * pointArray, uint pointCount, Vector3 * minPoint, Vector3 * maxPoint) {
-    nvDebugCheck(pointCount > 0);
-
-    uint mini = 0;
-    uint maxi = 0;
-    float minDist = FLT_MAX;
-    float maxDist = -FLT_MAX;
-
-    for (uint i = 0; i < pointCount; i++) {
-        float d = dot(dir, pointArray[i]);
-
-        if (d < minDist) {
-            minDist = d;
-            mini = i;
-        }
-        if (d > maxDist) {
-            maxDist = d;
-            maxi = i;
-        }
-    }
-    nvDebugCheck(minDist != FLT_MAX);
-    nvDebugCheck(maxDist != -FLT_MAX);
-
-    *minPoint = pointArray[mini];
-    *maxPoint = pointArray[maxi];
-}
-
-// EPOS algorithm based on:
-// http://www.ep.liu.se/ecp/034/009/ecp083409.pdf
-Sphere nv::approximateSphere_EPOS6(const Vector3 * pointArray, uint pointCount)
-{
-    nvDebugCheck(pointArray != NULL);
-    nvDebugCheck(pointCount > 0);
-
-    Vector3 extremalPoints[6];
-
-    // Compute 6 extremal points.
-    computeExtremalPoints(Vector3(1, 0, 0), pointArray, pointCount, extremalPoints+0, extremalPoints+1);
-    computeExtremalPoints(Vector3(0, 1, 0), pointArray, pointCount, extremalPoints+2, extremalPoints+3);
-    computeExtremalPoints(Vector3(0, 0, 1), pointArray, pointCount, extremalPoints+4, extremalPoints+5);
-
-    Sphere sphere = miniBall(extremalPoints, 6);
-    sphere.radius = computeSphereRadius(sphere.center, pointArray, pointCount);
-
-    nvDebugCheck(allInside(sphere, pointArray, pointCount));
-
-    return sphere;
-}
-
-Sphere nv::approximateSphere_EPOS14(const Vector3 * pointArray, uint pointCount)
-{
-    nvDebugCheck(pointArray != NULL);
-    nvDebugCheck(pointCount > 0);
-
-    Vector3 extremalPoints[14];
-
-    // Compute 14 extremal points.
-    computeExtremalPoints(Vector3(1, 0, 0), pointArray, pointCount, extremalPoints+0, extremalPoints+1);
-    computeExtremalPoints(Vector3(0, 1, 0), pointArray, pointCount, extremalPoints+2, extremalPoints+3);
-    computeExtremalPoints(Vector3(0, 0, 1), pointArray, pointCount, extremalPoints+4, extremalPoints+5);
-
-    float d = sqrtf(1.0f/3.0f);
-
-    computeExtremalPoints(Vector3(d, d, d), pointArray, pointCount, extremalPoints+6, extremalPoints+7);
-    computeExtremalPoints(Vector3(-d, d, d), pointArray, pointCount, extremalPoints+8, extremalPoints+9);
-    computeExtremalPoints(Vector3(-d, -d, d), pointArray, pointCount, extremalPoints+10, extremalPoints+11);
-    computeExtremalPoints(Vector3(d, -d, d), pointArray, pointCount, extremalPoints+12, extremalPoints+13);
-
-
-    Sphere sphere = miniBall(extremalPoints, 14);
-    sphere.radius = computeSphereRadius(sphere.center, pointArray, pointCount);
-
-    nvDebugCheck(allInside(sphere, pointArray, pointCount));
-
-    return sphere;
-}
-
-
-
diff --git a/thirdparty/thekla_atlas/nvmath/Sphere.h b/thirdparty/thekla_atlas/nvmath/Sphere.h
deleted file mode 100644
index 300731af44..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Sphere.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_SPHERE_H
-#define NV_MATH_SPHERE_H
-
-#include "Vector.h"
-
-namespace nv
-{
-    
-    class Sphere
-    {
-    public:
-        Sphere() {}
-        Sphere(Vector3::Arg center, float radius) : center(center), radius(radius) {}
-
-        Sphere(Vector3::Arg center) : center(center), radius(0.0f) {}
-        Sphere(Vector3::Arg p0, Vector3::Arg p1);
-        Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2);
-        Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector3::Arg p3);
-
-        Vector3 center;
-        float radius;
-    };
-
-    // Returns negative values if point is inside.
-    float distanceSquared(const Sphere & sphere, const Vector3 &point);
-
-
-    // Welz's algorithm. Fairly slow, recursive implementation uses large stack.
-    Sphere miniBall(const Vector3 * pointArray, uint pointCount);
-
-    Sphere approximateSphere_Ritter(const Vector3 * pointArray, uint pointCount);
-    Sphere approximateSphere_AABB(const Vector3 * pointArray, uint pointCount);
-    Sphere approximateSphere_EPOS6(const Vector3 * pointArray, uint pointCount);
-    Sphere approximateSphere_EPOS14(const Vector3 * pointArray, uint pointCount);
-
-
-} // nv namespace
-
-
-#endif // NV_MATH_SPHERE_H
diff --git a/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp b/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp
deleted file mode 100644
index 72fa678f47..0000000000
--- a/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#include "TypeSerialization.h"
-
-#include "nvcore/Stream.h"
-
-#include "nvmath/Vector.h"
-#include "nvmath/Matrix.h"
-#include "nvmath/Quaternion.h"
-#include "nvmath/Basis.h"
-#include "nvmath/Box.h"
-#include "nvmath/Plane.inl"
-
-using namespace nv;
-
-Stream & nv::operator<< (Stream & s, Vector2 & v)
-{
-    return s << v.x << v.y;
-}
-
-Stream & nv::operator<< (Stream & s, Vector3 & v)
-{
-    return s << v.x << v.y << v.z;
-}
-
-Stream & nv::operator<< (Stream & s, Vector4 & v)
-{
-    return s << v.x << v.y << v.z << v.w;
-}
-
-Stream & nv::operator<< (Stream & s, Matrix & m)
-{
-    return s;
-}
-
-Stream & nv::operator<< (Stream & s, Quaternion & q)
-{
-    return s << q.x << q.y << q.z << q.w;
-}
-
-Stream & nv::operator<< (Stream & s, Basis & basis)
-{
-    return s << basis.tangent << basis.bitangent << basis.normal;
-}
-
-Stream & nv::operator<< (Stream & s, Box & box)
-{
-    return s << box.minCorner << box.maxCorner;
-}
-
-Stream & nv::operator<< (Stream & s, Plane & plane)
-{
-    return s << plane.v;
-}
diff --git a/thirdparty/thekla_atlas/nvmath/TypeSerialization.h b/thirdparty/thekla_atlas/nvmath/TypeSerialization.h
deleted file mode 100644
index 32d6de827e..0000000000
--- a/thirdparty/thekla_atlas/nvmath/TypeSerialization.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// This code is in the public domain -- Ignacio Casta�o <castano@gmail.com>
-
-#pragma once
-#ifndef NV_MATH_TYPESERIALIZATION_H
-#define NV_MATH_TYPESERIALIZATION_H
-
-#include "nvmath.h"
-
-namespace nv
-{
-    class Stream;
-
-    class Vector2;
-    class Vector3;
-    class Vector4;
-
-    class Matrix;
-    class Quaternion;
-    class Basis;
-    class Box;
-    class Plane;
-
-    NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
-    NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
-    NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
-
-    NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
-    NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
-    NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
-    NVMATH_API Stream & operator<< (Stream & s, Box & obj);
-    NVMATH_API Stream & operator<< (Stream & s, Plane & obj);
-
-} // nv namespace
-
-#endif // NV_MATH_TYPESERIALIZATION_H
diff --git a/thirdparty/thekla_atlas/nvmath/Vector.cpp b/thirdparty/thekla_atlas/nvmath/Vector.cpp
deleted file mode 100644
index 9122a1b0e9..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Vector.cpp
+++ /dev/null
@@ -1,4 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include "Vector.h"
-#include "Vector.inl"
diff --git a/thirdparty/thekla_atlas/nvmath/Vector.h b/thirdparty/thekla_atlas/nvmath/Vector.h
deleted file mode 100644
index ad18672a8a..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Vector.h
+++ /dev/null
@@ -1,149 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_VECTOR_H
-#define NV_MATH_VECTOR_H
-
-#include "nvmath.h"
-
-namespace nv
-{
-    class NVMATH_CLASS Vector2
-    {
-    public:
-        typedef Vector2 const & Arg;
-
-        Vector2();
-        explicit Vector2(float f);
-        Vector2(float x, float y);
-        Vector2(Vector2::Arg v);
-
-        //template <typename T> explicit Vector2(const T & v) : x(v.x), y(v.y) {}
-        //template <typename T> operator T() const { return T(x, y); }
-
-        const Vector2 & operator=(Vector2::Arg v);
-
-        const float * ptr() const;
-
-        void set(float x, float y);
-
-        Vector2 operator-() const;
-        void operator+=(Vector2::Arg v);
-        void operator-=(Vector2::Arg v);
-        void operator*=(float s);
-        void operator*=(Vector2::Arg v);
-
-        friend bool operator==(Vector2::Arg a, Vector2::Arg b);
-        friend bool operator!=(Vector2::Arg a, Vector2::Arg b);
-
-        union {
-            struct {
-                float x, y;
-            };
-            float component[2];
-        };
-    };
-
-    class NVMATH_CLASS Vector3
-    {
-    public:
-        typedef Vector3 const & Arg;
-
-        Vector3();
-        explicit Vector3(float x);
-        //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {}
-        Vector3(float x, float y, float z);
-        Vector3(Vector2::Arg v, float z);
-        Vector3(Vector3::Arg v);
-
-        //template <typename T> explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {}
-        //template <typename T> operator T() const { return T(x, y, z); }
-
-        const Vector3 & operator=(Vector3::Arg v);
-
-        Vector2 xy() const;
-
-        const float * ptr() const;
-
-        void set(float x, float y, float z);
-
-        Vector3 operator-() const;
-        void operator+=(Vector3::Arg v);
-        void operator-=(Vector3::Arg v);
-        void operator*=(float s);
-        void operator/=(float s);
-        void operator*=(Vector3::Arg v);
-        void operator/=(Vector3::Arg v);
-
-        friend bool operator==(Vector3::Arg a, Vector3::Arg b);
-        friend bool operator!=(Vector3::Arg a, Vector3::Arg b);
-
-        union {
-            struct {
-                float x, y, z;
-            };
-            float component[3];
-        };
-    };
-
-    class NVMATH_CLASS Vector4
-    {
-    public:
-        typedef Vector4 const & Arg;
-
-        Vector4();
-        explicit Vector4(float x);
-        Vector4(float x, float y, float z, float w);
-        Vector4(Vector2::Arg v, float z, float w);
-        Vector4(Vector2::Arg v, Vector2::Arg u);
-        Vector4(Vector3::Arg v, float w);
-        Vector4(Vector4::Arg v);
-        //	Vector4(const Quaternion & v);
-
-        //template <typename T> explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
-        //template <typename T> operator T() const { return T(x, y, z, w); }
-
-        const Vector4 & operator=(Vector4::Arg v);
-
-        Vector2 xy() const;
-        Vector2 zw() const;
-        Vector3 xyz() const;
-
-        const float * ptr() const;
-
-        void set(float x, float y, float z, float w);
-
-        Vector4 operator-() const;
-        void operator+=(Vector4::Arg v);
-        void operator-=(Vector4::Arg v);
-        void operator*=(float s);
-        void operator/=(float s);
-        void operator*=(Vector4::Arg v);
-        void operator/=(Vector4::Arg v);
-
-        friend bool operator==(Vector4::Arg a, Vector4::Arg b);
-        friend bool operator!=(Vector4::Arg a, Vector4::Arg b);
-
-        union {
-            struct {
-                float x, y, z, w;
-            };
-            float component[4];
-        };
-    };
-
-} // nv namespace
-
-// If we had these functions, they would be ambiguous, the compiler would not know which one to pick:
-//template <typename T> Vector2 to(const T & v) { return Vector2(v.x, v.y); }
-//template <typename T> Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); }
-//template <typename T> Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.z); }
-
-// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages.
-
-// Instead we simply have explicit casts:
-template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
-template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
-template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
-
-#endif // NV_MATH_VECTOR_H
diff --git a/thirdparty/thekla_atlas/nvmath/Vector.inl b/thirdparty/thekla_atlas/nvmath/Vector.inl
deleted file mode 100644
index bcaec7bf2a..0000000000
--- a/thirdparty/thekla_atlas/nvmath/Vector.inl
+++ /dev/null
@@ -1,919 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_VECTOR_INL
-#define NV_MATH_VECTOR_INL
-
-#include "Vector.h"
-#include "nvcore/Utils.h" // min, max
-#include "nvcore/Hash.h" // hash
-
-namespace nv
-{
-
-    // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor.
-    //template <typename T> T to(Vector2::Arg v) { return T(v.x, v.y); }
-
-    // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
-    //template <typename T> T to(Vector3::Arg v) { return T(v.x, v.y, v.z); }
-
-    // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
-    //template <typename T> T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); }
-
-
-    // Vector2
-    inline Vector2::Vector2() {}
-    inline Vector2::Vector2(float f) : x(f), y(f) {}
-    inline Vector2::Vector2(float x, float y) : x(x), y(y) {}
-    inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {}
-
-    inline const Vector2 & Vector2::operator=(Vector2::Arg v)
-    {
-        x = v.x;
-        y = v.y;
-        return *this;
-    }
-
-    inline const float * Vector2::ptr() const
-    {
-        return &x;
-    }
-
-    inline void Vector2::set(float x, float y)
-    {
-        this->x = x;
-        this->y = y;
-    }
-
-    inline Vector2 Vector2::operator-() const
-    {
-        return Vector2(-x, -y);
-    }
-
-    inline void Vector2::operator+=(Vector2::Arg v)
-    {
-        x += v.x;
-        y += v.y;
-    }
-
-    inline void Vector2::operator-=(Vector2::Arg v)
-    {
-        x -= v.x;
-        y -= v.y;
-    }
-
-    inline void Vector2::operator*=(float s)
-    {
-        x *= s;
-        y *= s;
-    }
-
-    inline void Vector2::operator*=(Vector2::Arg v)
-    {
-        x *= v.x;
-        y *= v.y;
-    }
-
-    inline bool operator==(Vector2::Arg a, Vector2::Arg b)
-    {
-        return a.x == b.x && a.y == b.y; 
-    }
-    inline bool operator!=(Vector2::Arg a, Vector2::Arg b)
-    {
-        return a.x != b.x || a.y != b.y; 
-    }
-
-
-    // Vector3
-    inline Vector3::Vector3() {}
-    inline Vector3::Vector3(float f) : x(f), y(f), z(f) {}
-    inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {}
-    inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {}
-    inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {}
-
-    inline const Vector3 & Vector3::operator=(Vector3::Arg v)
-    {
-        x = v.x;
-        y = v.y;
-        z = v.z;
-        return *this;
-    }
-
-
-    inline Vector2 Vector3::xy() const
-    {
-        return Vector2(x, y);
-    }
-
-    inline const float * Vector3::ptr() const
-    {
-        return &x;
-    }
-
-    inline void Vector3::set(float x, float y, float z)
-    {
-        this->x = x;
-        this->y = y;
-        this->z = z;
-    }
-
-    inline Vector3 Vector3::operator-() const
-    {
-        return Vector3(-x, -y, -z);
-    }
-
-    inline void Vector3::operator+=(Vector3::Arg v)
-    {
-        x += v.x;
-        y += v.y;
-        z += v.z;
-    }
-
-    inline void Vector3::operator-=(Vector3::Arg v)
-    {
-        x -= v.x;
-        y -= v.y;
-        z -= v.z;
-    }
-
-    inline void Vector3::operator*=(float s)
-    {
-        x *= s;
-        y *= s;
-        z *= s;
-    }
-
-    inline void Vector3::operator/=(float s)
-    {
-        float is = 1.0f / s;
-        x *= is;
-        y *= is;
-        z *= is;
-    }
-
-    inline void Vector3::operator*=(Vector3::Arg v)
-    {
-        x *= v.x;
-        y *= v.y;
-        z *= v.z;
-    }
-
-    inline void Vector3::operator/=(Vector3::Arg v)
-    {
-        x /= v.x;
-        y /= v.y;
-        z /= v.z;
-    }
-
-    inline bool operator==(Vector3::Arg a, Vector3::Arg b)
-    {
-        return a.x == b.x && a.y == b.y && a.z == b.z; 
-    }
-    inline bool operator!=(Vector3::Arg a, Vector3::Arg b)
-    {
-        return a.x != b.x || a.y != b.y || a.z != b.z; 
-    }
-
-
-    // Vector4
-    inline Vector4::Vector4() {}
-    inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {}
-    inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
-    inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {}
-    inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
-    inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {}
-    inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
-
-    inline const Vector4 & Vector4::operator=(const Vector4 & v)
-    {
-        x = v.x;
-        y = v.y;
-        z = v.z;
-        w = v.w;
-        return *this;
-    }
-
-    inline Vector2 Vector4::xy() const
-    {
-        return Vector2(x, y);
-    }
-
-    inline Vector2 Vector4::zw() const
-    {
-        return Vector2(z, w);
-    }
-
-    inline Vector3 Vector4::xyz() const
-    {
-        return Vector3(x, y, z);
-    }
-
-    inline const float * Vector4::ptr() const
-    {
-        return &x;
-    }
-
-    inline void Vector4::set(float x, float y, float z, float w)
-    {
-        this->x = x;
-        this->y = y;
-        this->z = z;
-        this->w = w;
-    }
-
-    inline Vector4 Vector4::operator-() const
-    {
-        return Vector4(-x, -y, -z, -w);
-    }
-
-    inline void Vector4::operator+=(Vector4::Arg v)
-    {
-        x += v.x;
-        y += v.y;
-        z += v.z;
-        w += v.w;
-    }
-
-    inline void Vector4::operator-=(Vector4::Arg v)
-    {
-        x -= v.x;
-        y -= v.y;
-        z -= v.z;
-        w -= v.w;
-    }
-
-    inline void Vector4::operator*=(float s)
-    {
-        x *= s;
-        y *= s;
-        z *= s;
-        w *= s;
-    }
-
-    inline void Vector4::operator/=(float s)
-    {
-        x /= s;
-        y /= s;
-        z /= s;
-        w /= s;
-    }
-
-    inline void Vector4::operator*=(Vector4::Arg v)
-    {
-        x *= v.x;
-        y *= v.y;
-        z *= v.z;
-        w *= v.w;
-    }
-
-    inline void Vector4::operator/=(Vector4::Arg v)
-    {
-        x /= v.x;
-        y /= v.y;
-        z /= v.z;
-        w /= v.w;
-    }
-
-    inline bool operator==(Vector4::Arg a, Vector4::Arg b)
-    {
-        return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; 
-    }
-    inline bool operator!=(Vector4::Arg a, Vector4::Arg b)
-    {
-        return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; 
-    }
-
-
-
-    // Functions
-
-
-    // Vector2
-
-    inline Vector2 add(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(a.x + b.x, a.y + b.y);
-    }
-    inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b)
-    {
-        return add(a, b);
-    }
-
-    inline Vector2 sub(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(a.x - b.x, a.y - b.y);
-    }
-    inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b)
-    {
-        return sub(a, b);
-    }
-
-    inline Vector2 scale(Vector2::Arg v, float s)
-    {
-        return Vector2(v.x * s, v.y * s);
-    }
-
-    inline Vector2 scale(Vector2::Arg v, Vector2::Arg s)
-    {
-        return Vector2(v.x * s.x, v.y * s.y);
-    }
-
-    inline Vector2 operator*(Vector2::Arg v, float s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2)
-    {
-        return Vector2(v1.x*v2.x, v1.y*v2.y);
-    }
-
-    inline Vector2 operator*(float s, Vector2::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator/(Vector2::Arg v, float s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t)
-    {
-        const float s = 1.0f - t;
-        return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y);
-    }
-
-    inline float dot(Vector2::Arg a, Vector2::Arg b)
-    {
-        return a.x * b.x + a.y * b.y;
-    }
-
-    inline float lengthSquared(Vector2::Arg v)
-    {
-        return v.x * v.x + v.y * v.y;
-    }
-
-    inline float length(Vector2::Arg v)
-    {
-        return sqrtf(lengthSquared(v));
-    }
-
-    inline float distance(Vector2::Arg a, Vector2::Arg b)
-    {
-        return length(a - b);
-    }
-
-    inline float inverseLength(Vector2::Arg v)
-    {
-        return 1.0f / sqrtf(lengthSquared(v));
-    }
-
-    inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(v), 1, epsilon);
-    }
-
-    inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        nvDebugCheck(!isZero(l, epsilon));
-        Vector2 n = scale(v, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        if (isZero(l, epsilon)) {
-            return fallback;
-        }
-        return scale(v, 1.0f / l);
-    }
-
-    // Safe, branchless normalization from Andy Firth. All error checking ommitted.
-    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
-    inline Vector2 normalizeFast(Vector2::Arg v)
-    {
-        const float very_small_float = 1.0e-037f;
-        float l = very_small_float + length(v);
-        return scale(v, 1.0f / l);
-    }
-
-    inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
-    {
-        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon);
-    }
-
-    inline Vector2 min(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(min(a.x, b.x), min(a.y, b.y));
-    }
-
-    inline Vector2 max(Vector2::Arg a, Vector2::Arg b)
-    {
-        return Vector2(max(a.x, b.x), max(a.y, b.y));
-    }
-
-    inline Vector2 clamp(Vector2::Arg v, float min, float max)
-    {
-        return Vector2(clamp(v.x, min, max), clamp(v.y, min, max));
-    }
-
-    inline Vector2 saturate(Vector2::Arg v)
-    {
-        return Vector2(saturate(v.x), saturate(v.y));
-    }
-
-    inline bool isFinite(Vector2::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y);
-    }
-
-    inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f))
-    {
-        if (!isFinite(v)) return fallback;
-        Vector2 vf = v;
-        nv::floatCleanup(vf.component, 2);
-        return vf;
-    }
-
-    // Note, this is the area scaled by 2!
-    inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1)
-    {
-	    return (v0.x * v1.y - v0.y * v1.x); // * 0.5f;
-    }
-    inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
-    {
-        // IC: While it may be appealing to use the following expression:
-        //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
-
-        // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point 
-        // numbers and the results becomes very unstable and dependent on the order of the factors.
-
-        // Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. The result
-        // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of 
-        // the triangle.
-
-        //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f;
-        return triangleArea(a-c, b-c);
-    }
-
-
-    template <>
-    inline uint hash(const Vector2 & v, uint h)
-    {
-        return sdbmFloatHash(v.component, 2, h);
-    }
-
-
-
-    // Vector3
-
-    inline Vector3 add(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(a.x + b.x, a.y + b.y, a.z + b.z);
-    }
-    inline Vector3 add(Vector3::Arg a, float b)
-    {
-        return Vector3(a.x + b, a.y + b, a.z + b);
-    }
-    inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b)
-    {
-        return add(a, b);
-    }
-    inline Vector3 operator+(Vector3::Arg a, float b)
-    {
-        return add(a, b);
-    }
-
-    inline Vector3 sub(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
-    }
-    inline Vector3 sub(Vector3::Arg a, float b)
-    {
-        return Vector3(a.x - b, a.y - b, a.z - b);
-    }
-    inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b)
-    {
-        return sub(a, b);
-    }
-    inline Vector3 operator-(Vector3::Arg a, float b)
-    {
-        return sub(a, b);
-    }
-
-    inline Vector3 cross(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
-    }
-
-    inline Vector3 scale(Vector3::Arg v, float s)
-    {
-        return Vector3(v.x * s, v.y * s, v.z * s);
-    }
-
-    inline Vector3 scale(Vector3::Arg v, Vector3::Arg s)
-    {
-        return Vector3(v.x * s.x, v.y * s.y, v.z * s.z);
-    }
-
-    inline Vector3 operator*(Vector3::Arg v, float s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator*(float s, Vector3::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator/(Vector3::Arg v, float s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s)
-    {
-        return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
-    }*/
-
-    inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t)
-    {
-        const float s = 1.0f - t;
-        return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z);
-    }
-
-    inline float dot(Vector3::Arg a, Vector3::Arg b)
-    {
-        return a.x * b.x + a.y * b.y + a.z * b.z;
-    }
-
-    inline float lengthSquared(Vector3::Arg v)
-    {
-        return v.x * v.x + v.y * v.y + v.z * v.z;
-    }
-
-    inline float length(Vector3::Arg v)
-    {
-        return sqrtf(lengthSquared(v));
-    }
-
-    inline float distance(Vector3::Arg a, Vector3::Arg b)
-    {
-        return length(a - b);
-    }
-
-    inline float distanceSquared(Vector3::Arg a, Vector3::Arg b)
-    {
-        return lengthSquared(a - b);
-    }
-
-    inline float inverseLength(Vector3::Arg v)
-    {
-        return 1.0f / sqrtf(lengthSquared(v));
-    }
-
-    inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(v), 1, epsilon);
-    }
-
-    inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        nvDebugCheck(!isZero(l, epsilon));
-        Vector3 n = scale(v, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        if (isZero(l, epsilon)) {
-            return fallback;
-        }
-        return scale(v, 1.0f / l);
-    }
-
-    // Safe, branchless normalization from Andy Firth. All error checking ommitted.
-    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
-    inline Vector3 normalizeFast(Vector3::Arg v)
-    {
-        const float very_small_float = 1.0e-037f;
-        float l = very_small_float + length(v);
-        return scale(v, 1.0f / l);
-    }
-
-    inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
-    {
-        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
-    }
-
-    inline Vector3 min(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
-    }
-
-    inline Vector3 max(Vector3::Arg a, Vector3::Arg b)
-    {
-        return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
-    }
-
-    inline Vector3 clamp(Vector3::Arg v, float min, float max)
-    {
-        return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max));
-    }
-
-    inline Vector3 saturate(Vector3::Arg v)
-    {
-        return Vector3(saturate(v.x), saturate(v.y), saturate(v.z));
-    }
-
-    inline Vector3 floor(Vector3::Arg v)
-    {
-        return Vector3(floorf(v.x), floorf(v.y), floorf(v.z));
-    }
-
-    inline Vector3 ceil(Vector3::Arg v)
-    {
-        return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z));
-    }
-
-    inline bool isFinite(Vector3::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
-    }
-
-    inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f))
-    {
-        if (!isFinite(v)) return fallback;
-        Vector3 vf = v;
-        nv::floatCleanup(vf.component, 3);
-        return vf;
-    }
-
-    inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n)
-    {
-	    return v - (2 * dot(v, n)) * n;
-    }
-
-    template <>
-    inline uint hash(const Vector3 & v, uint h)
-    {
-        return sdbmFloatHash(v.component, 3, h);
-    }
-
-
-    // Vector4
-
-    inline Vector4 add(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
-    }
-    inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b)
-    {
-        return add(a, b);
-    }
-
-    inline Vector4 sub(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
-    }
-    inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b)
-    {
-        return sub(a, b);
-    }
-
-    inline Vector4 scale(Vector4::Arg v, float s)
-    {
-        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
-    }
-
-    inline Vector4 scale(Vector4::Arg v, Vector4::Arg s)
-    {
-        return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w);
-    }
-
-    inline Vector4 operator*(Vector4::Arg v, float s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator*(float s, Vector4::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator/(Vector4::Arg v, float s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s)
-    {
-        return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s);
-    }*/
-
-    inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t)
-    {
-        const float s = 1.0f - t;
-        return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w);
-    }
-
-    inline float dot(Vector4::Arg a, Vector4::Arg b)
-    {
-        return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
-    }
-
-    inline float lengthSquared(Vector4::Arg v)
-    {
-        return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
-    }
-
-    inline float length(Vector4::Arg v)
-    {
-        return sqrtf(lengthSquared(v));
-    }
-
-    inline float inverseLength(Vector4::Arg v)
-    {
-        return 1.0f / sqrtf(lengthSquared(v));
-    }
-
-    inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
-    {
-        return equal(length(v), 1, epsilon);
-    }
-
-    inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        nvDebugCheck(!isZero(l, epsilon));
-        Vector4 n = scale(v, 1.0f / l);
-        nvDebugCheck(isNormalized(n));
-        return n;
-    }
-
-    inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON)
-    {
-        float l = length(v);
-        if (isZero(l, epsilon)) {
-            return fallback;
-        }
-        return scale(v, 1.0f / l);
-    }
-
-    // Safe, branchless normalization from Andy Firth. All error checking ommitted.
-    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
-    inline Vector4 normalizeFast(Vector4::Arg v)
-    {
-        const float very_small_float = 1.0e-037f;
-        float l = very_small_float + length(v);
-        return scale(v, 1.0f / l);
-    }
-
-    inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
-    {
-        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);
-    }
-
-    inline Vector4 min(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
-    }
-
-    inline Vector4 max(Vector4::Arg a, Vector4::Arg b)
-    {
-        return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
-    }
-
-    inline Vector4 clamp(Vector4::Arg v, float min, float max)
-    {
-        return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max));
-    }
-
-    inline Vector4 saturate(Vector4::Arg v)
-    {
-        return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w));
-    }
-
-    inline bool isFinite(Vector4::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w);
-    }
-
-    inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f))
-    {
-        if (!isFinite(v)) return fallback;
-        Vector4 vf = v;
-        nv::floatCleanup(vf.component, 4);
-        return vf;
-    }
-
-    template <>
-    inline uint hash(const Vector4 & v, uint h)
-    {
-        return sdbmFloatHash(v.component, 4, h);
-    }
-
-
-#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float
-
-    //int:
-
-    inline Vector2 scale(Vector2::Arg v, int s)
-    {
-        return Vector2(v.x * s, v.y * s);
-    }
-
-    inline Vector2 operator*(Vector2::Arg v, int s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator*(int s, Vector2::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector2 operator/(Vector2::Arg v, int s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    inline Vector3 scale(Vector3::Arg v, int s)
-    {
-        return Vector3(v.x * s, v.y * s, v.z * s);
-    }
-
-    inline Vector3 operator*(Vector3::Arg v, int s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator*(int s, Vector3::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector3 operator/(Vector3::Arg v, int s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    inline Vector4 scale(Vector4::Arg v, int s)
-    {
-        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
-    }
-
-    inline Vector4 operator*(Vector4::Arg v, int s)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator*(int s, Vector4::Arg v)
-    {
-        return scale(v, s);
-    }
-
-    inline Vector4 operator/(Vector4::Arg v, int s)
-    {
-        return scale(v, 1.0f/s);
-    }
-
-    //double:
-
-    inline Vector3 operator*(Vector3::Arg v, double s)
-    {
-        return scale(v, (float)s);
-    }
-
-    inline Vector3 operator*(double s, Vector3::Arg v)
-    {
-        return scale(v, (float)s);
-    }
-
-    inline Vector3 operator/(Vector3::Arg v, double s)
-    {
-        return scale(v, 1.f/((float)s));
-    }    
-        
-#endif //NV_OS_IOS
-
-} // nv namespace
-
-#endif // NV_MATH_VECTOR_INL
diff --git a/thirdparty/thekla_atlas/nvmath/ftoi.h b/thirdparty/thekla_atlas/nvmath/ftoi.h
deleted file mode 100644
index 182c56d1c3..0000000000
--- a/thirdparty/thekla_atlas/nvmath/ftoi.h
+++ /dev/null
@@ -1,261 +0,0 @@
-// This code is in the public domain -- castano@gmail.com
-
-#pragma once
-#ifndef NV_MATH_FTOI_H
-#define NV_MATH_FTOI_H
-
-#include "nvmath/nvmath.h"
-
-#include <math.h>
-
-namespace nv
-{
-    // Optimized float to int conversions. See:
-    // http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
-    // http://www.stereopsis.com/sree/fpu2006.html
-    // http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
-    // http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
-
-
-    union DoubleAnd64 {
-        uint64    i;
-        double    d;
-    };
-
-    static const double floatutil_xs_doublemagic = (6755399441055744.0);                            // 2^52 * 1.5
-    static const double floatutil_xs_doublemagicdelta = (1.5e-8);                                   // almost .5f = .5f + 1e^(number of exp bit)
-    static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta);  // almost .5f = .5f - 1e^(number of exp bit)
-
-    NV_FORCEINLINE int ftoi_round_xs(double val, double magic) {
-#if 1
-        DoubleAnd64 dunion;
-        dunion.d = val + magic;
-        return (int32) dunion.i; // just cast to grab the bottom bits
-#else
-        val += magic;
-        return ((int*)&val)[0]; // @@ Assumes little endian.
-#endif
-    }
-
-    NV_FORCEINLINE int ftoi_round_xs(float val) {
-        return ftoi_round_xs(val, floatutil_xs_doublemagic);
-    }
-
-    NV_FORCEINLINE int ftoi_floor_xs(float val) {
-        return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
-    }
-
-    NV_FORCEINLINE int ftoi_ceil_xs(float val) {
-        return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
-    }
-
-    NV_FORCEINLINE int ftoi_trunc_xs(float val) {
-        return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val);
-    }
-
-// -- GODOT start --
-//#if NV_CPU_X86 || NV_CPU_X86_64
-#if NV_USE_SSE
-// -- GODOT end --
-
-    NV_FORCEINLINE int ftoi_round_sse(float f) {
-        return _mm_cvt_ss2si(_mm_set_ss(f));
-    }
-
-    NV_FORCEINLINE int ftoi_trunc_sse(float f) {
-      return _mm_cvtt_ss2si(_mm_set_ss(f));
-    }
-
-#endif
-
-
-
-#if NV_USE_SSE
-
-    NV_FORCEINLINE int ftoi_round(float val) {
-        return ftoi_round_sse(val);
-    }
-
-    NV_FORCEINLINE int ftoi_trunc(float f) {
-      return ftoi_trunc_sse(f);
-    }
-
-    // We can probably do better than this. See for example:
-    // http://dss.stephanierct.com/DevBlog/?p=8
-    NV_FORCEINLINE int ftoi_floor(float val) {
-        return ftoi_round(floorf(val));
-    }
-
-    NV_FORCEINLINE int ftoi_ceil(float val) {
-        return ftoi_round(ceilf(val));
-    }
-
-#else
-
-    // In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code
-    // when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode.
-
-    NV_FORCEINLINE int ftoi_round(float val) {
-        return ftoi_round_xs(val);
-    }
-
-    NV_FORCEINLINE int ftoi_floor(float val) {
-        return ftoi_floor_xs(val);
-    }
-
-    NV_FORCEINLINE int ftoi_ceil(float val) {
-        return ftoi_ceil_xs(val);
-    }
-
-    NV_FORCEINLINE int ftoi_trunc(float f) {
-      return ftoi_trunc_xs(f);
-    }
-
-#endif
-
-
-    inline void test_ftoi() {
-
-        // Round to nearest integer.
-        nvCheck(ftoi_round(0.1f) == 0);
-        nvCheck(ftoi_round(0.6f) == 1);
-        nvCheck(ftoi_round(-0.2f) == 0);
-        nvCheck(ftoi_round(-0.7f) == -1);
-        nvCheck(ftoi_round(10.1f) == 10);
-        nvCheck(ftoi_round(10.6f) == 11);
-        nvCheck(ftoi_round(-90.1f) == -90);
-        nvCheck(ftoi_round(-90.6f) == -91);
-
-        nvCheck(ftoi_round(0) == 0);
-        nvCheck(ftoi_round(1) == 1);
-        nvCheck(ftoi_round(-1) == -1);
-        
-        nvCheck(ftoi_round(0.5f) == 0);  // How are midpoints rounded? Bankers rounding.
-        nvCheck(ftoi_round(1.5f) == 2);
-        nvCheck(ftoi_round(2.5f) == 2);
-        nvCheck(ftoi_round(3.5f) == 4);
-        nvCheck(ftoi_round(4.5f) == 4);
-        nvCheck(ftoi_round(-0.5f) == 0);
-        nvCheck(ftoi_round(-1.5f) == -2);
-                
-
-        // Truncation (round down if > 0, round up if < 0).
-        nvCheck(ftoi_trunc(0.1f) == 0);
-        nvCheck(ftoi_trunc(0.6f) == 0);
-        nvCheck(ftoi_trunc(-0.2f) == 0);
-        nvCheck(ftoi_trunc(-0.7f) == 0);    // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition.
-        nvCheck(ftoi_trunc(1.99f) == 1);
-        nvCheck(ftoi_trunc(-1.2f) == -1);
-
-        // Floor (round down).
-        nvCheck(ftoi_floor(0.1f) == 0);
-        nvCheck(ftoi_floor(0.6f) == 0);
-        nvCheck(ftoi_floor(-0.2f) == -1);
-        nvCheck(ftoi_floor(-0.7f) == -1);
-        nvCheck(ftoi_floor(1.99f) == 1);
-        nvCheck(ftoi_floor(-1.2f) == -2);
-
-        nvCheck(ftoi_floor(0) == 0);
-        nvCheck(ftoi_floor(1) == 1);
-        nvCheck(ftoi_floor(-1) == -1);
-        nvCheck(ftoi_floor(2) == 2);
-        nvCheck(ftoi_floor(-2) == -2);
-
-        // Ceil (round up).
-        nvCheck(ftoi_ceil(0.1f) == 1);
-        nvCheck(ftoi_ceil(0.6f) == 1);
-        nvCheck(ftoi_ceil(-0.2f) == 0);
-        nvCheck(ftoi_ceil(-0.7f) == 0);
-        nvCheck(ftoi_ceil(1.99f) == 2);
-        nvCheck(ftoi_ceil(-1.2f) == -1);
-
-        nvCheck(ftoi_ceil(0) == 0);
-        nvCheck(ftoi_ceil(1) == 1);
-        nvCheck(ftoi_ceil(-1) == -1);
-        nvCheck(ftoi_ceil(2) == 2);
-        nvCheck(ftoi_ceil(-2) == -2);
-    }
-
-
-
-
-
-    // Safe versions using standard casts.
-
-    inline int iround(float f)
-    {
-        return ftoi_round(f);
-        //return int(floorf(f + 0.5f));
-    }
-
-    inline int iround(double f)
-    {
-        return int(::floor(f + 0.5));
-    }
-
-    inline int ifloor(float f)
-    {
-        return ftoi_floor(f);
-        //return int(floorf(f));
-    }
-
-    inline int iceil(float f)
-    {
-        return int(ceilf(f));
-    }
-
-
-
-    // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
-    // Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html
-
-    // Quantize a float in the [0,1] range, using exact end points or uniform bins.
-    inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
-        nvDebugCheck(bits <= 16);
-
-        float range = float(1 << bits);
-        if (exactEndPoints) {
-            return floorf(x * (range-1) + 0.5f) / (range-1);
-        }
-        else {
-            return (floorf(x * range) + 0.5f) / range;
-        }
-    }
-
-
-    // This is the most common rounding mode:
-    // 
-    //   0     1       2     3
-    // |___|_______|_______|___|
-    // 0                       1
-    //
-    // You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`.
-    // You reconstruct the original float dividing by 'N-1': `f = i / (N-1)`
-
-
-    //    0     1     2     3
-    // |_____|_____|_____|_____|
-    // 0                       1
-
-    /*enum BinningMode {
-        RoundMode_ExactEndPoints,       
-        RoundMode_UniformBins,
-    };*/
-
-    template <int N>
-    inline uint unitFloatToFixed(float f) {
-        return ftoi_round(f * ((1<<N)-1));
-    }
-
-    inline uint8 unitFloatToFixed8(float f) {
-        return (uint8)unitFloatToFixed<8>(f);
-    }
-
-    inline uint16 unitFloatToFixed16(float f) {
-        return (uint16)unitFloatToFixed<16>(f);
-    }
-
-
-} // nv
-
-#endif // NV_MATH_FTOI_H
diff --git a/thirdparty/thekla_atlas/nvmath/nvmath.h b/thirdparty/thekla_atlas/nvmath/nvmath.h
deleted file mode 100644
index a697f9293d..0000000000
--- a/thirdparty/thekla_atlas/nvmath/nvmath.h
+++ /dev/null
@@ -1,342 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#pragma once
-#ifndef NV_MATH_H
-#define NV_MATH_H
-
-#include "nvcore/nvcore.h"
-#include "nvcore/Debug.h"   // nvDebugCheck
-#include "nvcore/Utils.h"   // max, clamp
-
-#include <math.h>
-
-#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
-#include <float.h>  // finite, isnan
-#endif
-
-// -- GODOT start --
-//#if NV_CPU_X86 || NV_CPU_X86_64
-//    //#include <intrin.h>
-//    #include <xmmintrin.h>
-//#endif
-// -- GODOT end --
-
-
-
-// Function linkage
-#if NVMATH_SHARED
-#ifdef NVMATH_EXPORTS
-#define NVMATH_API DLL_EXPORT
-#define NVMATH_CLASS DLL_EXPORT_CLASS
-#else
-#define NVMATH_API DLL_IMPORT
-#define NVMATH_CLASS DLL_IMPORT
-#endif
-#else // NVMATH_SHARED
-#define NVMATH_API
-#define NVMATH_CLASS
-#endif // NVMATH_SHARED
-
-// Set some reasonable defaults.
-#ifndef NV_USE_ALTIVEC
-#   define NV_USE_ALTIVEC NV_CPU_PPC
-//#   define NV_USE_ALTIVEC defined(__VEC__)
-#endif
-
-#ifndef NV_USE_SSE
-#   if NV_CPU_X86_64
-        // x64 always supports at least SSE2
-#       define NV_USE_SSE 2
-#   elif NV_CC_MSVC && defined(_M_IX86_FP)
-        // Also on x86 with the /arch:SSE flag in MSVC.
-#       define NV_USE_SSE _M_IX86_FP       // 1=SSE, 2=SS2
-#   elif defined(__SSE__)
-#       define NV_USE_SSE 1
-#   elif defined(__SSE2__)
-#       define NV_USE_SSE 2
-#   else
-        // Otherwise we assume no SSE.
-#       define NV_USE_SSE 0
-#   endif
-#endif
-
-
-// Internally set NV_USE_SIMD when either altivec or sse is available.
-#if NV_USE_ALTIVEC && NV_USE_SSE
-#	error "Cannot enable both altivec and sse!"
-#endif
-
-
-// -- GODOT start --
-#if NV_USE_SSE
-    //#include <intrin.h>
-    #include <xmmintrin.h>
-#endif
-// -- GODOT end --
-
-
-#ifndef PI
-#define PI                  float(3.1415926535897932384626433833)
-#endif
-
-#define NV_EPSILON          (0.0001f)
-#define NV_NORMAL_EPSILON   (0.001f)
-
-/*
-#define SQ(r)               ((r)*(r))
-
-#define SIGN_BITMASK        0x80000000
-
-/// Integer representation of a floating-point value.
-#define IR(x)               ((uint32 &)(x))
-
-/// Absolute integer representation of a floating-point value
-#define AIR(x)              (IR(x) & 0x7fffffff)
-
-/// Floating-point representation of an integer value.
-#define FR(x)               ((float&)(x))
-
-/// Integer-based comparison of a floating point value.
-/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
-#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
-*/
-
-extern "C" inline double sqrt_assert(const double f)
-{
-    nvDebugCheck(f >= 0.0f);
-    return sqrt(f);
-}
-
-inline float sqrtf_assert(const float f)
-{
-    nvDebugCheck(f >= 0.0f);
-    return sqrtf(f);
-}
-
-extern "C" inline double acos_assert(const double f) 
-{
-    nvDebugCheck(f >= -1.0f && f <= 1.0f);
-    return acos(f);
-}
-
-inline float acosf_assert(const float f)
-{
-    nvDebugCheck(f >= -1.0f && f <= 1.0f);
-    return acosf(f);
-}
-
-extern "C" inline double asin_assert(const double f)
-{
-    nvDebugCheck(f >= -1.0f && f <= 1.0f);
-    return asin(f);
-}
-
-inline float asinf_assert(const float f)
-{
-    nvDebugCheck(f >= -1.0f && f <= 1.0f);
-    return asinf(f);
-}
-
-// Replace default functions with asserting ones.
-#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700))    // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194
-#define sqrt sqrt_assert
-#define sqrtf sqrtf_assert
-#define acos acos_assert
-#define acosf acosf_assert
-#define asin asin_assert
-#define asinf asinf_assert
-#endif
-
-#if NV_CC_MSVC
-NV_FORCEINLINE float log2f(float x)
-{
-    nvCheck(x >= 0);
-    return logf(x) / logf(2.0f);
-}
-NV_FORCEINLINE float exp2f(float x)
-{
-    return powf(2.0f, x);
-}
-#endif
-
-namespace nv
-{
-    inline float toRadian(float degree) { return degree * (PI / 180.0f); }
-    inline float toDegree(float radian) { return radian * (180.0f / PI); }
-
-    // Robust floating point comparisons:
-    // http://realtimecollisiondetection.net/blog/?p=89
-    inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
-    {
-        //return fabs(f0-f1) <= epsilon;
-        return fabs(f0-f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1));
-    }
-
-    inline bool isZero(const float f, const float epsilon = NV_EPSILON)
-    {
-        return fabs(f) <= epsilon;
-    }
-
-    inline bool isFinite(const float f)
-    {
-#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
-        return _finite(f) != 0;
-#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
-        return isfinite(f);
-#else
-#   error "isFinite not supported"
-#endif
-        //return std::isfinite (f);
-        //return finite (f);
-    }
-
-    inline bool isNan(const float f)
-    {
-#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
-        return _isnan(f) != 0;
-#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS || NV_OS_LINUX
-        return isnan(f);
-#else
-#   error "isNan not supported"
-#endif
-    }
-
-    inline uint log2(uint32 i)
-    {
-        uint32 value = 0;
-        while( i >>= 1 ) value++;
-        return value;
-    }
-
-    inline uint log2(uint64 i)
-    {
-        uint64 value = 0;
-        while (i >>= 1) value++;
-        return U32(value);
-    }
-
-    inline float lerp(float f0, float f1, float t)
-    {
-        const float s = 1.0f - t;
-        return f0 * s + f1 * t;
-    }
-
-    inline float square(float f) { return f * f; }
-    inline int square(int i) { return i * i; }
-
-    inline float cube(float f) { return f * f * f; }
-    inline int cube(int i) { return i * i * i; }
-
-    inline float frac(float f)
-    {
-        return f - floor(f);
-    }
-
-    inline float floatRound(float f)
-    {
-        return floorf(f + 0.5f);
-    }
-
-    // Eliminates negative zeros from a float array.
-    inline void floatCleanup(float * fp, int n)
-    {
-        for (int i = 0; i < n; i++) {
-            //nvDebugCheck(isFinite(fp[i]));
-            union { float f; uint32 i; } x = { fp[i] };
-            if (x.i == 0x80000000) fp[i] = 0.0f;
-        }
-    }
-
-    inline float saturate(float f) {
-        return clamp(f, 0.0f, 1.0f);
-    }
-
-    inline float linearstep(float edge0, float edge1, float x) {
-        // Scale, bias and saturate x to 0..1 range
-        return saturate((x - edge0) / (edge1 - edge0));
-    }
-
-    inline float smoothstep(float edge0, float edge1, float x) {
-        x = linearstep(edge0, edge1, x); 
-
-        // Evaluate polynomial
-        return x*x*(3 - 2*x);
-    }
-
-    inline int sign(float a)
-    {
-        return (a > 0) - (a < 0);
-        //if (a > 0.0f) return 1;
-        //if (a < 0.0f) return -1;
-        //return 0;
-    }
-
-    union Float754 {
-        unsigned int raw;
-        float value;
-        struct {
-        #if NV_BIG_ENDIAN
-            unsigned int negative:1;
-            unsigned int biasedexponent:8;
-            unsigned int mantissa:23;
-        #else
-            unsigned int mantissa:23;
-            unsigned int biasedexponent:8;
-            unsigned int negative:1;
-        #endif
-        } field;
-    };
-
-    // Return the exponent of x ~ Floor(Log2(x))
-    inline int floatExponent(float x)
-    {
-        Float754 f;
-        f.value = x;
-        return (f.field.biasedexponent - 127);
-    }
-
-
-    // FloatRGB9E5
-    union Float3SE {
-        uint32 v;
-        struct {
-        #if NV_BIG_ENDIAN
-            uint32 e : 5;
-            uint32 zm : 9;
-            uint32 ym : 9;
-            uint32 xm : 9;
-        #else
-            uint32 xm : 9;
-            uint32 ym : 9;
-            uint32 zm : 9;
-            uint32 e : 5;
-        #endif
-        };
-    };
-
-    // FloatR11G11B10
-    union Float3PK {
-        uint32 v;
-        struct {
-        #if NV_BIG_ENDIAN
-            uint32 ze : 5;
-            uint32 zm : 5;
-            uint32 ye : 5;
-            uint32 ym : 6;
-            uint32 xe : 5;
-            uint32 xm : 6;
-        #else
-            uint32 xm : 6;
-            uint32 xe : 5;
-            uint32 ym : 6;
-            uint32 ye : 5;
-            uint32 zm : 5;
-            uint32 ze : 5;
-        #endif
-        };
-    };
-
-
-} // nv
-
-#endif // NV_MATH_H