123 files changed, 31679 insertions, 0 deletions
diff --git a/thirdparty/thekla_atlas/LICENSE b/thirdparty/thekla_atlas/LICENSE
new file mode 100644
index 0000000000..164e7d3a2b
--- /dev/null
+++ b/thirdparty/thekla_atlas/LICENSE
@@ -0,0 +1,8 @@
+Copyright (c) 2013 Thekla, Inc
+
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/thirdparty/thekla_atlas/nvconfig.h b/thirdparty/thekla_atlas/nvconfig.h
new file mode 100644
index 0000000000..815bc3ec75
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvconfig.h
@@ -0,0 +1,37 @@
+#ifndef NV_CONFIG
+#define NV_CONFIG
+
+#if NV_OS_DARWIN
+
+// Hardcoded.
+
+#define NV_HAVE_UNISTD_H
+#define NV_HAVE_STDARG_H
+#define NV_HAVE_SIGNAL_H
+#define NV_HAVE_EXECINFO_H
+//#define NV_HAVE_MALLOC_H
+
+#else
+
+//#define HAVE_UNISTD_H
+#define NV_HAVE_STDARG_H
+//#define HAVE_SIGNAL_H
+//#define HAVE_EXECINFO_H
+//#define HAVE_MALLOC_H
+
+#endif
+
+//#define HAVE_OPENMP // Only in MSVC pro edition.
+
+//#cmakedefine HAVE_PNG
+//#cmakedefine HAVE_JPEG
+//#cmakedefine HAVE_TIFF
+//#cmakedefine HAVE_OPENEXR
+//#cmakedefine HAVE_FREEIMAGE
+#if !NV_OS_IOS
+#define NV_HAVE_STBIMAGE
+#endif
+
+//#cmakedefine HAVE_MAYA
+
+#endif // NV_CONFIG
diff --git a/thirdparty/thekla_atlas/nvcore/Array.h b/thirdparty/thekla_atlas/nvcore/Array.h
new file mode 100644
index 0000000000..b295cb2b0c
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Array.h
@@ -0,0 +1,182 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_ARRAY_H
+#define NV_CORE_ARRAY_H
+
+/*
+This array class requires the elements to be relocatable; it uses memmove and realloc. Ideally I should be
+using swap, but I honestly don't care. The only thing that you should be aware of is that internal pointers
+(elements that store pointers into themselves) are not supported.
+
+Note also that push_back and resize do not support being passed an element that lives in the same
+container. This is forbidden to avoid an extra copy.
+*/
+
+
+#include "Memory.h"
+#include "Debug.h"
+#include "ForEach.h" // PseudoIndex
+
+
+namespace nv 
+{
+    class Stream;
+
+    /**
+    * Replacement for std::vector that is easier to debug and provides
+    * some nice foreach enumerators. Elements must be relocatable (see the note at the top of this file).
+    */
+    template<typename T>
+    class NVCORE_CLASS Array {
+    public:
+        typedef uint size_type;
+
+        // Default constructor: empty array, no buffer allocated.
+        NV_FORCEINLINE Array() : m_buffer(NULL), m_capacity(0), m_size(0) {}
+
+        // Copy constructor: copies the elements of `a`.
+        NV_FORCEINLINE Array(const Array & a) : m_buffer(NULL), m_capacity(0), m_size(0) {
+            copy(a.m_buffer, a.m_size);
+        }
+
+        // Constructor that initializes the vector with a copy of the `num` elements at `ptr`.
+        NV_FORCEINLINE Array(const T * ptr, uint num) : m_buffer(NULL), m_capacity(0), m_size(0) {
+            copy(ptr, num);
+        }
+
+        // Reserve storage for `capacity` elements; the array itself starts empty (size 0).
+        NV_FORCEINLINE explicit Array(uint capacity) : m_buffer(NULL), m_capacity(0), m_size(0) {
+            setArrayCapacity(capacity);
+        }
+
+        // Destructor: clears the elements, then frees the buffer.
+        NV_FORCEINLINE ~Array() {
+            clear();
+            free<T>(m_buffer);
+        }
+
+
+        /// Const element access. The index is range-checked with nvDebugCheck only.
+        NV_FORCEINLINE const T & operator[]( uint index ) const
+        {
+            nvDebugCheck(index < m_size);
+            return m_buffer[index];
+        }
+        NV_FORCEINLINE const T & at( uint index ) const
+        {
+            nvDebugCheck(index < m_size);
+            return m_buffer[index];
+        }
+
+        /// Element access. The index is range-checked with nvDebugCheck only.
+        NV_FORCEINLINE T & operator[] ( uint index )
+        {
+            nvDebugCheck(index < m_size);
+            return m_buffer[index];
+        }
+        NV_FORCEINLINE T & at( uint index )
+        {
+            nvDebugCheck(index < m_size);
+            return m_buffer[index];
+        }
+
+        /// Get vector size.
+        NV_FORCEINLINE uint size() const { return m_size; }
+
+        /// Get vector size (same value as size()).
+        NV_FORCEINLINE uint count() const { return m_size; }
+
+        /// Get vector capacity.
+        NV_FORCEINLINE uint capacity() const { return m_capacity; }
+
+        /// Get const vector pointer.
+        NV_FORCEINLINE const T * buffer() const { return m_buffer; }
+
+        /// Get vector pointer.
+        NV_FORCEINLINE T * buffer() { return m_buffer; }
+
+        /// Provide begin/end pointers for C++11 range-based for loops.
+        NV_FORCEINLINE T * begin() { return m_buffer; }
+        NV_FORCEINLINE T * end() { return m_buffer + m_size; }
+        NV_FORCEINLINE const T * begin() const { return m_buffer; }
+        NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
+
+        /// Is vector empty.
+        NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
+
+        /// Is a null vector (no buffer allocated).
+        NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
+
+
+        T & append();
+        void push_back( const T & val );
+        void pushBack( const T & val );
+        Array<T> & append( const T & val );
+        Array<T> & operator<< ( T & t );
+        void pop_back();
+        void popBack(uint count = 1);
+        void popFront(uint count = 1);
+        const T & back() const;
+        T & back();
+        const T & front() const;
+        T & front();
+        bool contains(const T & e) const;
+        bool find(const T & element, uint * indexPtr) const;
+        bool find(const T & element, uint begin, uint end, uint * indexPtr) const;
+        void removeAt(uint index);
+        bool remove(const T & element);
+        void insertAt(uint index, const T & val = T());
+        void append(const Array<T> & other);
+        void append(const T other[], uint count);
+        void replaceWithLast(uint index);
+        void resize(uint new_size);
+        void resize(uint new_size, const T & elem);
+        void fill(const T & elem);
+        void clear();
+        void shrink();
+        void reserve(uint desired_size);
+        void copy(const T * data, uint count);
+        Array<T> & operator=( const Array<T> & a );
+        T * release();
+
+
+        // Array enumerator: a plain index that runs from start() until isDone() reports m_size.
+        typedef uint PseudoIndex;
+
+        NV_FORCEINLINE PseudoIndex start() const { return 0; }
+        NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
+        NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
+
+#if NV_NEED_PSEUDOINDEX_WRAPPER
+        NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
+            return m_buffer[i(this)];
+        }
+        NV_FORCEINLINE const T & operator[]( const PseudoIndexWrapper & i ) const {
+            return m_buffer[i(this)];
+        }
+#endif
+
+        // Friends (both need direct access to m_buffer / m_size / m_capacity).
+        template <typename Typ> 
+        friend Stream & operator<< ( Stream & s, Array<Typ> & p );
+
+        template <typename Typ>
+        friend void swap(Array<Typ> & a, Array<Typ> & b);
+
+
+    protected:
+
+        void setArraySize(uint new_size);
+        void setArrayCapacity(uint new_capacity);
+
+        T * m_buffer;
+        uint m_capacity;
+        uint m_size;
+
+    };
+
+
+} // nv namespace
+
+#endif // NV_CORE_ARRAY_H
diff --git a/thirdparty/thekla_atlas/nvcore/Array.inl b/thirdparty/thekla_atlas/nvcore/Array.inl
new file mode 100644
index 0000000000..0b4de28ba9
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Array.inl
@@ -0,0 +1,452 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_ARRAY_INL
+#define NV_CORE_ARRAY_INL
+
+#include "Array.h"
+
+#include "Stream.h"
+#include "Utils.h" // swap
+
+#include <string.h>	// memmove
+#include <new> // for placement new
+
+
+
+namespace nv 
+{
+    // Grow the array by one default-constructed element and return a reference to it.
+    template <typename T>
+    NV_FORCEINLINE T & Array<T>::append()
+    {
+        const uint last = m_size;       // Index the new element will occupy.
+        const uint grown = last + 1;
+
+        setArraySize(grown);            // Updates m_size; reallocates when capacity is exceeded.
+        construct_range(m_buffer, grown, last);
+
+        return m_buffer[last];
+    }
+
+    // Append a copy of `val` at the end of the vector.
+    // `val` must not refer to an element of this array (asserted with nvDebugCheck).
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::push_back( const T & val )
+    {
+#if 1
+        nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size);
+        const uint last = m_size;
+        const uint grown = last + 1;
+
+        setArraySize(grown);
+
+        construct_range(m_buffer, grown, last, val);
+#else
+        uint new_size = m_size + 1;
+
+        if (new_size > m_capacity)
+        {
+            // @@ Is there any way to avoid this copy?
+            // @@ Can we create a copy without side effects? Ie. without calls to constructor/destructor. Use alloca + memcpy?
+            // @@ Assert instead of copy?
+            const T copy(val);	// create a copy in case value is inside of this array.
+
+            setArraySize(new_size);
+
+            new (m_buffer+new_size-1) T(copy);
+        }
+        else
+        {
+            m_size = new_size;
+            new(m_buffer+new_size-1) T(val);
+        }
+#endif // 0/1
+    }
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::pushBack( const T & val )
+    {
+        push_back(val);     // camelCase alias; simply forwards to push_back().
+    }
+    template <typename T>
+    NV_FORCEINLINE Array<T> & Array<T>::append( const T & val )
+    {
+        push_back(val);     // Same operation as push_back() ...
+        return *this;       // ... but returns the array so calls can be chained.
+    }
+
+    // Qt-like push operator: appends a copy of `t` and returns the array so pushes can be chained.
+    template <typename T>
+    NV_FORCEINLINE Array<T> & Array<T>::operator<< ( T & t )
+    {
+        push_back(t);
+        return *this;
+    }
+
+    // Pop the element at the end of the vector. The array must not be empty.
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::pop_back()
+    {
+        nvDebugCheck( m_size > 0 );
+        resize( m_size - 1 );       // resize() takes care of destroying the removed element.
+    }
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::popBack(uint count)
+    {
+        nvDebugCheck(m_size >= count);  // Cannot remove more elements than the array holds.
+        resize(m_size - count);         // Drops the last `count` elements.
+    }
+
+    // Remove the first `count` elements, moving the remaining ones to the front of the buffer.
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::popFront(uint count)
+    {
+        nvDebugCheck(m_size >= count);
+
+        if (m_size == count) {
+            // Everything goes away; nothing is left to move.
+            clear();
+            return;
+        }
+
+        const uint remaining = m_size - count;
+
+        destroy_range(m_buffer, 0, count);
+        memmove(m_buffer, m_buffer + count, sizeof(T) * remaining);
+        m_size = remaining;
+    }
+
+
+    // Get the last element (const). The array must not be empty (nvDebugCheck).
+    template <typename T>
+    NV_FORCEINLINE const T & Array<T>::back() const
+    {
+        nvDebugCheck( m_size > 0 );
+        return m_buffer[m_size-1];
+    }
+
+    // Get the last element (mutable). The array must not be empty (nvDebugCheck).
+    template <typename T>
+    NV_FORCEINLINE T & Array<T>::back()
+    {
+        nvDebugCheck( m_size > 0 );
+        return m_buffer[m_size-1];
+    }
+
+    // Get the first element (const). The array must not be empty (nvDebugCheck).
+    template <typename T>
+    NV_FORCEINLINE const T & Array<T>::front() const
+    {
+        nvDebugCheck( m_size > 0 );
+        return m_buffer[0];
+    }
+
+    // Get the first element (mutable). The array must not be empty (nvDebugCheck).
+    template <typename T>
+    NV_FORCEINLINE T & Array<T>::front()
+    {
+        nvDebugCheck( m_size > 0 );
+        return m_buffer[0];
+    }
+
+    // Check if the given element is contained in the array.
+    template <typename T>
+    NV_FORCEINLINE bool Array<T>::contains(const T & e) const
+    {
+        return find(e, NULL);   // NULL index pointer: only the found / not-found answer is wanted.
+    }
+
+    // Search the whole array [0, m_size). Returns true if found; `indexPtr` is forwarded to the ranged overload.
+    template <typename T>
+    NV_FORCEINLINE bool Array<T>::find(const T & element, uint * indexPtr) const
+    {
+        return find(element, 0, m_size, indexPtr);
+    }
+
+    // Return true if element found within the given range. Qualified call: a bare find() would name this member.
+    template <typename T>
+    NV_FORCEINLINE bool Array<T>::find(const T & element, uint begin, uint end, uint * indexPtr) const
+    {
+        return ::nv::find(element, m_buffer, begin, end, indexPtr);
+    }
+
+
+    // Remove the element at `index`, shifting every later element down by one. This is an expensive operation!
+    template <typename T>
+    void Array<T>::removeAt(uint index)
+    {
+        nvDebugCheck(index < m_size);   // index is unsigned, so a lower-bound test would always pass.
+
+        if (m_size == 1) {
+            clear();
+            return;
+        }
+
+        m_buffer[index].~T();
+        const uint tail = m_size - 1 - index;   // Number of elements after the removed one.
+        memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * tail);
+        m_size--;
+    }
+
+    // Remove the first instance of the given element. Returns false when find() reports no match.
+    template <typename T>
+    bool Array<T>::remove(const T & element)
+    {
+        uint where;
+        if (!find(element, &where)) {
+            return false;
+        }
+        removeAt(where);
+        return true;
+    }
+
+    // Insert a copy of `val` at `index`, shifting the elements at and after `index` up by one.
+    template <typename T>
+    void Array<T>::insertAt(uint index, const T & val/*=T()*/)
+    {
+        nvDebugCheck( index <= m_size );    // index is unsigned; index == m_size appends at the end.
+
+        const uint old_size = m_size;
+        setArraySize(old_size + 1);
+
+        if (index < old_size) {
+            // Open a gap at `index` by relocating the tail.
+            memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (old_size - index));
+        }
+        new(m_buffer+index) T(val);
+    }
+
+    // Append copies of all elements of `other`. NOTE(review): passing this array itself looks unsafe, since the pointer overload may reallocate m_buffer before reading the source - confirm no caller does so.
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::append(const Array<T> & other)
+    {
+        append(other.m_buffer, other.m_size);
+    }
+
+    // Append copies of the `count` elements starting at `other` to our vector.
+    template <typename T>
+    void Array<T>::append(const T other[], uint count)
+    {
+        // Nothing to append: leave size and capacity untouched.
+        if (count == 0) return;
+
+        const uint base = m_size;   // First index of the appended range.
+        setArraySize(base + count);
+
+        for (uint i = 0; i < count; i++ ) {
+            new(m_buffer + base + i) T(other[i]);
+        }
+    }
+
+
+    // Remove the element at `index` by swapping it with the last one and dropping the last slot (element order is not preserved).
+    template <typename T> 
+    void Array<T>::replaceWithLast(uint index)
+    {
+        nvDebugCheck( index < m_size );
+        nv::swap(m_buffer[index], back());      // @@ Is this OK when index == size-1?
+        back().~T();
+        m_size--;
+    }
+
+    // Resize the vector to `new_size` elements, preserving the existing ones.
+    // Surplus elements are destructed; added elements are default-constructed.
+    template <typename T> 
+    void Array<T>::resize(uint new_size)
+    {
+        const uint prev_size = m_size;
+
+        // Shrinking: destruct the elements that fall off the end.
+        destroy_range(m_buffer, new_size, prev_size);
+        setArraySize(new_size);
+
+        // Growing: call default constructors for the added elements.
+        construct_range(m_buffer, new_size, prev_size);
+    }
+
+
+    // Resize the vector preserving existing elements and initializing the new ones with copies of `elem`.
+    // `elem` must not be an element of this array; the bound is the same one push_back() checks.
+    template <typename T> 
+    void Array<T>::resize(uint new_size, const T & elem)
+    {
+        nvDebugCheck(&elem < m_buffer || &elem >= m_buffer+m_size);
+
+        uint old_size = m_size;
+
+        // Destruct old elements (if we're shrinking).
+        destroy_range(m_buffer, new_size, old_size);
+
+        setArraySize(new_size);
+
+        // Call copy constructors
+        construct_range(m_buffer, new_size, old_size, elem);
+    }
+
+    // Fill array with the given value. The helper is qualified (as find() does) because a bare fill() names this one-argument member.
+    template <typename T>
+    void Array<T>::fill(const T & elem)
+    {
+        ::nv::fill(m_buffer, m_size, elem);
+    }
+
+    // Clear the buffer: destroys all elements and sets the size to 0. The allocation is kept.
+    template <typename T> 
+    NV_FORCEINLINE void Array<T>::clear()
+    {
+        nvDebugCheck(isValidPtr(m_buffer));
+
+        // Destruct old elements
+        destroy_range(m_buffer, 0, m_size);
+
+        m_size = 0;
+    }
+
+    // Shrink the allocated vector so that capacity equals size (an empty array gives up its buffer).
+    template <typename T> 
+    NV_FORCEINLINE void Array<T>::shrink()
+    {
+        if (m_size < m_capacity) {
+            setArrayCapacity(m_size);
+        }
+    }
+
+    // Preallocate space for at least `desired_size` elements. Never reduces the capacity; the size is unchanged.
+    template <typename T> 
+    NV_FORCEINLINE void Array<T>::reserve(uint desired_size)
+    {
+        if (desired_size > m_capacity) {
+            setArrayCapacity(desired_size);
+        }
+    }
+
+    // Replace the contents with copies of `count` elements from `data`. Current elements are destroyed first, so `data` must not point into this array.
+    template <typename T>
+    NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
+    {
+#if 1   // More simple, but maybe not be as efficient?
+        destroy_range(m_buffer, 0, m_size);
+
+        setArraySize(count);
+
+        construct_range(m_buffer, count, 0, data);
+#else
+        const uint old_size = m_size;
+
+        destroy_range(m_buffer, count, old_size);
+
+        setArraySize(count);
+
+        copy_range(m_buffer, data, old_size);
+
+        construct_range(m_buffer, count, old_size, data);
+#endif
+    }
+
+    // Copy assignment. Self-assignment is skipped: copy() destroys our elements before it reads the source.
+    template <typename T>
+    NV_FORCEINLINE Array<T> & Array<T>::operator=( const Array<T> & a )
+    {
+        if (this != &a) copy(a.m_buffer, a.m_size);
+        return *this;
+    }
+
+    // Hand the allocated buffer over to the caller and reset this array to the empty, unallocated state.
+    template <typename T>
+    T * Array<T>::release() {
+        T * const released = m_buffer;
+        m_size = 0;
+        m_capacity = 0;
+        m_buffer = NULL;
+        return released;
+    }
+
+
+
+    // Change array size to `new_size`, growing the buffer when it does not fit.
+    // No constructors or destructors are called here.
+    template <typename T> 
+    inline void Array<T>::setArraySize(uint new_size) {
+        m_size = new_size;
+
+        if (new_size <= m_capacity) {
+            // Fits in the current allocation.
+            return;
+        }
+
+        // The first allocation is exact. Following allocations grow the array
+        // by 25% over the requested size (new_size + new_size / 4).
+        const bool first_allocation = (m_capacity == 0);
+        const uint slack = first_allocation ? 0 : (new_size >> 2);
+        const uint new_buffer_size = new_size + slack;
+
+        setArrayCapacity( new_buffer_size );
+    }
+
+    // Change array capacity to exactly `new_capacity` elements.
+    // A capacity of zero releases the buffer. m_size is not modified, and
+    // the new capacity must not be smaller than it (nvDebugCheck).
+    template <typename T> 
+    inline void Array<T>::setArrayCapacity(uint new_capacity) {
+        nvDebugCheck(new_capacity >= m_size);
+
+        if (new_capacity != 0) {
+            // realloc the buffer
+            m_buffer = realloc<T>(m_buffer, new_capacity);
+        }
+        else if (m_buffer != NULL) {
+            // free the buffer.
+            free<T>(m_buffer);
+            m_buffer = NULL;
+        }
+
+        m_capacity = new_capacity;
+    }
+
+    // Array serialization. Loading reads the element count and resizes first; saving writes the count. The elements follow in both directions.
+    template <typename Typ> 
+    inline Stream & operator<< ( Stream & s, Array<Typ> & p )
+    {
+        if (s.isLoading()) {
+            uint size = 0;      // Stays 0 (empty array) if the stream leaves it untouched.
+            s << size;
+            p.resize( size );
+        }
+        else {
+            s << p.m_size;
+        }
+
+        for (uint i = 0; i < p.m_size; i++) {
+            s << p.m_buffer[i];
+        }
+
+        return s;
+    }
+
+    // Exchange the contents of the two given vectors by swapping their members; no element is copied.
+    template <typename Typ>
+    inline void swap(Array<Typ> & a, Array<Typ> & b)
+    {
+        nv::swap(a.m_size, b.m_size);
+        nv::swap(a.m_capacity, b.m_capacity);
+        nv::swap(a.m_buffer, b.m_buffer);
+    }
+
+
+} // nv namespace
+
+// IC: These functions are for compatibility with the Foreach macro in The Witness. Overloads taking the array by reference:
+template <typename T> inline int item_count(const nv::Array<T> & array) { return array.count(); }
+template <typename T> inline const T & item_at(const nv::Array<T> & array, int i) { return array.at(i); }
+template <typename T> inline T & item_at(nv::Array<T> & array, int i) { return array.at(i); }
+template <typename T> inline int item_advance(const nv::Array<T> & array, int i) { return ++i; }
+template <typename T> inline int item_remove(nv::Array<T> & array, int i) { array.replaceWithLast(i); return i - 1; }
+// The same helpers for arrays passed by pointer.
+template <typename T> inline int item_count(const nv::Array<T> * array) { return array->count(); }
+template <typename T> inline const T & item_at(const nv::Array<T> * array, int i) { return array->at(i); }
+template <typename T> inline T & item_at(nv::Array<T> * array, int i) { return array->at(i); }
+template <typename T> inline int item_advance(const nv::Array<T> * array, int i) { return ++i; }
+template <typename T> inline int item_remove(nv::Array<T> * array, int i) { array->replaceWithLast(i); return i - 1; }
+
+
+#endif // NV_CORE_ARRAY_INL
diff --git a/thirdparty/thekla_atlas/nvcore/BitArray.h b/thirdparty/thekla_atlas/nvcore/BitArray.h
new file mode 100644
index 0000000000..23cf880694
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/BitArray.h
@@ -0,0 +1,250 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_BITARRAY_H
+#define NV_CORE_BITARRAY_H
+
+#include "nvcore.h"
+#include "Array.inl"
+
+namespace nv
+{
+
+    // Count the set bits of x. Each iteration clears the lowest set bit, so the loop runs once per set bit.
+    inline uint countSetBits(uint32 x) {
+        uint count = 0;
+        for(; x != 0; x &= x - 1) {
+            count++;
+        }
+        return count;
+    }
+
+    // Count the set bits among the lowest `bits` bits of x; stops early once x has no set bits left.
+    inline uint countSetBits(uint32 x, int bits) {
+        uint total = 0;
+        for(; bits != 0 && x != 0; bits--, x >>= 1) {
+            total += (x & 1);
+        }
+        return total;
+    }
+
+    // Returns w with the bits selected by mask m set (b true) or cleared (b false). See "Conditionally set or clear bits without branching" at http://graphics.stanford.edu/~seander/bithacks.html
+    inline uint setBits(uint w, uint m, bool b) {
+        return (m & -int(b)) | (w & ~m);
+    }
+
+
+
+    // Simple bit array: m_size bits packed into 32-bit words; bit i is stored in word i >> 5 at position i & 31.
+    class BitArray
+    {
+    public:
+
+        BitArray() : m_size(0) {}
+        BitArray(uint sz) : m_size(0) {
+            resize(sz);
+        }
+
+        uint size() const { return m_size; }
+        void clear() { resize(0); }
+
+        void resize(uint new_size)
+        {
+            m_size = new_size;
+            m_wordArray.resize( (m_size + 31) >> 5 );
+        }
+
+        void resize(uint new_size, bool init)
+        {
+            //if (new_size == m_size) return;
+
+            uint old_size = m_size;
+            uint size_mod_32 = old_size & 31;
+            uint last_word_index = ((old_size + 31) >> 5) - 1;  // Only used when size_mod_32 != 0, i.e. old_size > 0.
+            uint mask = (1u << size_mod_32) - 1;                // Unsigned: size_mod_32 may be 31.
+
+            uint init_dword;
+            if (init) {
+                if (size_mod_32) m_wordArray[last_word_index] |= ~mask;
+                init_dword = ~0;
+            }
+            else {
+                if (size_mod_32) m_wordArray[last_word_index] &= mask;
+                init_dword = 0;
+            }
+
+            m_size = new_size;
+            m_wordArray.resize((new_size + 31) >> 5, init_dword);
+
+            // Make sure new bits are initialized correctly.
+            /*for (uint i = old_size; i < new_size; i++) {
+                nvCheck(bitAt(i) == init);
+            }*/
+        }
+
+
+        /// Get bit.
+        bool bitAt(uint b) const
+        {
+            nvDebugCheck( b < m_size );
+            return (m_wordArray[b >> 5] & (1u << (b & 31))) != 0;
+        }
+
+        // It may be useful to pack multiple bit arrays together interleaving their bits.
+        uint bitsAt(uint idx, uint count) const
+        {
+            //nvDebugCheck(count == 2 || count == 4 || count == 8 || count == 16 || count == 32);
+            nvDebugCheck(count == 2);   // @@ Hardcoded for two.
+            uint b = idx * count;
+            nvDebugCheck(b < m_size);
+            return (m_wordArray[b >> 5] & (0x3u << (b & 31))) >> (b & 31);
+        }
+
+        // It would be useful to have a function to set two bits simultaneously.
+        /*void setBitsAt(uint idx, uint count, uint bits) const
+        {
+            //nvDebugCheck(count == 2 || count == 4 || count == 8 || count == 16 || count == 32);
+            nvDebugCheck(count == 2);   // @@ Hardcoded for two.
+            uint b = idx * count;
+            nvDebugCheck(b < m_size);
+            return (m_wordArray[b >> 5] & (0x3 << (b & 31))) >> (b & 31);
+        }*/
+
+
+
+        // Set a bit.
+        void setBitAt(uint idx)
+        {
+            nvDebugCheck(idx < m_size);
+            m_wordArray[idx >> 5] |=  (1u << (idx & 31));
+        }
+
+        // Clear a bit.
+        void clearBitAt(uint idx)
+        {
+            nvDebugCheck(idx < m_size);
+            m_wordArray[idx >> 5] &= ~(1u << (idx & 31));
+        }
+
+        // Toggle a bit.
+        void toggleBitAt(uint idx)
+        {
+            nvDebugCheck(idx < m_size);
+            m_wordArray[idx >> 5] ^= (1u << (idx & 31));
+        }
+
+        // Set a bit to the given value. @@ Rename modifyBitAt? 
+        void setBitAt(uint idx, bool b)
+        {
+            nvDebugCheck(idx < m_size);
+            m_wordArray[idx >> 5] = setBits(m_wordArray[idx >> 5], 1u << (idx & 31), b);
+            nvDebugCheck(bitAt(idx) == b);
+        }
+
+        void append(bool value)
+        {
+            resize(m_size + 1);
+            setBitAt(m_size - 1, value);
+        }
+
+
+        // Clear all the bits.
+        void clearAll()
+        {
+            memset(m_wordArray.buffer(), 0, m_wordArray.size() * sizeof(uint));
+        }
+
+        // Set all the bits.
+        void setAll()
+        {
+            memset(m_wordArray.buffer(), 0xFF, m_wordArray.size() * sizeof(uint));
+        }
+
+        // Toggle all the bits.
+        void toggleAll()
+        {
+            const uint wordCount = m_wordArray.count();
+            for(uint b = 0; b < wordCount; b++) {
+                m_wordArray[b] ^= 0xFFFFFFFF;
+            }
+        }
+
+        // Count the number of bits set.
+        uint countSetBits() const
+        {
+            const uint num = m_wordArray.size();
+            if( num == 0 ) {
+                return 0;
+            }
+
+            uint count = 0;
+            for(uint i = 0; i < num - 1; i++) {
+                count += nv::countSetBits(m_wordArray[i]);
+            }
+            // The last word holds 1..32 valid bits; (m_size & 31) would be 0 for a completely full word.
+            count += nv::countSetBits(m_wordArray[num - 1], ((m_size - 1) & 31) + 1);
+            //piDebugCheck(count + countClearBits() == m_size);
+            return count;
+        }
+
+        // Count the number of bits clear.
+        uint countClearBits() const {
+
+            const uint num = m_wordArray.size();
+            if( num == 0 ) {
+                return 0;
+            }
+
+            uint count = 0;
+            for(uint i = 0; i < num - 1; i++) {
+                count += nv::countSetBits(~m_wordArray[i]);
+            }
+            // The last word holds 1..32 valid bits; (m_size & 31) would be 0 for a completely full word.
+            count += nv::countSetBits(~m_wordArray[num - 1], ((m_size - 1) & 31) + 1);
+            //piDebugCheck(count + countSetBits() == m_size);
+            return count;
+        }
+
+        friend void swap(BitArray & a, BitArray & b)
+        {
+            swap(a.m_size, b.m_size);
+            swap(a.m_wordArray, b.m_wordArray);
+        }
+        // In-place AND with `other`. This array is first resized to other's size if the sizes differ.
+        void operator &= (const BitArray & other) {
+            if (other.m_size != m_size) {
+                resize(other.m_size);
+            }
+
+            const uint wordCount = m_wordArray.count();
+            for (uint i = 0; i < wordCount; i++) {
+                m_wordArray[i] &= other.m_wordArray[i];
+            }
+        }
+        // In-place OR with `other`. This array is first resized to other's size if the sizes differ.
+        void operator |= (const BitArray & other) {
+            if (other.m_size != m_size) {
+                resize(other.m_size);
+            }
+
+            const uint wordCount = m_wordArray.count();
+            for (uint i = 0; i < wordCount; i++) {
+                m_wordArray[i] |= other.m_wordArray[i];
+            }
+        }
+
+
+    private:
+
+        // Number of bits stored.
+        uint m_size;
+
+        // Array of bits.
+        Array<uint> m_wordArray;
+
+    };
+
+} // nv namespace
+
+#endif // NV_CORE_BITARRAY_H
+
diff --git a/thirdparty/thekla_atlas/nvcore/Debug.cpp b/thirdparty/thekla_atlas/nvcore/Debug.cpp
new file mode 100644
index 0000000000..75ac6beb75
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Debug.cpp
@@ -0,0 +1,1347 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "Debug.h"
+#include "Array.inl"
+#include "StrLib.h" // StringBuilder
+
+#include "StdStream.h" // fileOpen
+
+#include <stdlib.h>
+
+// Extern
+#if NV_OS_WIN32 //&& NV_CC_MSVC
+#   define WIN32_LEAN_AND_MEAN
+#   define VC_EXTRALEAN
+#   include <windows.h>
+#   include <direct.h>
+#   if NV_CC_MSVC
+#       include <crtdbg.h>
+#       if _MSC_VER < 1300
+#           define DECLSPEC_DEPRECATED
+// VC6: change this path to your Platform SDK headers
+#           include <dbghelp.h> // must be XP version of file
+//          include "M:\\dev7\\vs\\devtools\\common\\win32sdk\\include\\dbghelp.h"
+#       else
+// VC7: ships with updated headers
+#           include <dbghelp.h>
+#       endif
+#   endif
+#   pragma comment(lib,"dbghelp.lib")
+#endif
+
+#if NV_OS_XBOX
+#    include <Xtl.h>
+#    ifdef _DEBUG
+#        include <xbdm.h>
+#    endif //_DEBUG
+#endif //NV_OS_XBOX
+
+#if !NV_OS_WIN32 && defined(NV_HAVE_SIGNAL_H)
+#   include <signal.h>
+#endif
+
+#if NV_OS_UNIX
+#   include <unistd.h> // getpid
+#endif
+
+#if NV_OS_LINUX && defined(NV_HAVE_EXECINFO_H)
+#   include <execinfo.h> // backtrace
+#   if NV_CC_GNUC // defined(NV_HAVE_CXXABI_H)
+#       include <cxxabi.h>
+#   endif
+#endif
+
+#if NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
+#   include <sys/types.h>
+#   include <sys/param.h>
+#   include <sys/sysctl.h> // sysctl
+#   if !defined(NV_OS_OPENBSD)
+#       include <sys/ucontext.h>
+#   endif
+#   if defined(NV_HAVE_EXECINFO_H) // only after OSX 10.5
+#       include <execinfo.h> // backtrace
+#       if NV_CC_GNUC // defined(NV_HAVE_CXXABI_H)
+#           include <cxxabi.h>
+#       endif
+#   endif
+#endif
+
+#if NV_OS_ORBIS
+#include <libdbg.h>
+#endif
+
+#if NV_OS_DURANGO
+#include "Windows.h"
+#include <winnt.h>
+#include <crtdbg.h>
+#include <dbghelp.h>
+#include <errhandlingapi.h>
+#define NV_USE_SEPARATE_THREAD 0
+#else
+#define NV_USE_SEPARATE_THREAD 1
+#endif
+
+
+
+using namespace nv;
+
+namespace 
+{
+
+    // Handler overrides. nvAbort uses s_assert_handler when it is not NULL and
+    // a built-in platform handler otherwise.
+    // NOTE(review): s_message_handler is presumably the analogous override for
+    // messages -- it is not referenced in this part of the file, confirm.
+    static MessageHandler * s_message_handler = NULL;
+    static AssertHandler * s_assert_handler = NULL;
+
+    // NOTE(review): presumably records whether the crash/signal handlers are
+    // installed -- the code that sets it is outside this view, confirm.
+    static bool s_sig_handler_enabled = false;
+    // When true, the handlers may interact with the user: the exception
+    // handler tries to attach a debugger and the Win32 assert handler shows a
+    // message box.
+    static bool s_interactive = true;
+
+#if (NV_OS_WIN32 && NV_CC_MSVC) || NV_OS_DURANGO
+
+    // Old exception filter.
+    static LPTOP_LEVEL_EXCEPTION_FILTER s_old_exception_filter = NULL;
+
+#elif !NV_OS_WIN32 && defined(NV_HAVE_SIGNAL_H)
+
+    // Old signal handlers.
+    struct sigaction s_old_sigsegv;
+    struct sigaction s_old_sigtrap;
+    struct sigaction s_old_sigfpe;
+    struct sigaction s_old_sigbus;
+
+#endif
+
+
+#if (NV_OS_WIN32 && NV_CC_MSVC) || NV_OS_DURANGO
+
+    // We should try to simplify the top level filter as much as possible.
+    // http://www.nynaeve.net/?p=128
+
+    // The critical section enforcing the requirement that only one exception be
+    // handled by a handler at a time.
+    static CRITICAL_SECTION s_handler_critical_section;
+
+#if NV_USE_SEPARATE_THREAD
+    // Semaphores used to move exception handling between the exception thread
+    // and the handler thread.  handler_start_semaphore_ is signalled by the
+    // exception thread to wake up the handler thread when an exception occurs.
+    // handler_finish_semaphore_ is signalled by the handler thread to wake up
+    // the exception thread when handling is complete.
+    static HANDLE s_handler_start_semaphore = NULL;
+    static HANDLE s_handler_finish_semaphore = NULL;
+
+    // The exception handler thread.
+    static HANDLE s_handler_thread = NULL;
+
+    static DWORD s_requesting_thread_id = 0;
+    static EXCEPTION_POINTERS * s_exception_info = NULL;
+
+#endif // NV_USE_SEPARATE_THREAD
+
+
+    // State handed to miniDumpWriteDumpCallback through the callback
+    // parameter: the single extra memory region to include in the dump.
+    struct MinidumpCallbackContext {
+        ULONG64 memory_base;    // Start address of the region.
+        ULONG memory_size;      // Size of the region, in bytes.
+        bool finished;          // Set once the region has been reported to the dump writer.
+    };
+
+#if NV_OS_WIN32
+    // Callback passed to MiniDumpWriteDump.
+    //
+    // Reports the memory region described by the MinidumpCallbackContext in
+    // @a context exactly once, accepts every module and thread, switches off
+    // further cancel callbacks and rejects every other callback type.
+    static BOOL CALLBACK miniDumpWriteDumpCallback(PVOID context, const PMINIDUMP_CALLBACK_INPUT callback_input, PMINIDUMP_CALLBACK_OUTPUT callback_output)
+    {
+        const ULONG type = callback_input->CallbackType;
+
+        if (type == MemoryCallback) {
+            MinidumpCallbackContext * region = reinterpret_cast<MinidumpCallbackContext*>(context);
+
+            // The region is only reported the first time we are asked.
+            if (region->finished) {
+                return FALSE;
+            }
+
+            callback_output->MemoryBase = region->memory_base;
+            callback_output->MemorySize = region->memory_size;
+            region->finished = true;
+            return TRUE;
+        }
+
+        // Include all modules and all threads.
+        if (type == IncludeModuleCallback || type == ModuleCallback ||
+            type == IncludeThreadCallback || type == ThreadCallback) {
+            return TRUE;
+        }
+
+        // Stop receiving cancel callbacks.
+        if (type == CancelCallback) {
+            callback_output->CheckCancel = FALSE;
+            callback_output->Cancel = FALSE;
+            return TRUE;
+        }
+
+        // Anything else is ignored.
+        return FALSE;
+    }
+#endif
+
+    // Writes a minidump of the current process to disk.
+    //
+    // On Durango MiniDumpWriteDump is looked up at run time in toolhelpx.dll
+    // and the dump is written to "d:\<application>-<date>-<time>.dmp"; on the
+    // other platforms the file is "crash.dmp" in the current directory.
+    //
+    // @param pExceptionInfo  Exception to record in the dump, or NULL to dump
+    //                        the process without exception information.
+    // @return true if the dump file was created and written, false otherwise.
+    static bool writeMiniDump(EXCEPTION_POINTERS * pExceptionInfo)
+    {
+#if NV_OS_DURANGO
+        // Get a handle to the minidump method.
+        typedef BOOL(WINAPI* MiniDumpWriteDumpPfn) (
+            _In_ HANDLE hProcess,
+            _In_ DWORD ProcessId,
+            _In_ HANDLE hFile,
+            _In_ MINIDUMP_TYPE DumpType,
+            _In_opt_ PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam,
+            _In_opt_ PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam,
+            _Reserved_ PVOID CallbackParam
+            );
+
+        // LoadLibraryW reports failure with NULL, not INVALID_HANDLE_VALUE.
+        HMODULE hToolHelpModule = ::LoadLibraryW(L"toolhelpx.dll");
+        if (hToolHelpModule == NULL) {
+            return false;
+        }
+
+        MiniDumpWriteDumpPfn MiniDumpWriteDump = reinterpret_cast<MiniDumpWriteDumpPfn>(::GetProcAddress(hToolHelpModule, "MiniDumpWriteDump"));
+        if (!MiniDumpWriteDump) {
+            FreeLibrary(hToolHelpModule);
+            return false;
+        }
+
+        // Generate a decent filename.
+        nv::Path application_path(256);
+        HINSTANCE hinstance = GetModuleHandle(NULL);
+        GetModuleFileName(hinstance, application_path.str(), 256);
+        application_path.stripExtension();
+        const char * application_name = application_path.fileName();
+
+        SYSTEMTIME local_time;
+        GetLocalTime(&local_time);
+
+        char dump_filename[MAX_PATH] = {};
+        sprintf_s(dump_filename, "d:\\%s-%04d%02d%02d-%02d%02d%02d.dmp",
+            application_name,
+            local_time.wYear, local_time.wMonth, local_time.wDay,
+            local_time.wHour, local_time.wMinute, local_time.wSecond );
+#else
+        const char* dump_filename = "crash.dmp";
+#endif
+
+        // create the file
+        HANDLE hFile = CreateFileA(dump_filename, GENERIC_READ | GENERIC_WRITE,
+            FILE_SHARE_WRITE | FILE_SHARE_READ, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+        if (hFile == INVALID_HANDLE_VALUE) {
+            //nvDebug("*** Failed to create dump file.\n");
+#if NV_OS_DURANGO
+            FreeLibrary(hToolHelpModule);
+#endif
+            return false;
+        }
+
+        MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL;
+#if NV_OS_WIN32
+        MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL;
+#else
+        void * pCallback = NULL;
+#endif
+
+        // pExInfo and pCallback point at the objects below and are consumed by
+        // MiniDumpWriteDump at the end of the function, so these objects have
+        // to live at function scope rather than inside the 'if' block.
+        MINIDUMP_EXCEPTION_INFORMATION ExInfo;
+#if NV_OS_WIN32
+        MINIDUMP_CALLBACK_INFORMATION callback;
+        MinidumpCallbackContext context;
+#endif
+
+        if (pExceptionInfo != NULL) {
+#if NV_USE_SEPARATE_THREAD
+            // The dump may be written from the handler thread; the exception
+            // pointers belong to the thread that raised the exception, which
+            // handleException records in s_requesting_thread_id.
+            ExInfo.ThreadId = (s_requesting_thread_id != 0) ? s_requesting_thread_id : ::GetCurrentThreadId();
+#else
+            ExInfo.ThreadId = ::GetCurrentThreadId();
+#endif
+            ExInfo.ExceptionPointers = pExceptionInfo;
+            ExInfo.ClientPointers = FALSE;
+            pExInfo = &ExInfo;
+
+#if NV_OS_WIN32
+            // Find a memory region of 256 bytes centered on the
+            // faulting instruction pointer.
+            const ULONG64 instruction_pointer = 
+            #if defined(_M_IX86)
+                pExceptionInfo->ContextRecord->Eip;
+            #elif defined(_M_AMD64)
+                pExceptionInfo->ContextRecord->Rip;
+            #else
+                #error Unsupported platform
+            #endif
+
+            MEMORY_BASIC_INFORMATION info;
+
+            if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
+            {
+                // Attempt to get 128 bytes before and after the instruction
+                // pointer, but settle for whatever's available up to the
+                // boundaries of the memory region.
+                const ULONG64 kIPMemorySize = 256;
+                context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
+                ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
+                context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
+                context.finished = false;
+
+                callback.CallbackRoutine = miniDumpWriteDumpCallback;
+                callback.CallbackParam = reinterpret_cast<void*>(&context);
+                pCallback = &callback;
+            }
+#endif
+        }
+
+        MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo);
+
+        // write the dump
+        BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0;
+        CloseHandle(hFile);
+#if NV_OS_DURANGO
+        FreeLibrary(hToolHelpModule);
+#endif
+
+        if (ok == FALSE) {
+            //nvDebug("*** Failed to save dump file.\n");
+            return false;
+        }
+
+        //nvDebug("\nDump file saved.\n");
+
+        return true;
+    }
+
+#if NV_USE_SEPARATE_THREAD
+
+    // Entry point of the dump-writing thread. Blocks on
+    // s_handler_start_semaphore; every time it is signalled, a minidump is
+    // written for s_exception_info and s_handler_finish_semaphore is released
+    // so the requesting thread can continue.
+    static DWORD WINAPI ExceptionHandlerThreadMain(void* lpParameter) {
+        nvDebugCheck(s_handler_start_semaphore != NULL);
+        nvDebugCheck(s_handler_finish_semaphore != NULL);
+
+        for (;;) {
+            const DWORD wait_result = WaitForSingleObject(s_handler_start_semaphore, INFINITE);
+            if (wait_result != WAIT_OBJECT_0) {
+                // Not a wake-up request; go back to waiting.
+                continue;
+            }
+
+            writeMiniDump(s_exception_info);
+
+            // Let the thread that requested the dump proceed.
+            ReleaseSemaphore(s_handler_finish_semaphore, 1, NULL);
+        }
+
+        // Never reached: the loop above has no exit.
+        return 0;
+    }
+
+#endif // NV_USE_SEPARATE_THREAD
+
+    // Reports whether stack traces can be captured; unconditionally true in
+    // this platform branch.
+    static bool hasStackTrace() {
+        return true;
+    }
+
+    /*static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
+
+        // In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63.
+        int xp_maxcount = min(63-1, maxcount);
+
+        int count = RtlCaptureStackBackTrace(1, xp_maxcount, trace, NULL);
+        nvDebugCheck(count <= maxcount);
+
+        return count;
+    }*/
+
+#if NV_OS_WIN32
+    // Walks the stack described by @a ctx with StackWalk64 and stores the
+    // program counter of every walked frame in @a trace.
+    //
+    // @param ctx       Thread context the walk starts from; also passed to StackWalk64 as its context record.
+    // @param trace     Output array receiving the frame addresses.
+    // @param maxcount  Maximum number of StackWalk64 steps, including the skipped ones.
+    // @param skip      Number of leading frames that are walked but not stored.
+    // @return Number of steps walked minus @a skip. This is negative when the
+    //         walk stops before @a skip frames have been visited.
+    static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) {
+        
+        // Init the stack frame for this function
+        STACKFRAME64 stackFrame = { 0 };
+
+    #if NV_CPU_X86_64
+        DWORD dwMachineType = IMAGE_FILE_MACHINE_AMD64;
+        stackFrame.AddrPC.Offset = ctx->Rip;
+        stackFrame.AddrFrame.Offset = ctx->Rbp;
+        stackFrame.AddrStack.Offset = ctx->Rsp;
+    #elif NV_CPU_X86
+        DWORD dwMachineType = IMAGE_FILE_MACHINE_I386;
+        stackFrame.AddrPC.Offset = ctx->Eip;
+        stackFrame.AddrFrame.Offset = ctx->Ebp;
+        stackFrame.AddrStack.Offset = ctx->Esp;
+    #else
+        #error "Platform not supported!"
+    #endif
+        stackFrame.AddrPC.Mode = AddrModeFlat;
+        stackFrame.AddrFrame.Mode = AddrModeFlat;
+        stackFrame.AddrStack.Mode = AddrModeFlat;
+
+        // Walk up the stack
+        const HANDLE hThread = GetCurrentThread();
+        const HANDLE hProcess = GetCurrentProcess();
+        int i;
+        for (i = 0; i < maxcount; i++)
+        {
+            // walking once first makes us skip self
+            if (!StackWalk64(dwMachineType, hProcess, hThread, &stackFrame, ctx, NULL, &SymFunctionTableAccess64, &SymGetModuleBase64, NULL)) {
+                break;
+            }
+
+            /*if (stackFrame.AddrPC.Offset == stackFrame.AddrReturn.Offset || stackFrame.AddrPC.Offset == 0) {
+                break;
+            }*/
+
+            // Frames before 'skip' are walked but not recorded; the rest are
+            // stored compacted from index 0.
+            if (i >= skip) {
+                trace[i - skip] = (PVOID)stackFrame.AddrPC.Offset;
+            }
+        }
+
+        return i - skip;
+    }
+
+#pragma warning(push)
+#pragma warning(disable:4748)
+    // Captures the stack of the calling thread into @a trace.
+    //
+    // The current context is obtained with inline assembly on 32-bit x86
+    // (instruction pointer via call/pop, frame and stack pointers read
+    // directly) and with RtlCaptureContext elsewhere. The walk is delegated
+    // to backtraceWithSymbols with one leading frame skipped.
+    //
+    // @param trace     Output array receiving the frame addresses.
+    // @param maxcount  Maximum number of stack-walk steps.
+    // @return The value returned by backtraceWithSymbols.
+    static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
+        CONTEXT ctx = { 0 };
+#if NV_CPU_X86 && !NV_CPU_X86_64
+        ctx.ContextFlags = CONTEXT_CONTROL;
+        _asm {
+             call x
+          x: pop eax
+             mov ctx.Eip, eax
+             mov ctx.Ebp, ebp
+             mov ctx.Esp, esp
+        }
+#else
+        RtlCaptureContext(&ctx); // Not implemented correctly in x86.
+#endif
+
+        return backtraceWithSymbols(&ctx, trace, maxcount, 1);
+    }
+#pragma warning(pop)
+
+    // Resolves the addresses in @a trace to "file(line) : function" text lines.
+    //
+    // Entries [start, size) are processed in order. Processing stops at the
+    // first null address and right after a function named "WinMain". An
+    // address whose symbol cannot be resolved produces no line; a resolved
+    // symbol without line information is written as "unknown(address)".
+    //
+    // @param trace  Frame addresses, e.g. as filled in by backtrace.
+    // @param size   Number of entries in @a trace.
+    // @param start  Index of the first entry to resolve.
+    // @param lines  Receives one string per resolved frame. Each string is
+    //               released from the StringBuilder, so the caller owns it.
+    static NV_NOINLINE void writeStackTrace(void * trace[], int size, int start, Array<const char *> & lines)
+    {
+        StringBuilder builder(512);
+
+        HANDLE hProcess = GetCurrentProcess();
+        
+        // Resolve PC to function names
+        for (int i = start; i < size; i++)
+        {
+            // Check for end of stack walk
+            DWORD64 ip = (DWORD64)trace[i];
+            if (ip == NULL)
+                break;
+
+            // Get function name
+            // The symbol structure is followed in the same buffer by the
+            // storage for the symbol name.
+            #define MAX_STRING_LEN  (512)
+            unsigned char byBuffer[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 };
+            IMAGEHLP_SYMBOL64 * pSymbol = (IMAGEHLP_SYMBOL64*)byBuffer;
+            pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
+            pSymbol->MaxNameLength = MAX_STRING_LEN;
+
+            DWORD64 dwDisplacement;
+            
+            if (SymGetSymFromAddr64(hProcess, ip, &dwDisplacement, pSymbol))
+            {
+                // Force termination of the name.
+                pSymbol->Name[MAX_STRING_LEN-1] = 0;
+                
+                /*
+                // Make the symbol readable for humans
+                UnDecorateSymbolName( pSym->Name, lpszNonUnicodeUnDSymbol, BUFFERSIZE, 
+                    UNDNAME_COMPLETE | 
+                    UNDNAME_NO_THISTYPE |
+                    UNDNAME_NO_SPECIAL_SYMS |
+                    UNDNAME_NO_MEMBER_TYPE |
+                    UNDNAME_NO_MS_KEYWORDS |
+                    UNDNAME_NO_ACCESS_SPECIFIERS );
+                */
+                
+                // pSymbol->Name
+                const char * pFunc = pSymbol->Name;
+
+                // Get file/line number
+                IMAGEHLP_LINE64 theLine = { 0 };
+                theLine.SizeOfStruct = sizeof(theLine);
+
+                // Note: this 32-bit displacement shadows the 64-bit one above.
+                DWORD dwDisplacement;
+                if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine))
+                {
+                    // Do not print unknown symbols anymore.
+                    //break;
+                    // Only the low 32 bits of the address are printed.
+                    builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc);
+                }
+                else
+                {
+                    /*
+                    const char* pFile = strrchr(theLine.FileName, '\\');
+                    if ( pFile == NULL ) pFile = theLine.FileName;
+                    else pFile++;
+                    */
+                    const char * pFile = theLine.FileName;
+                    
+                    int line = theLine.LineNumber;
+                    
+                    builder.format("%s(%d) : %s\n", pFile, line, pFunc);
+                }
+
+                // Ownership of the formatted string moves to 'lines'.
+                lines.append(builder.release());
+
+                // Frames past the program entry point are not reported.
+                if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) {
+                    break;
+                }
+            }
+        }
+    }
+#endif
+
+    // Write mini dump and print stack trace.
+    //
+    // Exception filter. Steps, in order:
+    //  1. Under s_handler_critical_section, write a minidump -- either by
+    //     waking the handler thread and waiting for it to finish
+    //     (NV_USE_SEPARATE_THREAD) or directly on this thread.
+    //  2. In interactive mode, try to attach a debugger; on success break
+    //     into it and resume execution.
+    //  3. On Win32, write a symbolized stack trace to "crash.txt".
+    //  4. Terminate the process with exit code EXIT_FAILURE + 2.
+    //
+    // @param pExceptionInfo  Exception record and context of the faulting thread.
+    // @return EXCEPTION_CONTINUE_EXECUTION after a debugger break, otherwise
+    //         EXCEPTION_EXECUTE_HANDLER (only reached if termination fails).
+    static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo)
+    {
+        EnterCriticalSection(&s_handler_critical_section);
+#if NV_USE_SEPARATE_THREAD
+        s_requesting_thread_id = GetCurrentThreadId();
+        s_exception_info = pExceptionInfo;
+
+        // This causes the handler thread to call writeMiniDump.
+        ReleaseSemaphore(s_handler_start_semaphore, 1, NULL);
+
+        // Wait until WriteMinidumpWithException is done and collect its return value.
+        WaitForSingleObject(s_handler_finish_semaphore, INFINITE);
+        //bool status = s_handler_return_value;
+
+        // Clean up.
+        s_requesting_thread_id = 0;
+        s_exception_info = NULL;
+#else
+        // First of all, write mini dump.
+        writeMiniDump(pExceptionInfo);
+#endif
+        LeaveCriticalSection(&s_handler_critical_section);
+
+        // Note: printed whether or not writeMiniDump succeeded; its result is
+        // not checked above.
+        nvDebug("\nDump file saved.\n");
+
+        // Try to attach to debugger.
+        if (s_interactive && debug::attachToDebugger()) {
+            nvDebugBreak();
+            return EXCEPTION_CONTINUE_EXECUTION;
+        }
+
+#if NV_OS_WIN32
+        // If that fails, then try to pretty print a stack trace and terminate.
+        void * trace[64];
+        
+        int size = backtraceWithSymbols(pExceptionInfo->ContextRecord, trace, 64);
+
+        // @@ Use win32's CreateFile?
+        FILE * fp = fileOpen("crash.txt", "wb");
+        if (fp != NULL) {
+            Array<const char *> lines;
+            writeStackTrace(trace, size, 0, lines);
+
+            // Each line is owned by this function once writeStackTrace returns.
+            for (uint i = 0; i < lines.count(); i++) {
+                fputs(lines[i], fp);
+                delete lines[i];
+            }
+
+            // @@ Add more info to crash.txt?
+
+            fclose(fp);
+        }
+#endif
+
+        // This should terminate the process and set the error exit code.
+        TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2);
+
+        return EXCEPTION_EXECUTE_HANDLER;   // Terminate app. In case terminate process did not succeed.
+    }
+
+    // Breaks into the debugger and then terminates the process with exit code
+    // EXIT_FAILURE + 8.
+    // NOTE(review): presumably registered as the CRT pure-virtual-call
+    // handler -- the registration is outside this view, confirm.
+    static void handlePureVirtualCall() {
+        nvDebugBreak();
+        TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
+    }
+
+    // Converts the wide string @a wide into @a narrow, truncating if it does
+    // not fit. A NULL input leaves @a narrow untouched.
+    static void narrowWideString(const wchar_t * wide, StringBuilder & narrow) {
+        if (wide == NULL) {
+            return;
+        }
+
+        size_t converted = 0;
+        const uint capacity = U32(wcslen(wide) + 1);
+        narrow.reserve(capacity);
+        wcstombs_s(&converted, narrow.str(), capacity, wide, _TRUNCATE);
+    }
+
+    // Reports an invalid-parameter failure through nvAbort, converting the
+    // wide-character expression, file and function names first, and breaks
+    // into the debugger when nvAbort asks for it.
+    // NOTE(review): the signature matches the CRT invalid-parameter handler;
+    // presumably it is registered as such outside this view -- confirm.
+    static void handleInvalidParameter(const wchar_t * wexpresion, const wchar_t * wfunction, const wchar_t * wfile, unsigned int line, uintptr_t reserved) {
+
+        StringBuilder expresion;
+        narrowWideString(wexpresion, expresion);
+
+        StringBuilder file;
+        narrowWideString(wfile, file);
+
+        StringBuilder function;
+        narrowWideString(wfunction, function);
+
+        if (nvAbort(expresion.str(), file.str(), line, function.str()) == NV_ABORT_DEBUG) {
+            nvDebugBreak();
+        }
+    }
+
+#elif !NV_OS_WIN32 && defined(NV_HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
+
+#if defined(NV_HAVE_EXECINFO_H)
+
+    // Reports whether stack traces can be captured; unconditionally true when
+    // NV_HAVE_EXECINFO_H is defined.
+    static bool hasStackTrace() {
+        return true;
+    }
+
+
+    // Converts the addresses in @a trace to text lines using
+    // backtrace_symbols, demangling C++ names when building with GCC.
+    //
+    // Entries [start, size-1) are processed; processing stops early at a NULL
+    // or empty symbol string. Nothing is produced if the symbol strings cannot
+    // be obtained.
+    //
+    // @param trace  Frame addresses as returned by backtrace().
+    // @param size   Number of entries in @a trace.
+    // @param start  Index of the first entry to report.
+    // @param lines  Receives one string per reported frame. Each string is
+    //               released from the StringBuilder, so the caller owns it.
+    static void writeStackTrace(void * trace[], int size, int start, Array<const char *> & lines) {
+        StringBuilder builder(512);
+        char ** string_array = backtrace_symbols(trace, size);
+
+        // backtrace_symbols returns NULL on error (e.g. when it cannot
+        // allocate); this runs inside crash handlers, so fail quietly.
+        if (string_array == NULL) {
+            return;
+        }
+
+        for(int i = start; i < size-1; i++ ) {
+            // IC: Just in case.
+            if (string_array[i] == NULL || string_array[i][0] == '\0') break;
+
+#       if NV_CC_GNUC // defined(NV_HAVE_CXXABI_H)
+            // @@ Write a better parser for the possible formats.
+            // Expected form is "module(symbol+offset)"; the mangled symbol
+            // lies between '(' and the last '+'.
+            char * begin = strchr(string_array[i], '(');
+            char * end = strrchr(string_array[i], '+');
+            char * module = string_array[i];
+
+            // No parenthesis: cut the string just before the '+' and take the
+            // last space-separated token as the symbol.
+            if (begin == 0 && end != 0) {
+                *(end - 1) = '\0';
+                begin = strrchr(string_array[i], ' ');
+                module = NULL; // Ignore module.
+            }
+
+            if (begin != 0 && begin < end) {
+                int stat;
+                // Split the string in place: module | symbol | offset.
+                *end = '\0';
+                *begin = '\0';
+                char * name = abi::__cxa_demangle(begin+1, 0, 0, &stat);
+                if (module == NULL) {
+                    if (name == NULL || stat != 0) {
+                        builder.format("  In: '%s'\n", begin+1);
+                    }
+                    else {
+                        builder.format("  In: '%s'\n", name);
+                    }
+                }
+                else {
+                    if (name == NULL || stat != 0) {
+                        builder.format("  In: [%s] '%s'\n", module, begin+1);
+                    }
+                    else {
+                        builder.format("  In: [%s] '%s'\n", module, name);
+                    }
+                }
+                // __cxa_demangle allocates its result with malloc.
+                free(name);
+            }
+            else {
+                builder.format("  In: '%s'\n", string_array[i]);
+            }
+#       else
+            builder.format("  In: '%s'\n", string_array[i]);
+#       endif
+            lines.append(builder.release());
+        }
+
+        // The array from backtrace_symbols is a single malloc'ed block.
+        free(string_array);
+    }
+
+    // Prints a stack trace through nvDebug.
+    //
+    // @param trace  Frame addresses as returned by backtrace().
+    // @param size   Number of entries in @a trace.
+    // @param start  Index of the first frame to print; lets callers hide
+    //               their own handler frames.
+    static void printStackTrace(void * trace[], int size, int start=0) {
+        nvDebug( "\nDumping stacktrace:\n" );
+
+        Array<const char *> lines;
+        // Honour the caller's starting frame instead of a fixed value.
+        writeStackTrace(trace, size, start, lines);
+
+        // Each line is owned by this function once writeStackTrace returns.
+        for (uint i = 0; i < lines.count(); i++) {
+            nvDebug("%s", lines[i]);
+            delete lines[i];
+        }
+
+        nvDebug("\n");
+    }
+
+#endif // defined(NV_HAVE_EXECINFO_H)
+
+    // Extracts the saved instruction pointer from a signal context.
+    //
+    // @param secret  Context pointer received by the signal handler; it is
+    //                interpreted as a ucontext_t *.
+    // @return The program counter stored in the context for the current
+    //         OS/CPU combination. Unsupported combinations fail to compile
+    //         with "Unknown CPU".
+    static void * callerAddress(void * secret)
+    {
+#if NV_OS_DARWIN
+#  if defined(_STRUCT_MCONTEXT)
+#    if NV_CPU_PPC
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext->__ss.__srr0;
+#    elif NV_CPU_X86_64
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext->__ss.__rip;
+#    elif NV_CPU_X86
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext->__ss.__eip;
+#    elif NV_CPU_ARM
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext->__ss.__pc;
+#    else
+#      error "Unknown CPU"
+#    endif
+#  else
+#    if NV_CPU_PPC
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext->ss.srr0;
+#    elif NV_CPU_X86
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext->ss.eip;
+#    else
+#      error "Unknown CPU"
+#    endif
+#  endif
+#elif NV_OS_FREEBSD
+#  if NV_CPU_X86_64
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *)ucp->uc_mcontext.mc_rip;
+#  elif NV_CPU_X86
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *)ucp->uc_mcontext.mc_eip;
+#    else
+#      error "Unknown CPU"
+#    endif
+#elif NV_OS_OPENBSD
+#  if NV_CPU_X86_64
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *)ucp->sc_rip;
+#  elif NV_CPU_X86
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *)ucp->sc_eip;
+#  else
+#       error "Unknown CPU"
+#  endif        
+#else
+        // Every other OS takes this branch and uses the gregs / regs layout below.
+#  if NV_CPU_X86_64
+        // #define REG_RIP REG_INDEX(rip) // seems to be 16
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *)ucp->uc_mcontext.gregs[REG_RIP];
+#  elif NV_CPU_X86
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
+#  elif NV_CPU_PPC
+        ucontext_t * ucp = (ucontext_t *)secret;
+        return (void *) ucp->uc_mcontext.regs->nip;
+#    else
+#      error "Unknown CPU"
+#    endif
+#endif
+
+        // How to obtain the instruction pointers in different platforms, from mlton's source code.
+        // http://mlton.org/
+        // OpenBSD && NetBSD
+        // ucp->sc_eip
+        // FreeBSD:
+        // ucp->uc_mcontext.mc_eip
+        // HPUX:
+        // ucp->uc_link
+        // Solaris:
+        // ucp->uc_mcontext.gregs[REG_PC]
+        // Linux hppa:
+        // uc->uc_mcontext.sc_iaoq[0] & ~0x3UL
+        // Linux sparc:
+        // ((struct sigcontext*) secret)->sigc_regs.tpc
+        // Linux sparc64:
+        // ((struct sigcontext*) secret)->si_regs.pc
+
+        // potentially correct for other archs:
+        // Linux alpha: ucp->m_context.sc_pc
+        // Linux arm: ucp->m_context.ctx.arm_pc
+        // Linux ia64: ucp->m_context.sc_ip & ~0x3UL
+        // Linux mips: ucp->m_context.sc_pc
+        // Linux s390: ucp->m_context.sregs->regs.psw.addr
+    }
+
+    // Signal handler for fatal signals: logs the signal, prints a stack trace
+    // when one is available and terminates the process with a failure status.
+    //
+    // @param sig     Signal number.
+    // @param info    Signal details; si_addr is reported for SIGSEGV.
+    // @param secret  Signal context, used to recover the address of the
+    //                instruction that was executing.
+    static void nvSigHandler(int sig, siginfo_t *info, void *secret)
+    {
+        void * pnt = callerAddress(secret);
+
+        // Do something useful with siginfo_t
+        if (sig == SIGSEGV) {
+            if (pnt != NULL) nvDebug("Got signal %d, faulty address is %p, from %p\n", sig, info->si_addr, pnt);
+            else nvDebug("Got signal %d, faulty address is %p\n", sig, info->si_addr);
+        }
+        else if(sig == SIGTRAP) {
+            nvDebug("Breakpoint hit.\n");
+        }
+        else {
+            nvDebug("Got signal %d\n", sig);
+        }
+
+#if defined(NV_HAVE_EXECINFO_H)
+        if (hasStackTrace()) // in case of weak linking
+        {
+            void * trace[64];
+            int size = backtrace(trace, 64);
+
+            if (pnt != NULL) {
+                // Overwrite sigaction with caller's address.
+                trace[1] = pnt;
+            }
+
+            printStackTrace(trace, size, 1);
+        }
+#endif // defined(NV_HAVE_EXECINFO_H)
+
+        // A crash must never be reported as a successful run. The status
+        // matches the one the Win32 exception handler terminates with.
+        exit(EXIT_FAILURE + 2);
+    }
+
+#endif // defined(NV_HAVE_SIGNAL_H)
+
+
+
+#if NV_OS_WIN32 //&& NV_CC_MSVC
+
+    /**
+     * Win32 assert handler.
+     *
+     * Logs the failed assertion, dumps debug info and then either hands
+     * control to an attached debugger or, in interactive mode, asks the user
+     * with an Abort/Retry/Ignore message box. In non-interactive mode, or
+     * when the user picks Abort, the process exits with EXIT_FAILURE + 1.
+     */
+    struct Win32AssertHandler : public AssertHandler 
+    {
+        // Flush the message queue. This is necessary for the message box to show up.
+        static void flushMessageQueue()
+        {
+            MSG msg;
+            while( PeekMessage( &msg, NULL, 0, 0, PM_REMOVE ) ) {
+                //if( msg.message == WM_QUIT ) break;
+                TranslateMessage( &msg );
+                DispatchMessage( &msg );
+            }
+        }
+
+        // Assert handler method.
+        //
+        // @param exp   Text of the failed expression.
+        // @param file  Source file of the assertion.
+        // @param line  Source line of the assertion.
+        // @param func  Enclosing function name, or NULL.
+        // @param msg   Optional printf-style message, or NULL.
+        // @param arg   Arguments for @a msg.
+        // @return NV_ABORT_DEBUG or NV_ABORT_IGNORE; does not return when the
+        //         outcome is NV_ABORT_EXIT.
+        virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
+        {
+            int ret = NV_ABORT_EXIT;
+
+            StringBuilder error_string;
+            error_string.format("*** Assertion failed: %s\n    On file: %s\n    On line: %d\n", exp, file, line );
+            if (func != NULL) {
+                error_string.appendFormat("    On function: %s\n", func);
+            }
+            if (msg != NULL) {
+                error_string.append("    Message: ");
+                // Work on a copy so the caller's va_list is left untouched.
+                va_list tmp;
+                va_copy(tmp, arg);
+                error_string.appendFormatList(msg, tmp);
+                va_end(tmp);
+                error_string.append("\n");
+            }
+
+            // The text contains the asserted expression, which may include '%'
+            // (e.g. "a % b == 0"); it must be passed as an argument, never as
+            // the format string.
+            nvDebug( "%s", error_string.str() );
+
+            // Print stack trace:
+            debug::dumpInfo();
+
+            if (debug::isDebuggerPresent()) {
+                return NV_ABORT_DEBUG;
+            }
+
+            if (s_interactive) {
+                flushMessageQueue();
+                int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE | MB_ICONERROR | MB_TOPMOST);
+                switch( action ) {
+                case IDRETRY:
+                    ret = NV_ABORT_DEBUG;
+                    break;
+                case IDIGNORE:
+                    ret = NV_ABORT_IGNORE;
+                    break;
+                case IDABORT:
+                default:
+                    ret = NV_ABORT_EXIT;
+                    break;
+                }
+                /*if( _CrtDbgReport( _CRT_ASSERT, file, line, module, exp ) == 1 ) {
+                    return NV_ABORT_DEBUG;
+                }*/
+            }
+
+            if (ret == NV_ABORT_EXIT) {
+                // Exit cleanly.
+                exit(EXIT_FAILURE + 1);
+            }
+
+            return ret;
+        }
+    };
+#elif NV_OS_XBOX
+
+    /**
+     * Xbox360 assert handler.
+     *
+     * Logs the failed assertion, then returns NV_ABORT_DEBUG if a debugger is
+     * present and otherwise exits the process with EXIT_FAILURE + 1. The
+     * optional message and its arguments are not used.
+     */
+    struct Xbox360AssertHandler : public AssertHandler 
+    {
+        // Assert handler method.
+        virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
+        {
+            int ret = NV_ABORT_EXIT;
+
+            StringBuilder error_string;
+            if( func != NULL ) {
+                error_string.format( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
+            }
+            else {
+                error_string.format( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
+            }
+
+            // The text contains the asserted expression, which may include
+            // '%'; pass it as an argument, never as the format string.
+            nvDebug( "%s", error_string.str() );
+
+            if (debug::isDebuggerPresent()) {
+                return NV_ABORT_DEBUG;
+            }
+
+            if( ret == NV_ABORT_EXIT ) {
+                 // Exit cleanly.
+                exit(EXIT_FAILURE + 1);
+            }
+
+            return ret;
+        }
+    };
+#elif NV_OS_ORBIS || NV_OS_DURANGO
+
+    /**
+     * Console assert handler.
+     *
+     * Logs the failed assertion and never terminates the process: the result
+     * is NV_ABORT_DEBUG when a debugger is present and NV_ABORT_IGNORE
+     * otherwise. The optional message and its arguments are not used.
+     */
+    struct ConsoleAssertHandler : public AssertHandler
+    {
+        // Assert handler method.
+        virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
+        {
+            if( func == NULL ) {
+                nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
+            }
+            else {
+                nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
+            }
+
+            //SBtodoORBIS print stack trace
+            /*if (hasStackTrace())
+            {
+                void * trace[64];
+                int size = backtrace(trace, 64);
+                printStackTrace(trace, size, 2);
+            }*/
+
+            return debug::isDebuggerPresent() ? NV_ABORT_DEBUG : NV_ABORT_IGNORE;
+        }
+    };
+
+#else
+
+    /**
+     * Unix assert handler.
+     *
+     * Logs the failed assertion. In _DEBUG builds it returns NV_ABORT_DEBUG
+     * when a debugger is present; otherwise it prints a stack trace (when
+     * execinfo is available) and exits with EXIT_FAILURE + 1. The optional
+     * message and its arguments are not used.
+     */
+    struct UnixAssertHandler : public AssertHandler
+    {
+        // Assert handler method.
+        virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg)
+        {
+            if( func == NULL ) {
+                nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On line: %d\n ", exp, file, line );
+            }
+            else {
+                nvDebug( "*** Assertion failed: %s\n    On file: %s\n    On function: %s\n    On line: %d\n ", exp, file, func, line );
+            }
+
+#if _DEBUG
+            if (debug::isDebuggerPresent()) {
+                return NV_ABORT_DEBUG;
+            }
+#endif
+
+#if defined(NV_HAVE_EXECINFO_H)
+            if (hasStackTrace())
+            {
+                void * frames[64];
+                const int frame_count = backtrace(frames, 64);
+                printStackTrace(frames, frame_count, 2);
+            }
+#endif
+
+            // Exit cleanly.
+            exit(EXIT_FAILURE + 1);
+
+            // Not reached; present only to satisfy the return type.
+            return NV_ABORT_EXIT;
+        }
+    };
+
+#endif
+
+} // namespace
+
+
+/// Route a failed assertion to the assert handler.
+/// The handler installed with debug::setAssertHandler is used when present;
+/// otherwise a function-local static default for the current platform is used.
+/// The variadic arguments following `msg` are forwarded to the handler as a va_list.
+/// Returns whatever the handler returns (one of the NV_ABORT_* codes).
+int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/, const char * msg/*= NULL*/, ...)
+{
+#if NV_OS_WIN32 //&& NV_CC_MSVC
+    static Win32AssertHandler s_fallback_handler;
+#elif NV_OS_XBOX
+    static Xbox360AssertHandler s_fallback_handler;
+#elif NV_OS_ORBIS || NV_OS_DURANGO
+    static ConsoleAssertHandler s_fallback_handler;
+#else
+    static UnixAssertHandler s_fallback_handler;
+#endif
+
+    va_list args;
+    va_start(args, msg);
+
+    AssertHandler * handler = &s_fallback_handler;
+    if (s_assert_handler != NULL) {
+        handler = s_assert_handler;
+    }
+
+    const int outcome = handler->assertion(exp, file, line, func, msg, args);
+
+    va_end(args);
+
+    return outcome;
+}
+
+// Abnormal termination. Create mini dump and output call stack.
+//
+// On Win32 and Durango this takes s_handler_critical_section and calls
+// writeMiniDump(NULL). On Win32 it additionally writes the current stack trace
+// to "crash.txt". On every platform the process then ends through exit(code).
+//
+// NOTE(review): within this file the critical section is only seen being
+// initialized from enableSigHandler(); confirm terminate() is never reached
+// before that call.
+void debug::terminate(int code)
+{
+#if NV_OS_WIN32 || NV_OS_DURANGO
+    // Hold the handler lock while the dump and the crash log are written.
+    EnterCriticalSection(&s_handler_critical_section);
+
+    writeMiniDump(NULL);
+
+#if NV_OS_WIN32
+    const int max_stack_size = 64;
+    void * trace[max_stack_size];
+    int size = backtrace(trace, max_stack_size);
+
+    // @@ Use win32's CreateFile?
+    FILE * fp = fileOpen("crash.txt", "wb");
+    if (fp != NULL) {
+        Array<const char *> lines;
+        // Skip count 0: every captured frame is written.
+        writeStackTrace(trace, size, 0, lines);
+
+        for (uint i = 0; i < lines.count(); i++) {
+            fputs(lines[i], fp);
+            // NOTE(review): released with scalar delete; confirm this matches how
+            // writeStackTrace allocates the strings.
+            delete lines[i];
+        }
+
+        // @@ Add more info to crash.txt?
+
+        fclose(fp);
+    }
+#endif
+
+    LeaveCriticalSection(&s_handler_critical_section);
+#endif
+
+    exit(code);
+}
+
+
+/// Emit a printf-style message.
+/// The message goes to the handler installed with debug::setMessageHandler when
+/// there is one, and to vprintf otherwise.
+void NV_CDECL nvDebugPrint(const char *msg, ...)
+{
+    va_list args;
+    va_start(args, msg);
+    if (s_message_handler == NULL) {
+        vprintf(msg, args);
+    }
+    else {
+        s_message_handler->log( msg, args );
+    }
+    va_end(args);
+}
+
+
+/// Dump debug info: logs the current stack trace through nvDebug.
+/// Does nothing on platforms without stack-trace support, or when
+/// hasStackTrace() reports that none is available.
+void debug::dumpInfo()
+{
+#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(NV_HAVE_SIGNAL_H) && defined(NV_HAVE_EXECINFO_H))
+    if (!hasStackTrace()) {
+        return;
+    }
+
+    void * frames[64];
+    const int frameCount = backtrace(frames, 64);
+
+    nvDebug( "\nDumping stacktrace:\n" );
+
+    // Skip one frame so that dumpInfo itself is not listed.
+    Array<const char *> text;
+    writeStackTrace(frames, frameCount, 1, text);
+
+    const uint lineCount = text.count();
+    for (uint n = 0; n < lineCount; n++) {
+        nvDebug("%s", text[n]);
+        delete text[n];
+    }
+#endif
+}
+
+#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(NV_HAVE_SIGNAL_H) && defined(NV_HAVE_EXECINFO_H))
+// Forward one printf-style message to `handler` through a real va_list.
+// MessageHandler::log takes a va_list, which cannot portably be faked with NULL.
+static void logCallstackLine(MessageHandler * handler, const char * format, ...)
+{
+    va_list arg;
+    va_start(arg, format);
+    handler->log(format, arg);
+    va_end(arg);
+}
+#endif
+
+/// Dump callstack using the specified handler.
+/// Each stack-trace line is delivered to `messageHandler` as the argument of a
+/// "%s" format, so '%' characters inside symbol names are printed literally.
+/// `callstackLevelsToSkip` is the number of innermost frames to omit, not
+/// counting dumpCallstack itself. A NULL handler is ignored.
+void debug::dumpCallstack(MessageHandler *messageHandler, int callstackLevelsToSkip /*= 0*/)
+{
+#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(NV_HAVE_SIGNAL_H) && defined(NV_HAVE_EXECINFO_H))
+    if (messageHandler == NULL) {
+        return;
+    }
+
+    if (hasStackTrace())
+    {
+        void * trace[64];
+        int size = backtrace(trace, 64);
+
+        Array<const char *> lines;
+        writeStackTrace(trace, size, callstackLevelsToSkip + 1, lines);     // + 1 to skip the call to dumpCallstack
+
+        for (uint i = 0; i < lines.count(); i++) {
+            logCallstackLine(messageHandler, "%s", lines[i]);
+            delete lines[i];
+        }
+    }
+#endif
+}
+
+
+/// Set the debug message handler.
+/// While a non-NULL handler is installed, nvDebugPrint sends its output to
+/// message_handler->log() instead of vprintf. Only the pointer is stored.
+void debug::setMessageHandler(MessageHandler * message_handler)
+{
+    s_message_handler = message_handler;
+}
+
+/// Reset the debug message handler.
+/// Clears the installed handler so that nvDebugPrint falls back to vprintf.
+void debug::resetMessageHandler()
+{
+    s_message_handler = NULL;
+}
+
+/// Set the assert handler.
+/// While a non-NULL handler is installed, nvAbort calls its assertion() method
+/// instead of the platform default handler. Only the pointer is stored.
+void debug::setAssertHandler(AssertHandler * assert_handler)
+{
+    s_assert_handler = assert_handler;
+}
+
+/// Reset the assert handler.
+/// Clears the installed handler so that nvAbort uses the platform default again.
+void debug::resetAssertHandler()
+{
+    s_assert_handler = NULL;
+}
+
+#if NV_OS_WIN32 || NV_OS_DURANGO
+#if NV_USE_SEPARATE_THREAD
+
+// Set up the objects used when exceptions are handled on a separate thread
+// (NV_USE_SEPARATE_THREAD): the handler critical section, the start and finish
+// semaphores (initial count 0, maximum count 1) and the handler thread running
+// ExceptionHandlerThreadMain. The thread is only created when both semaphores
+// were created successfully.
+static void initHandlerThread()
+{
+    // Initial stack size requested for the handler thread (64 KB).
+    static const int kExceptionHandlerThreadInitialStackSize = 64 * 1024;
+
+    // Set synchronization primitives and the handler thread.  Each
+    // ExceptionHandler object gets its own handler thread because that's the
+    // only way to reliably guarantee sufficient stack space in an exception,
+    // and it allows an easy way to get a snapshot of the requesting thread's
+    // context outside of an exception.
+    InitializeCriticalSection(&s_handler_critical_section);
+    
+    s_handler_start_semaphore = CreateSemaphoreExW(NULL, 0, 1, NULL, 0,
+        SEMAPHORE_MODIFY_STATE | DELETE | SYNCHRONIZE);
+    nvDebugCheck(s_handler_start_semaphore != NULL);
+
+    s_handler_finish_semaphore = CreateSemaphoreExW(NULL, 0, 1, NULL, 0,
+        SEMAPHORE_MODIFY_STATE | DELETE | SYNCHRONIZE);
+    nvDebugCheck(s_handler_finish_semaphore != NULL);
+
+    // Don't attempt to create the thread if we could not create the semaphores.
+    if (s_handler_finish_semaphore != NULL && s_handler_start_semaphore != NULL) {
+        DWORD thread_id;
+        s_handler_thread = CreateThread(NULL,         // lpThreadAttributes
+                                        kExceptionHandlerThreadInitialStackSize,
+                                        ExceptionHandlerThreadMain,
+                                        NULL,         // lpParameter
+                                        0,            // dwCreationFlags
+                                        &thread_id);
+        nvDebugCheck(s_handler_thread != NULL);
+    }
+
+    /* @@ We should avoid loading modules in the exception handler!
+    dbghelp_module_ = LoadLibrary(L"dbghelp.dll");
+    if (dbghelp_module_) {
+        minidump_write_dump_ = reinterpret_cast<MiniDumpWriteDump_type>(GetProcAddress(dbghelp_module_, "MiniDumpWriteDump"));
+    }
+    */
+}
+
+// Counterpart of initHandlerThread(). Currently a stub: nothing created by
+// initHandlerThread() is released here.
+static void shutHandlerThread() {
+    // @@ Free stuff. Terminate thread.
+}
+
+#endif // NV_USE_SEPARATE_THREAD
+#endif // NV_OS_WIN32
+
+
+// Enable signal handler.
+//
+// Installs the crash handlers for the current platform and records
+// `interactive` in s_interactive. Calling it again while the handlers are
+// already enabled is a no-op.
+//  - MSVC/Win32 and Durango: sets the unhandled-exception filter plus the CRT
+//    invalid-parameter and pure-call handlers; on Win32 also initializes the
+//    symbol handler.
+//  - Other platforms with NV_HAVE_SIGNAL_H: installs nvSigHandler for SIGSEGV,
+//    SIGTRAP, SIGFPE and SIGBUS, saving the previous actions.
+void debug::enableSigHandler(bool interactive)
+{
+    if (s_sig_handler_enabled) return;
+
+    s_sig_handler_enabled = true;
+    s_interactive = interactive;
+
+#if (NV_OS_WIN32 && NV_CC_MSVC) || NV_OS_DURANGO
+    // NOTE(review): system/CRT error dialogs are suppressed when `interactive`
+    // is true; confirm this is the intended polarity.
+    if (interactive) {
+#if NV_OS_WIN32
+        // Do not display message boxes on error.
+        // http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621(v=vs.85).aspx
+        SetErrorMode(SEM_FAILCRITICALERRORS|SEM_NOGPFAULTERRORBOX|SEM_NOOPENFILEERRORBOX);
+#endif
+
+        // CRT reports errors to debug output only.
+        // http://msdn.microsoft.com/en-us/library/1y71x448(v=vs.80).aspx
+        _CrtSetReportMode(_CRT_WARN, _CRTDBG_MODE_DEBUG);
+        _CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG);
+        _CrtSetReportMode(_CRT_ASSERT, _CRTDBG_MODE_DEBUG);
+    }
+
+
+#if NV_USE_SEPARATE_THREAD
+    initHandlerThread();
+#else
+    InitializeCriticalSection(&s_handler_critical_section);
+#endif
+
+    // Remember the previous filter so disableSigHandler() can restore it.
+    s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );
+
+#if _MSC_VER >= 1400  // MSVC 2005/8
+    _set_invalid_parameter_handler(handleInvalidParameter);
+#endif  // _MSC_VER >= 1400
+
+    _set_purecall_handler(handlePureVirtualCall);
+
+#if NV_OS_WIN32
+    // SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
+    SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);
+
+    // A failure here is only logged; the handlers stay installed.
+    if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
+        DWORD error = GetLastError();
+        nvDebug("SymInitialize returned error : %d\n", error);
+    }
+#endif
+
+#elif !NV_OS_WIN32 && defined(NV_HAVE_SIGNAL_H)
+
+    // Install our signal handler
+    struct sigaction sa;
+    sa.sa_sigaction = nvSigHandler;
+    sigemptyset (&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
+
+    // The previous actions are saved so disableSigHandler() can restore them.
+    sigaction(SIGSEGV, &sa, &s_old_sigsegv);
+    sigaction(SIGTRAP, &sa, &s_old_sigtrap);
+    sigaction(SIGFPE, &sa, &s_old_sigfpe);
+    sigaction(SIGBUS, &sa, &s_old_sigbus);
+
+#endif
+}
+
+/// Disable signal handler.
+/// Restores the exception filter (MSVC/Win32, Durango) or the signal actions
+/// (platforms with NV_HAVE_SIGNAL_H) that enableSigHandler() saved.
+/// Calling it while the handlers are not enabled trips the nvCheck and is
+/// otherwise a no-op, so state that was never saved is not "restored".
+void debug::disableSigHandler()
+{
+    nvCheck(s_sig_handler_enabled == true);
+    if (!s_sig_handler_enabled) {
+        // Nothing was installed: restoring would overwrite the current filter or
+        // signal actions with values that enableSigHandler() never recorded.
+        return;
+    }
+    s_sig_handler_enabled = false;
+
+#if (NV_OS_WIN32 && NV_CC_MSVC) || NV_OS_DURANGO
+
+    ::SetUnhandledExceptionFilter( s_old_exception_filter );
+    s_old_exception_filter = NULL;
+
+#if NV_OS_WIN32
+    SymCleanup(GetCurrentProcess());
+#endif
+
+#elif !NV_OS_WIN32 && defined(NV_HAVE_SIGNAL_H)
+
+    sigaction(SIGSEGV, &s_old_sigsegv, NULL);
+    sigaction(SIGTRAP, &s_old_sigtrap, NULL);
+    sigaction(SIGFPE, &s_old_sigfpe, NULL);
+    sigaction(SIGBUS, &s_old_sigbus, NULL);
+
+#endif
+}
+
+
+/// Report whether a debugger appears to be attached to this process.
+/// The check is platform specific; on the generic fallback it is only a heuristic.
+bool debug::isDebuggerPresent()
+{
+#if NV_OS_WIN32
+    // Look IsDebuggerPresent up at run time instead of linking to it directly.
+    HINSTANCE kernel32 = GetModuleHandleA("kernel32.dll");
+    if (kernel32) {
+        FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
+        if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
+            return true;
+        }
+    }
+    return false;
+#elif NV_OS_XBOX
+#ifdef _DEBUG
+    return DmIsDebuggerPresent() == TRUE;
+#else
+    return false;
+#endif
+#elif NV_OS_ORBIS
+  #if PS4_FINAL_REQUIREMENTS
+    return false; 
+  #else
+    return sceDbgIsDebuggerAttached() == 1;
+  #endif
+#elif NV_OS_DURANGO
+  #if XB1_FINAL_REQUIREMENTS
+    return false;
+  #else
+    return IsDebuggerPresent() == TRUE;
+  #endif
+#elif NV_OS_DARWIN
+    // Query the kernel's process info for this pid and test the P_TRACED flag.
+    int mib[4];
+    struct kinfo_proc info;
+    size_t size;
+    mib[0] = CTL_KERN;
+    mib[1] = KERN_PROC;
+    mib[2] = KERN_PROC_PID;
+    mib[3] = getpid();
+    size = sizeof(info);
+    // Pre-clear the flag: the sysctl result is not checked, so a failed call
+    // reads back as "not traced".
+    info.kp_proc.p_flag = 0;
+    sysctl(mib,4,&info,&size,NULL,0);
+    return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
+#else
+    // if ppid != sid, some process spawned our app, probably a debugger. 
+    return getsid(getpid()) != getppid();
+#endif
+}
+
+/// Try to get a debugger attached to the current process.
+/// On Win32, when no debugger is present, this launches VSJitDebugger.exe from
+/// the system directory for the current process id and waits for it to exit.
+/// Returns false when the process could not be started or exited with a
+/// non-zero code. On success it polls isDebuggerPresent() every 200 ms, up to
+/// 300 times, before returning true. On other platforms it returns true
+/// without doing anything.
+bool debug::attachToDebugger()
+{
+#if NV_OS_WIN32
+    if (isDebuggerPresent() == FALSE) {
+        // Build the command line: "<system dir>\VSJitDebugger.exe" -p <pid>
+        Path process(1024);
+        process.copy("\"");
+        // Written right after the opening quote, hence the +1 / -1.
+        GetSystemDirectoryA(process.str() + 1, 1024 - 1);
+
+        process.appendSeparator();
+
+        process.appendFormat("VSJitDebugger.exe\" -p %lu", ::GetCurrentProcessId());
+
+        STARTUPINFOA sSi;
+        memset(&sSi, 0, sizeof(sSi));
+
+        PROCESS_INFORMATION sPi;
+        memset(&sPi, 0, sizeof(sPi));
+        
+        BOOL b = CreateProcessA(NULL, process.str(), NULL, NULL, FALSE, 0, NULL, NULL, &sSi, &sPi);
+        if (b != FALSE) {
+            ::WaitForSingleObject(sPi.hProcess, INFINITE);
+            
+            DWORD dwExitCode;
+            ::GetExitCodeProcess(sPi.hProcess, &dwExitCode);
+            if (dwExitCode != 0) //if exit code is zero, a debugger was selected
+                b = FALSE;
+        }
+
+        // sPi was zeroed above, so these are NULL when CreateProcessA failed.
+        if (sPi.hThread != NULL) ::CloseHandle(sPi.hThread);
+        if (sPi.hProcess != NULL) ::CloseHandle(sPi.hProcess);
+
+        if (b == FALSE)
+            return false;
+
+        // Give the debugger time to attach: 5*60 polls, 200 ms apart.
+        for (int i = 0; i < 5*60; i++) {
+            if (isDebuggerPresent())
+                break;
+            ::Sleep(200);
+        }
+    }
+#endif // NV_OS_WIN32
+
+    return true;
+}
diff --git a/thirdparty/thekla_atlas/nvcore/Debug.h b/thirdparty/thekla_atlas/nvcore/Debug.h
new file mode 100644
index 0000000000..f37a05c453
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Debug.h
@@ -0,0 +1,246 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_DEBUG_H
+#define NV_CORE_DEBUG_H
+
+#include "nvcore.h"
+
+#include <stdarg.h> // va_list
+
+#if NV_OS_IOS //ACS: maybe we want this for OSX too?
+#   ifdef __APPLE__
+#       include <TargetConditionals.h>
+#       include <signal.h>
+#   endif
+#endif
+
+// Make sure we are using our assert.
+#undef assert
+
+#define NV_ABORT_DEBUG      1
+#define NV_ABORT_IGNORE     2
+#define NV_ABORT_EXIT       3
+
+#define nvNoAssert(exp) \
+    NV_MULTI_LINE_MACRO_BEGIN \
+    (void)sizeof(exp); \
+    NV_MULTI_LINE_MACRO_END
+
+#if NV_NO_ASSERT
+
+#   define nvAssert(exp) nvNoAssert(exp)
+#   define nvCheck(exp) nvNoAssert(exp)
+#   define nvDebugAssert(exp) nvNoAssert(exp)
+#   define nvDebugCheck(exp) nvNoAssert(exp)
+#   define nvDebugBreak() nvNoAssert(0)
+
+#else // NV_NO_ASSERT
+
+#   if NV_CC_MSVC
+        // @@ Does this work in msvc-6 and earlier?
+#       define nvDebugBreak()       __debugbreak()
+//#       define nvDebugBreak()        __asm { int 3 }
+#   elif NV_OS_ORBIS
+#       define nvDebugBreak()       __debugbreak()
+#   elif NV_OS_IOS && TARGET_OS_IPHONE
+#       define nvDebugBreak()       raise(SIGINT)
+#   elif NV_CC_CLANG
+#       define nvDebugBreak()       __builtin_debugtrap()
+#   elif NV_CC_GNUC
+//#       define nvDebugBreak()       __builtin_debugtrap()     // Does GCC have debugtrap?
+#       define nvDebugBreak()		__builtin_trap()
+/*
+#   elif NV_CC_GNUC && NV_CPU_PPC && NV_OS_DARWIN
+// @@ Use __builtin_trap() on GCC
+#       define nvDebugBreak()       __asm__ volatile ("trap")
+#   elif NV_CC_GNUC && NV_CPU_X86 && NV_OS_DARWIN
+#       define nvDebugBreak()       __asm__ volatile ("int3")
+#   elif NV_CC_GNUC && NV_CPU_X86 
+#       define nvDebugBreak()       __asm__ ( "int %0" : :"I"(3) )
+#   elif NV_OS_ORBIS
+#       define nvDebugBreak()       __asm volatile ("int $0x41")
+#   else
+#       include <signal.h>
+#       define nvDebugBreak()       raise(SIGTRAP); 
+// define nvDebugBreak()        *((int *)(0)) = 0
+*/
+#   endif
+
+#  if NV_CC_MSVC
+#   define nvExpect(expr) (expr)
+#else
+#   define nvExpect(expr) __builtin_expect((expr) != 0, true)
+#endif
+
+#if NV_CC_CLANG 
+#   if __has_feature(attribute_analyzer_noreturn)
+#       define NV_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
+#   else
+#       define NV_ANALYZER_NORETURN
+#   endif
+#else
+#   define NV_ANALYZER_NORETURN
+#endif
+
+#define nvDebugBreakOnce() \
+    NV_MULTI_LINE_MACRO_BEGIN \
+    static bool firstTime = true; \
+    if (firstTime) { firstTime = false; nvDebugBreak(); } \
+    NV_MULTI_LINE_MACRO_END
+
+// Statement-style assert: when `exp` is false, nvAbort is called with the
+// expression text, file, line and function, and the debugger is triggered only
+// if nvAbort returns NV_ABORT_DEBUG. nvExpect marks the passing case as the
+// expected one on compilers that support __builtin_expect.
+#define nvAssertMacro(exp) \
+    NV_MULTI_LINE_MACRO_BEGIN \
+    if (!nvExpect(exp)) { \
+        if (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) { \
+            nvDebugBreak(); \
+        } \
+    } \
+    NV_MULTI_LINE_MACRO_END
+
+// Assert variant that accepts an optional printf-style message and remembers
+// "ignore": once nvAbort returns NV_ABORT_IGNORE for a given expansion site,
+// its static flag is set and that site no longer evaluates `exp`.
+// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care
+#define nvAssertMacroWithIgnoreAll(exp,...) \
+    NV_MULTI_LINE_MACRO_BEGIN \
+        static bool ignoreAll = false; \
+        if (!ignoreAll && !nvExpect(exp)) { \
+            int _result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \
+            if (_result == NV_ABORT_DEBUG) { \
+                nvDebugBreak(); \
+            } else if (_result == NV_ABORT_IGNORE) { \
+                ignoreAll = true; \
+            } \
+        } \
+    NV_MULTI_LINE_MACRO_END
+
+// Interesting assert macro from Insomniac:
+// http://www.gdcvault.com/play/1015319/Developing-Imperfect-Software-How-to
+// nvCheckMacro is an expression, so it can be used as a condition:
+// if (nvCheckMacro(i < count)) {
+//     normal path
+// } else {
+//     fixup code.
+// }
+// It yields true when `exp` holds, and also after breaking into the debugger
+// (nvAbort returned NV_ABORT_DEBUG); in every other failure case it yields false.
+// Note: nvCheck is defined in terms of nvAssertMacro, which is a statement, so
+// nvCheck itself cannot be used inside an `if` condition like this.
+// This style of macro could be combined with __builtin_expect to let the compiler know failure is unlikely.
+#define nvCheckMacro(exp) \
+    (\
+        (exp) ? true : ( \
+            (nvAbort(#exp, __FILE__, __LINE__, __FUNC__) == NV_ABORT_DEBUG) ? (nvDebugBreak(), true) : ( false ) \
+        ) \
+    )
+
+
+#define nvAssert(exp)    nvAssertMacro(exp)
+#define nvCheck(exp)     nvAssertMacro(exp)
+
+#if defined(_DEBUG)
+#   define nvDebugAssert(exp)   nvAssertMacro(exp)
+#   define nvDebugCheck(exp)    nvAssertMacro(exp)
+#else // _DEBUG
+#   define nvDebugAssert(exp)   nvNoAssert(exp)
+#   define nvDebugCheck(exp)    nvNoAssert(exp)
+#endif // _DEBUG
+
+#endif // NV_NO_ASSERT
+
+// Use nvAssume for very simple expressions only: nvAssume(0), nvAssume(value == true), etc.
+/*#if !defined(_DEBUG)
+#   if NV_CC_MSVC
+#       define nvAssume(exp)    __assume(exp)
+#   else
+#       define nvAssume(exp)    nvCheck(exp)
+#   endif
+#else
+#   define nvAssume(exp)    nvCheck(exp)
+#endif*/
+
+// nvUnreachable() marks a code path that must never execute.
+// In _DEBUG builds it first asserts, then gives the compiler the unreachable
+// hint; in other builds only the hint remains. The debug variant is wrapped in
+// NV_MULTI_LINE_MACRO_BEGIN/END so that it expands to a single statement:
+// without the wrapper, `if (x) nvUnreachable();` would leave the hint outside
+// the `if` and mark the surrounding code as unreachable unconditionally.
+#if defined(_DEBUG)
+#  if NV_CC_MSVC
+#   define nvUnreachable() \
+        NV_MULTI_LINE_MACRO_BEGIN \
+        nvAssert(0 && "unreachable"); \
+        __assume(0); \
+        NV_MULTI_LINE_MACRO_END
+#  else
+#   define nvUnreachable() \
+        NV_MULTI_LINE_MACRO_BEGIN \
+        nvAssert(0 && "unreachable"); \
+        __builtin_unreachable(); \
+        NV_MULTI_LINE_MACRO_END
+#  endif
+#else
+#  if NV_CC_MSVC
+#   define nvUnreachable() __assume(0)
+#  else
+#   define nvUnreachable() __builtin_unreachable()
+#  endif
+#endif
+
+#define nvError(x)      nvAbort(x, __FILE__, __LINE__, __FUNC__)
+#define nvWarning(x)    nvDebugPrint("*** Warning %s/%d: %s\n", __FILE__, __LINE__, (x))
+
+#ifndef NV_DEBUG_PRINT
+#define NV_DEBUG_PRINT 1 //defined(_DEBUG)
+#endif
+
+#if NV_DEBUG_PRINT
+#define nvDebug(...)    nvDebugPrint(__VA_ARGS__)
+#else
+#if NV_CC_MSVC
+#define nvDebug(...)    __noop(__VA_ARGS__)
+#else
+#define nvDebug(...)    ((void)0) // Non-msvc platforms do not evaluate arguments?
+#endif
+#endif
+
+
+NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6))) NV_ANALYZER_NORETURN;
+NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
+
+namespace nv
+{
+    // Cheap plausibility check for a pointer value, intended for debug checks.
+    // It never dereferences `ptr`; it only rejects values that cannot be a
+    // normal user-space address:
+    //  - Darwin: everything is accepted.
+    //  - x86-64: NULL is accepted; otherwise the first 64 KB and everything
+    //    outside the lower canonical half (>= 0x0000800000000000) is rejected.
+    //    The previous upper bound (0x000007FFFFFEFFFF) was the 8 TB limit of
+    //    older Windows releases and rejected ordinary heap and stack addresses
+    //    on Linux and on newer Windows versions.
+    //  - otherwise: a few well-known fill patterns (0xcccccccc, 0xcdcdcdcd,
+    //    0xdddddddd, 0xffffffff) are rejected.
+    // NOTE(review): the last branch casts the pointer to uint32 and therefore
+    // assumes 32-bit pointers; confirm no 64-bit non-x86 target reaches it.
+    inline bool isValidPtr(const void * ptr) {
+    #if NV_OS_DARWIN
+        (void)ptr;
+        return true;    // IC: Not sure what ranges are OK on OSX.
+    #elif NV_CPU_X86_64
+        if (ptr == NULL) return true;
+        const uint64 address = reinterpret_cast<uint64>(ptr);
+        if (address < 0x10000ULL) return false;
+        if (address >= 0x0000800000000000ULL) return false;
+        return true;
+    #else
+	    if (reinterpret_cast<uint32>(ptr) == 0xcccccccc) return false;
+	    if (reinterpret_cast<uint32>(ptr) == 0xcdcdcdcd) return false;
+	    if (reinterpret_cast<uint32>(ptr) == 0xdddddddd) return false;
+	    if (reinterpret_cast<uint32>(ptr) == 0xffffffff) return false;
+        return true;
+    #endif
+    }
+
+    // Message handler interface.
+    // Implement log() to receive debug output: `str` is a printf-style format
+    // string and `arg` carries its arguments. Install an instance with
+    // debug::setMessageHandler().
+    struct MessageHandler {
+        virtual void log(const char * str, va_list arg) = 0;
+        virtual ~MessageHandler() {}
+    };
+
+    // Assert handler interface.
+    // assertion() receives the failed expression text, its source location, and
+    // an optional printf-style message (`msg` with arguments in `arg`). Its
+    // return value is handed back to the caller of nvAbort, which the assert
+    // macros compare against the NV_ABORT_* codes.
+    // Install an instance with debug::setAssertHandler().
+    struct AssertHandler {
+        virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0;
+        virtual ~AssertHandler() {}
+    };
+
+
+    // Debugging facilities: stack dumps, message/assert handler installation,
+    // crash (signal/exception) handlers and debugger detection.
+    namespace debug
+    {
+        /// Log the current stack trace through nvDebug, when one is available.
+        NVCORE_API void dumpInfo();
+        /// Send the current stack trace to messageHandler, line by line.
+        /// callstackLevelsToSkip is the number of innermost frames to omit.
+        NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 );
+
+        /// Install the handler that receives nvDebugPrint output. Only the pointer is stored.
+        NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
+        /// Remove the installed message handler.
+        NVCORE_API void resetMessageHandler();
+
+        /// Install the handler that nvAbort calls on failed assertions. Only the pointer is stored.
+        NVCORE_API void setAssertHandler( AssertHandler * assertHandler );
+        /// Remove the installed assert handler; nvAbort then uses the platform default.
+        NVCORE_API void resetAssertHandler();
+
+        /// Install the platform crash handlers (signals or unhandled-exception filter).
+        NVCORE_API void enableSigHandler(bool interactive);
+        /// Restore what enableSigHandler() replaced.
+        NVCORE_API void disableSigHandler();
+
+        /// Whether a debugger appears to be attached to this process.
+        NVCORE_API bool isDebuggerPresent();
+        /// Try to get a debugger attached; returns false when that attempt failed.
+        NVCORE_API bool attachToDebugger();
+
+        /// Abnormal termination: ends the process with exit(code).
+        NVCORE_API void terminate(int code);
+    }
+
+} // nv namespace
+
+#endif // NV_CORE_DEBUG_H
diff --git a/thirdparty/thekla_atlas/nvcore/DefsGnucDarwin.h b/thirdparty/thekla_atlas/nvcore/DefsGnucDarwin.h
new file mode 100644
index 0000000000..afb21c3d25
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/DefsGnucDarwin.h
@@ -0,0 +1,57 @@
+#ifndef NV_CORE_H
+#error "Do not include this file directly."
+#endif
+
+#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
+#include <stddef.h> // operator new, size_t, NULL
+
+// Function linkage
+#define DLL_IMPORT
+#if __GNUC__ >= 4
+#	define DLL_EXPORT __attribute__((visibility("default")))
+#	define DLL_EXPORT_CLASS DLL_EXPORT
+#else
+#	define DLL_EXPORT
+#	define DLL_EXPORT_CLASS
+#endif
+
+// Function calling modes
+#if NV_CPU_X86
+#	define NV_CDECL 	__attribute__((cdecl))
+#	define NV_STDCALL	__attribute__((stdcall))
+#else
+#	define NV_CDECL 
+#	define NV_STDCALL
+#endif
+
+#define NV_FASTCALL		__attribute__((fastcall))
+#define NV_FORCEINLINE	__attribute__((always_inline)) inline
+#define NV_DEPRECATED   __attribute__((deprecated))
+#if NV_OS_IOS
+#define NV_THREAD_LOCAL // @@ IC: Looks like iOS does not have support for TLS declarations.
+#else
+#define NV_THREAD_LOCAL __thread
+#endif
+
+#if __GNUC__ > 2
+#define NV_PURE     __attribute__((pure))
+#define NV_CONST    __attribute__((const))
+#else
+#define NV_PURE
+#define NV_CONST
+#endif
+
+#define NV_NOINLINE __attribute__((noinline))
+
+// Define __FUNC__ properly.
+#if __STDC_VERSION__ < 199901L
+#	if __GNUC__ >= 2
+#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
+#	else
+#		define __FUNC__ "<unknown>"
+#	endif
+#else
+#	define __FUNC__ __PRETTY_FUNCTION__
+#endif
+
+#define restrict    __restrict__
diff --git a/thirdparty/thekla_atlas/nvcore/DefsGnucLinux.h b/thirdparty/thekla_atlas/nvcore/DefsGnucLinux.h
new file mode 100644
index 0000000000..2126d866f5
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/DefsGnucLinux.h
@@ -0,0 +1,59 @@
+#ifndef NV_CORE_H
+#error "Do not include this file directly."
+#endif
+
+#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
+#include <stddef.h> // operator new, size_t, NULL
+
+// Function linkage
+#define DLL_IMPORT
+#if __GNUC__ >= 4
+#   define DLL_EXPORT   __attribute__((visibility("default")))
+#   define DLL_EXPORT_CLASS DLL_EXPORT
+#else
+#   define DLL_EXPORT
+#   define DLL_EXPORT_CLASS
+#endif
+
+// Function calling modes
+#if NV_CPU_X86
+#   define NV_CDECL     __attribute__((cdecl))
+#   define NV_STDCALL   __attribute__((stdcall))
+#else
+#   define NV_CDECL 
+#   define NV_STDCALL
+#endif
+
+#define NV_FASTCALL     __attribute__((fastcall))
+//#if __GNUC__ > 3
+// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
+#define NV_FORCEINLINE  inline __attribute__((always_inline))
+//#else
+// Some compilers complain that inline and always_inline are redundant.
+//#define NV_FORCEINLINE  __attribute__((always_inline))
+//#endif
+#define NV_DEPRECATED   __attribute__((deprecated))
+#define NV_THREAD_LOCAL __thread 
+
+#if __GNUC__ > 2
+#define NV_PURE     __attribute__((pure))
+#define NV_CONST    __attribute__((const))
+#else
+#define NV_PURE
+#define NV_CONST
+#endif
+
+#define NV_NOINLINE __attribute__((noinline))
+
+// Define __FUNC__ properly.
+#if __STDC_VERSION__ < 199901L
+#   if __GNUC__ >= 2
+#       define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
+#   else
+#       define __FUNC__ "<unknown>"
+#   endif
+#else
+#   define __FUNC__ __PRETTY_FUNCTION__
+#endif
+
+#define restrict    __restrict__
diff --git a/thirdparty/thekla_atlas/nvcore/DefsGnucWin32.h b/thirdparty/thekla_atlas/nvcore/DefsGnucWin32.h
new file mode 100644
index 0000000000..f35ed88575
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/DefsGnucWin32.h
@@ -0,0 +1,65 @@
+#ifndef NV_CORE_H
+#error "Do not include this file directly."
+#endif
+
+//#include <cstddef> // size_t, NULL
+
+// Function linkage
+#define DLL_IMPORT	__declspec(dllimport)
+#define DLL_EXPORT	__declspec(dllexport)
+#define DLL_EXPORT_CLASS DLL_EXPORT
+
+// Function calling modes
+#if NV_CPU_X86
+#	define NV_CDECL 	__attribute__((cdecl))
+#	define NV_STDCALL	__attribute__((stdcall))
+#else
+#	define NV_CDECL 
+#	define NV_STDCALL
+#endif
+
+#define NV_FASTCALL		__attribute__((fastcall))
+// GCC does not treat always_inline as implying inline, so spell both out
+// (same definition as the GCC Linux and Darwin headers).
+#define NV_FORCEINLINE	inline __attribute__((always_inline))
+#define NV_DEPRECATED   __attribute__((deprecated))
+
+#if __GNUC__ > 2
+#define NV_PURE		__attribute__((pure))
+#define NV_CONST	__attribute__((const))
+#else
+#define NV_PURE
+#define NV_CONST
+#endif
+
+#define NV_NOINLINE __attribute__((noinline))
+
+// Define __FUNC__ properly.
+#if __STDC_VERSION__ < 199901L
+#	if __GNUC__ >= 2
+#		define __FUNC__ __PRETTY_FUNCTION__	// __FUNCTION__
+#	else
+#		define __FUNC__ "<unknown>"
+#	endif
+#else
+#	define __FUNC__ __PRETTY_FUNCTION__
+#endif
+
+#define restrict	__restrict__
+
+/*
+// Type definitions
+typedef unsigned char		uint8;
+typedef signed char			int8;
+
+typedef unsigned short		uint16;
+typedef signed short		int16;
+
+typedef unsigned int		uint32;
+typedef signed int			int32;
+
+typedef unsigned long long	uint64;
+typedef signed long long	int64;
+
+// Aliases
+typedef uint32				uint;
+*/
+
diff --git a/thirdparty/thekla_atlas/nvcore/DefsVcWin32.h b/thirdparty/thekla_atlas/nvcore/DefsVcWin32.h
new file mode 100644
index 0000000000..a915f3791a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/DefsVcWin32.h
@@ -0,0 +1,94 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#ifndef NV_CORE_H
+#error "Do not include this file directly."
+#endif
+
+// Function linkage
+#define DLL_IMPORT __declspec(dllimport)
+#define DLL_EXPORT __declspec(dllexport)
+#define DLL_EXPORT_CLASS DLL_EXPORT
+
+// Function calling modes
+#define NV_CDECL        __cdecl
+#define NV_STDCALL      __stdcall
+#define NV_FASTCALL     __fastcall
+#define NV_DEPRECATED
+
+#define NV_PURE
+#define NV_CONST
+
+// Set standard function names.
+#if _MSC_VER < 1900
+#	define snprintf _snprintf
+#endif
+#if _MSC_VER < 1500
+#   define vsnprintf _vsnprintf
+#endif
+#if _MSC_VER < 1700
+#   define strtoll _strtoi64
+#   define strtoull _strtoui64
+#endif
+//#define chdir _chdir
+#define getcwd _getcwd 
+
+#if _MSC_VER <= 1600
+#define va_copy(a, b) (a) = (b)
+#endif
+
+#if !defined restrict
+#define restrict
+#endif
+
+// Ignore gcc attributes.
+#define __attribute__(X)
+
+#if !defined __FUNC__
+#define __FUNC__ __FUNCTION__ 
+#endif
+
+#define NV_NOINLINE __declspec(noinline)
+#define NV_FORCEINLINE __forceinline
+
+#define NV_THREAD_LOCAL __declspec(thread)
+
+/*
+// Type definitions
+typedef unsigned char       uint8;
+typedef signed char         int8;
+
+typedef unsigned short      uint16;
+typedef signed short        int16;
+
+typedef unsigned int        uint32;
+typedef signed int          int32;
+
+typedef unsigned __int64    uint64;
+typedef signed __int64      int64;
+
+// Aliases
+typedef uint32              uint;
+*/
+
+// Unwanted VC++ warnings to disable.
+/*
+#pragma warning(disable : 4244)     // conversion to float, possible loss of data
+#pragma warning(disable : 4245)     // conversion from 'enum ' to 'unsigned long', signed/unsigned mismatch
+#pragma warning(disable : 4100)     // unreferenced formal parameter
+#pragma warning(disable : 4514)     // unreferenced inline function has been removed
+#pragma warning(disable : 4710)     // inline function not expanded
+#pragma warning(disable : 4127)     // Conditional expression is constant
+#pragma warning(disable : 4305)     // truncation from 'const double' to 'float'
+#pragma warning(disable : 4505)     // unreferenced local function has been removed
+
+#pragma warning(disable : 4702)     // unreachable code in inline expanded function
+#pragma warning(disable : 4711)     // function selected for automatic inlining
+#pragma warning(disable : 4725)     // Pentium fdiv bug
+
+#pragma warning(disable : 4786)     // Identifier was truncated and cannot be debugged.
+
+#pragma warning(disable : 4675)     // resolved overload was found by argument-dependent lookup
+*/
+
+#pragma warning(1 : 4705)     // Report unused local variables.
+#pragma warning(1 : 4555)     // Expression has no effect.
diff --git a/thirdparty/thekla_atlas/nvcore/FileSystem.cpp b/thirdparty/thekla_atlas/nvcore/FileSystem.cpp
new file mode 100644
index 0000000000..5ed0ca074f
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/FileSystem.cpp
@@ -0,0 +1,75 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "FileSystem.h"
+
+#if NV_OS_WIN32
+#define _CRT_NONSTDC_NO_WARNINGS // _chdir is defined deprecated, but that's a bug, chdir is deprecated, _chdir is *not*.
+//#include <shlwapi.h> // PathFileExists
+#include <windows.h> // GetFileAttributes
+#include <direct.h> // _mkdir
+#elif NV_OS_XBOX
+#include <Xtl.h>
+#elif NV_OS_DURANGO
+#include <Windows.h>
+#else
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+#include <stdio.h> // remove, unlink
+
+using namespace nv;
+
+
+/// Report whether `path` can be found.
+/// Unix: access() with F_OK|R_OK succeeds. Win32/Xbox/Durango: the path has
+/// file attributes. Elsewhere: the file can be opened for reading.
+bool FileSystem::exists(const char * path)
+{
+#if NV_OS_UNIX
+	const int status = access(path, F_OK|R_OK);
+	return status == 0;
+#elif NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
+    // GetFileAttributes is used because PathFileExists requires linking to shlwapi.lib.
+    const DWORD attributes = GetFileAttributesA(path);
+    return attributes != INVALID_FILE_ATTRIBUTES;
+#else
+	FILE * fp = fopen(path, "r");
+	if (fp == NULL)
+	{
+		return false;
+	}
+	fclose(fp);
+	return true;
+#endif
+}
+
+/// Create the directory `path`; returns true on success.
+/// Not implemented on Orbis, where it always returns false.
+bool FileSystem::createDirectory(const char * path)
+{
+#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
+    const BOOL created = CreateDirectoryA(path, NULL);
+    return created != 0;
+#elif NV_OS_ORBIS
+    // not implemented
+	return false;
+#else
+    // Created with mode 0777.
+    const int status = mkdir(path, 0777);
+    return status != -1;
+#endif
+}
+
+/// Change the current working directory to `path`; returns true on success.
+/// Always returns false on Xbox, Durango and Orbis, which have no current
+/// working directory.
+bool FileSystem::changeDirectory(const char * path)
+{
+#if NV_OS_WIN32
+    const int status = _chdir(path);
+    return status != -1;
+#elif NV_OS_XBOX || NV_OS_DURANGO || NV_OS_ORBIS
+	// These platforms don't support Current Working Directory!
+	return false;
+#else
+    const int status = chdir(path);
+    return status != -1;
+#endif
+}
+
+/// Delete the file at `path` with remove(); returns true on success.
+bool FileSystem::removeFile(const char * path)
+{
+    // @@ Use unlink or remove?
+    const int status = remove(path);
+    return status == 0;
+}
diff --git a/thirdparty/thekla_atlas/nvcore/FileSystem.h b/thirdparty/thekla_atlas/nvcore/FileSystem.h
new file mode 100644
index 0000000000..afd0f449d3
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/FileSystem.h
@@ -0,0 +1,24 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_CORE_FILESYSTEM_H
+#define NV_CORE_FILESYSTEM_H
+
+#include "nvcore.h"
+
+namespace nv
+{
+
+    // Minimal file-system helpers. All functions take a narrow-character path
+    // and report success as a bool.
+    namespace FileSystem
+    {
+        // True when the path can be found (on Unix it must also be readable).
+        NVCORE_API bool exists(const char * path);
+        // Create a directory; true on success.
+        NVCORE_API bool createDirectory(const char * path);
+        // Change the current working directory; always false on platforms without one.
+        NVCORE_API bool changeDirectory(const char * path);
+        // Delete a file; true on success.
+        NVCORE_API bool removeFile(const char * path);
+
+    } // FileSystem namespace
+
+} // nv namespace
+
+
+#endif // NV_CORE_FILESYSTEM_H
diff --git a/thirdparty/thekla_atlas/nvcore/ForEach.h b/thirdparty/thekla_atlas/nvcore/ForEach.h
new file mode 100644
index 0000000000..bc66f424ef
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/ForEach.h
@@ -0,0 +1,71 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_FOREACH_H
+#define NV_CORE_FOREACH_H
+
+/*
+These foreach macros are very non-standard and somewhat confusing, but I like them.
+*/
+
+#include "nvcore.h"
+
+
+#if NV_CC_CPP11
+
+#define NV_FOREACH(i, container) \
+    for (auto i = (container).start(); !(container).isDone(i); (container).advance(i))
+
+#elif NV_CC_GNUC // If typeof is available:
+
+/*
+Ideally we would like to write this:
+
+#define NV_FOREACH(i, container) \
+    for(decltype(container)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
+
+But gcc versions prior to 4.7 required an intermediate type. See:
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
+*/
+
+#define NV_FOREACH(i, container) \
+    typedef typeof(container) NV_STRING_JOIN2(cont,__LINE__); \
+    for(NV_STRING_JOIN2(cont,__LINE__)::PseudoIndex i((container).start()); !(container).isDone(i); (container).advance(i))
+
+#else // If typeof not available:
+
+#define NV_NEED_PSEUDOINDEX_WRAPPER 1
+
+#include <new> // placement new
+
+// Type-erased storage for a container's PseudoIndex, used by NV_FOREACH when
+// neither C++11 `auto` nor GCC `typeof` is available. The index is
+// placement-constructed into a small byte buffer and read back through
+// operator(), which recovers the concrete type from the container pointer
+// passed as argument.
+struct PseudoIndexWrapper {
+    // Construct the container's PseudoIndex in place from container.start().
+    // The static check rejects index types larger than the buffer.
+    template <typename T>
+    PseudoIndexWrapper(const T & container) {
+        nvStaticCheck(sizeof(typename T::PseudoIndex) <= sizeof(memory));
+        new (memory) typename T::PseudoIndex(container.start());
+    }
+    // PseudoIndex cannot have a dtor!
+    // (This wrapper never destroys the object it constructed.)
+
+    // Access the stored index; the argument is only used to deduce T.
+    template <typename T> typename T::PseudoIndex & operator()(const T * /*container*/) {
+        return *reinterpret_cast<typename T::PseudoIndex *>(memory);
+    }
+    template <typename T> const typename T::PseudoIndex & operator()(const T * /*container*/) const {
+        return *reinterpret_cast<const typename T::PseudoIndex *>(memory);
+    }
+
+    // NOTE(review): a plain byte array carries no alignment guarantee for the
+    // placement-constructed index; confirm this is acceptable for all index types.
+    uint8 memory[4];	// Increase the size if we have bigger enumerators.
+};
+
+#define NV_FOREACH(i, container) \
+    for(PseudoIndexWrapper i(container); !(container).isDone(i(&(container))); (container).advance(i(&(container))))
+
+#endif
+
+// Declare foreach keyword.
+#if !defined NV_NO_USE_KEYWORDS
+#   define foreach NV_FOREACH
+#   define foreach_index NV_FOREACH
+#endif
+
+
+#endif // NV_CORE_FOREACH_H
diff --git a/thirdparty/thekla_atlas/nvcore/Hash.h b/thirdparty/thekla_atlas/nvcore/Hash.h
new file mode 100644
index 0000000000..a8b0b2c63b
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Hash.h
@@ -0,0 +1,83 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_HASH_H
+#define NV_CORE_HASH_H
+
+#include "nvcore.h"
+
+namespace nv
+{
+    inline uint sdbmHash(const void * data_in, uint size, uint h = 5381)
+    {
+        const uint8 * bytes = (const uint8 *) data_in;
+        // Each step is h * 65599 + byte, spelled with shifts: (h << 16) + (h << 6) - h.
+        for (uint n = 0; n < size; n++) {
+            h = (h << 16) + (h << 6) - h + (uint) bytes[n];
+        }
+        return h;
+    }
+
+    // Hashes 'count' floats through their bit patterns, chaining from seed h. Note that this hash does not handle NaN properly.
+    inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
+    {
+        for (uint i = 0; i < count; i++) {
+            //nvDebugCheck(nv::isFinite(*f));
+            union { float f; uint32 i; } x = { f[i] };  // view the float's bits as a 32-bit integer
+            if (x.i == 0x80000000) x.i = 0;             // sign bit only (IEEE-754 -0.0f): hash it like +0.0f
+            h = sdbmHash(&x, 4, h);                     // feed the 4 bytes, carrying the running hash
+        }
+        return h;
+    }
+
+
+    template <typename T>
+    inline uint hash(const T & t, uint h = 5381)    // generic hash: runs sdbmHash over the sizeof(T) raw bytes of t, seeded with h
+    {
+        return sdbmHash(&t, sizeof(T), h);
+    }
+
+    template <>
+    inline uint hash(const float & f, uint h)       // float specialization: goes through sdbmFloatHash for a single value
+    {
+        return sdbmFloatHash(&f, 1, h);
+    }
+
+
+    // Functors for hash table: Hash is the default hasher and forwards to the hash() overloads.
+    template <typename Key> struct Hash 
+    {
+        uint operator()(const Key & k) const {  // uses hash()'s default seed
+            return hash(k);
+        }
+    };
+
+    template <typename Key> struct Equal    // default key comparison functor: relies on Key's operator==
+    {
+        bool operator()(const Key & k0, const Key & k1) const {
+            return k0 == k1;
+        }
+    };
+
+
+    // Plain two-field aggregate. @@ Move to Utils.h?
+    template <typename T1, typename T2>
+    struct Pair {
+        T1 first;
+        T2 second;
+    };
+
+    template <typename T1, typename T2>
+    bool operator==(const Pair<T1,T2> & p0, const Pair<T1,T2> & p1) {  // member-wise equality; 'second' is compared only if 'first' matches
+        return p0.first == p1.first && p0.second == p1.second;
+    }
+
+    template <typename T1, typename T2>
+    uint hash(const Pair<T1,T2> & p, uint h = 5381) {  // hashes 'first' seeded with h, then 'second' seeded with that result
+        return hash(p.second, hash(p.first, h));       // h used to be ignored here, so a caller-supplied seed had no effect
+    }
+
+
+} // nv namespace
+
+#endif // NV_CORE_HASH_H
diff --git a/thirdparty/thekla_atlas/nvcore/HashMap.h b/thirdparty/thekla_atlas/nvcore/HashMap.h
new file mode 100644
index 0000000000..7856d6a8c9
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/HashMap.h
@@ -0,0 +1,174 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_HASHMAP_H
+#define NV_CORE_HASHMAP_H
+
+/*
+HashMap based on Thatcher Ulrich <tu@tulrich.com> container, donated to the Public Domain.
+
+I'd like to do something to reduce the amount of code generated with this template. The type of 
+U is largely irrelevant to the generated code, except for calls to constructors and destructors,
+but the combination of all T and U pairs generates a large amount of code.
+
+HashMap is not used in NVTT, so it could be removed from the repository.
+*/
+
+
+#include "Memory.h"
+#include "Debug.h"
+#include "ForEach.h"
+#include "Hash.h"
+
+namespace nv 
+{
+    class Stream;
+
+    /** Thatcher Ulrich's hash table.
+    *
+    * Hash table, linear probing, internal chaining.  One
+    * interesting/nice thing about this implementation is that the table
+    * itself is a flat chunk of memory containing no pointers, only
+    * relative indices.  If the key and value types of the hash contain
+    * no pointers, then the hash can be serialized using raw IO.  Could
+    * come in handy.
+    *
+    * Never shrinks, unless you explicitly clear() it.  Expands on
+    * demand, though.  For best results, if you know roughly how big your
+    * table will be, default it to that size when you create it.
+    */
+    template<typename T, typename U, typename H = Hash<T>, typename E = Equal<T> >
+    class NVCORE_CLASS HashMap
+    {
+        NV_FORBID_COPY(HashMap);
+    public:
+
+        /// Default ctor. No table is allocated; size_mask == -1 means "no table".
+        HashMap() : entry_count(0), size_mask(-1), table(NULL) { }
+
+        /// Ctor with size hint (number of elements expected).
+        explicit HashMap(int size_hint) : entry_count(0), size_mask(-1), table(NULL) { setCapacity(size_hint); }
+
+        /// Dtor. Destroys the entries and releases the table.
+        ~HashMap() { clear(); }
+
+
+        void set(const T& key, const U& value);     // insert, or overwrite the value of an existing key
+        void add(const T& key, const U& value);     // insert; the key must not be present yet
+        bool remove(const T& key);                  // returns false when the key is not found
+        void clear();                               // destroys all entries and frees the table
+        bool isEmpty() const;
+        bool get(const T& key, U* value = NULL, T* other_key = NULL) const;    // optional out-params receive the stored value / key
+        bool contains(const T & key) const;
+        int	size() const;                           // number of stored entries
+        int	count() const;                          // same as size()
+        int	capacity() const;                       // number of table slots (size_mask + 1)
+        void checkExpand();
+        void resize(int n);
+
+        void setCapacity(int new_size);
+
+        // Behaves much like std::pair.
+        struct Entry
+        {
+            int	next_in_chain;	// internal chaining for collisions: -2 = empty slot, -1 = end of chain, otherwise index of the next entry
+            uint hash_value;	// avoids recomputing.  Worthwhile?  TOMBSTONE_HASH marks a tombstone.
+            T key;
+            U value;
+
+            Entry() : next_in_chain(-2) {}  // empty slot; hash_value is not initialized here
+            Entry(const Entry& e) : next_in_chain(e.next_in_chain), hash_value(e.hash_value), key(e.key), value(e.value) {}
+            Entry(const T& k, const U& v, int next, int hash) : next_in_chain(next), hash_value(hash), key(k), value(v) {}
+            
+            bool isEmpty() const { return next_in_chain == -2; }
+            bool isEndOfChain() const { return next_in_chain == -1; }
+            bool isTombstone() const { return hash_value == TOMBSTONE_HASH; }
+
+            void clear() {
+                key.~T();	// placement delete
+                value.~U();	// placement delete
+                next_in_chain = -2;
+                hash_value = ~TOMBSTONE_HASH;
+            }
+
+            void makeTombstone() {  // destroys key and value but keeps next_in_chain, so the chain stays linked
+                key.~T();
+                value.~U();
+                hash_value = TOMBSTONE_HASH;
+            }
+        };
+
+
+        // HashMap enumerator. A PseudoIndex is a slot index; size_mask+1 means the enumeration is done.
+        typedef int PseudoIndex;
+        PseudoIndex start() const { PseudoIndex i = 0; findNext(i); return i; }
+        bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= size_mask+1); return i == size_mask+1; };
+        void advance(PseudoIndex & i) const { nvDebugCheck(i <= size_mask+1); i++; findNext(i); }
+
+#if NV_NEED_PSEUDOINDEX_WRAPPER
+        Entry & operator[]( const PseudoIndexWrapper & i ) {
+            Entry & e = entry(i(this));
+            nvDebugCheck(e.isTombstone() == false);
+            return e;
+        }
+        const Entry & operator[]( const PseudoIndexWrapper & i ) const {
+            const Entry & e = entry(i(this));
+            nvDebugCheck(e.isTombstone() == false);
+            return e;
+        }
+#else
+        Entry & operator[](const PseudoIndex & i) {
+            Entry & e = entry(i);
+            nvDebugCheck(e.isTombstone() == false);
+            return e;
+        }
+        const Entry & operator[](const PseudoIndex & i) const {
+            const Entry & e = entry(i);
+            nvDebugCheck(e.isTombstone() == false);
+            return e;
+        }
+#endif
+
+
+        // By default we serialize the key-value pairs compactly.
+        template<typename _T, typename _U, typename _H, typename _E>
+        friend Stream & operator<< (Stream & s, HashMap<_T, _U, _H, _E> & map);
+
+        // This requires more storage, but saves us from rehashing the elements.
+        template<typename _T, typename _U, typename _H, typename _E>
+        friend Stream & rawSerialize(Stream & s, HashMap<_T, _U, _H, _E> & map);
+
+        /// Swap the members of the two given hash maps.
+        template<typename _T, typename _U, typename _H, typename _E>
+        friend void swap(HashMap<_T, _U, _H, _E> & a, HashMap<_T, _U, _H, _E> & b);
+	
+    private:
+        static const uint TOMBSTONE_HASH = (uint) -1;
+
+        uint compute_hash(const T& key) const;
+
+        // Find the index of the matching entry. If no match, then return -1.
+        int	findIndex(const T& key) const;
+
+        // Return the index of the newly cleared element.
+        int removeTombstone(int index);
+
+        // Helpers.
+        Entry & entry(int index);
+        const Entry & entry(int index) const;
+
+        void setRawCapacity(int new_size);
+
+        // Move the enumerator to the next valid element.
+        void findNext(PseudoIndex & i) const;
+
+
+        int	entry_count;    // number of live entries
+        int	size_mask;      // table size minus one; -1 while no table is allocated
+        Entry * table;
+
+    };
+
+} // nv namespace
+
+#endif // NV_CORE_HASHMAP_H
diff --git a/thirdparty/thekla_atlas/nvcore/HashMap.inl b/thirdparty/thekla_atlas/nvcore/HashMap.inl
new file mode 100644
index 0000000000..f0b6bfea62
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/HashMap.inl
@@ -0,0 +1,550 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_HASHMAP_INL
+#define NV_CORE_HASHMAP_INL
+
+#include "HashMap.h"
+
+#include "Stream.h"
+#include "Utils.h" // swap
+
+#include <new> // for placement new
+
+
+namespace nv 
+{
+
+    // Store value under key: overwrite the existing entry if there is one, otherwise insert a new one.
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::set(const T& key, const U& value)
+    {
+        const int existing = findIndex(key);
+        if (existing < 0)
+        {
+            // Entry under key doesn't exist yet.
+            add(key, value);
+            return;
+        }
+        // Key already present: replace its value in place.
+        entry(existing).value = value;
+    }
+
+
+    // Add a new value to the hash table, under the specified key. The key must not already be present (checked below).
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::add(const T& key, const U& value)
+    {
+        nvCheck(findIndex(key) == -1);
+
+        checkExpand();              // may allocate or grow the table
+        nvCheck(table != NULL);
+        entry_count++;
+
+        const uint hash_value = compute_hash(key);
+        const int index = hash_value & size_mask;   // natural slot of the new key
+
+        Entry * natural_entry = &(entry(index));
+
+        if (natural_entry->isEmpty())
+        {
+            // Put the new entry in.
+            new (natural_entry) Entry(key, value, -1, hash_value);
+        } 
+        else if (natural_entry->isTombstone()) {
+            // Put the new entry in, without disturbing the rest of the chain.
+            int next_in_chain = natural_entry->next_in_chain;
+            new (natural_entry) Entry(key, value, next_in_chain, hash_value);
+        }
+        else
+        {
+            // Find a blank spot.
+            int	blank_index = index;
+            for (int search_count = 0; ; search_count++)
+            {
+                blank_index = (blank_index + 1) & size_mask;    // linear probe, wrapping around the table
+                if (entry(blank_index).isEmpty()) break;	// found it
+                if (entry(blank_index).isTombstone()) {
+                    blank_index = removeTombstone(blank_index);     // reuse the slot vacated by collapsing the tombstone
+                    break;
+                }
+                nvCheck(search_count < this->size_mask);
+            }
+            Entry * blank_entry = &entry(blank_index);
+
+            if (int(natural_entry->hash_value & size_mask) == index)
+            {
+                // Collision.  Link into this chain.
+
+                // Move existing list head.
+                new (blank_entry) Entry(*natural_entry);	// placement new, copy ctor
+
+                // Put the new info in the natural entry.
+                natural_entry->key = key;
+                natural_entry->value = value;
+                natural_entry->next_in_chain = blank_index;
+                natural_entry->hash_value = hash_value;
+            }
+            else
+            {
+                // Existing entry does not naturally
+                // belong in this slot.  Existing
+                // entry must be moved.
+
+                // Find natural location of collided element (i.e. root of chain)
+                int	collided_index = natural_entry->hash_value & size_mask;
+                for (int search_count = 0; ; search_count++)
+                {
+                    Entry * e = &entry(collided_index);
+                    if (e->next_in_chain == index)
+                    {
+                        // Here's where we need to splice.
+                        new (blank_entry) Entry(*natural_entry);
+                        e->next_in_chain = blank_index;
+                        break;
+                    }
+                    collided_index = e->next_in_chain;
+                    nvCheck(collided_index >= 0 && collided_index <= size_mask);
+                    nvCheck(search_count <= size_mask);
+                }
+
+                // Put the new data in the natural entry.
+                natural_entry->key = key;
+                natural_entry->value = value;
+                natural_entry->hash_value = hash_value;
+                natural_entry->next_in_chain = -1;  // the new key starts a chain of its own
+            }
+        }
+    }
+
+
+    // Remove the first value under the specified key. Returns true if an entry was removed, false if the key was not found.
+    template<typename T, typename U, typename H, typename E>
+    bool HashMap<T, U, H, E>::remove(const T& key)
+    {
+        if (table == NULL)
+        {
+            return false;
+        }
+
+        int	index = findIndex(key);
+        if (index < 0)
+        {
+            return false;
+        }
+
+        Entry * pos = &entry(index);
+
+        int natural_index = (int) (pos->hash_value & size_mask);    // head slot of the chain this entry belongs to
+
+        if (index != natural_index) {
+            // We're not the head of our chain, so we can
+            // be spliced out of it.
+
+            // Iterate up the chain, and splice out when
+            // we get to index.
+            Entry* e = &entry(natural_index);
+            while (e->next_in_chain != index) {
+                nvDebugCheck(e->isEndOfChain() == false);
+                e = &entry(e->next_in_chain);
+            }
+
+            if (e->isTombstone() && pos->isEndOfChain()) {
+                // Tombstone has nothing else to point
+                // to, so mark it empty.
+                e->next_in_chain = -2;
+            } else {
+                e->next_in_chain = pos->next_in_chain;
+            }
+
+            pos->clear();
+        }
+        else if (pos->isEndOfChain() == false) {
+            // We're the head of our chain, and there are
+            // additional elements.
+            //
+            // We need to put a tombstone here.
+            //
+            // We can't clear the element, because the
+            // rest of the elements in the chain must be
+            // linked to this position.
+            //
+            // We can't move any of the succeeding
+            // elements in the chain (i.e. to fill this
+            // entry), because we don't want to invalidate
+            // any other existing iterators.
+            pos->makeTombstone();
+        } else {
+            // We're the head of the chain, but we're the
+            // only member of the chain.
+            pos->clear();
+        }
+
+        entry_count--;
+
+        return true;
+    }
+
+
+    // Remove all entries from the hash table: destroy every live entry, then release the table storage.
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::clear()
+    {
+        if (table == NULL)
+        {
+            return;     // nothing allocated, nothing to do
+        }
+        for (int slot = 0, last = size_mask; slot <= last; slot++)
+        {
+            Entry & e = entry(slot);
+            if (!e.isEmpty() && !e.isTombstone())
+            {
+                e.clear();
+            }
+        }
+        free(table);
+        table = NULL;
+        entry_count = 0;
+        size_mask = -1;
+    }
+
+
+    // True when the hash holds no entries (including when no table has been allocated yet).
+    template<typename T, typename U, typename H, typename E>
+    bool HashMap<T, U, H, E>::isEmpty() const
+    {
+        return entry_count == 0 || table == NULL;
+    }
+
+
+    // Look up key and optionally copy out what was found.
+    // - If the key is absent, return false and leave *value and *other_key alone.
+    // - If the key is present, return true; *value receives the stored value when value != NULL.
+    // - Likewise *other_key receives the stored key when other_key != NULL.
+    template<typename T, typename U, typename H, typename E>
+    bool HashMap<T, U, H, E>::get(const T& key, U* value/*= NULL*/, T* other_key/*= NULL*/) const
+    {
+        const int slot = findIndex(key);
+        if (slot < 0)
+        {
+            return false;
+        }
+        if (value != NULL) {
+            *value = entry(slot).value;	// take care with side-effects!
+        }
+        if (other_key != NULL) {
+            *other_key = entry(slot).key;
+        }
+        return true;
+    }
+
+    // True when an entry is stored under the given key.
+    template<typename T, typename U, typename H, typename E>
+    bool HashMap<T, U, H, E>::contains(const T & key) const
+    {
+        return findIndex(key) >= 0;
+    }
+
+    // Number of entries currently stored in the hash (not the number of table slots; see capacity()).
+    template<typename T, typename U, typename H, typename E>
+    int	HashMap<T, U, H, E>::size() const
+    {
+        return entry_count;
+    }
+
+    // Alias of size(): number of entries currently stored in the hash.
+    template<typename T, typename U, typename H, typename E>
+    int	HashMap<T, U, H, E>::count() const
+    {
+        return entry_count;
+    }
+
+    template<typename T, typename U, typename H, typename E>
+    int	HashMap<T, U, H, E>::capacity() const
+    {
+        return size_mask+1;     // number of table slots; 0 while no table is allocated (size_mask == -1)
+    }
+
+
+    // Make room for one more entry, growing the table only when needed.  Often this doesn't involve any action.
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::checkExpand()
+    {
+        if (table == NULL) {
+            // Initial creation of table.  Make a minimum-sized table.
+            setRawCapacity(16);
+            return;
+        }
+        const int raw_capacity = size_mask + 1;
+        // Table is more than 2/3rds full.  Expand.
+        if (entry_count * 3 > raw_capacity * 2) setRawCapacity(entry_count * 2);
+    }
+
+
+    // Hint the bucket count to >= n.  Currently this simply forwards to setCapacity(n).
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::resize(int n)
+    {
+        // Not really sure what this means in relation to
+        // STLport's hash_map... they say they "increase the
+        // bucket count to at least n" -- but does that mean
+        // their real capacity after resize(n) is more like
+        // n*2 (since they do linked-list chaining within
+        // buckets?).
+        setCapacity(n);
+    }
+
+
+    // Size the hash so that it can comfortably hold new_size elements (1.5 raw slots per element).  This is a no-op
+    // when that raw slot count would be smaller than the number of elements already stored.
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::setCapacity(int new_size)
+    {
+        const int wanted_slots = (new_size * 3) / 2;
+        if (wanted_slots >= size()) {
+            setRawCapacity(wanted_slots);
+        }
+    }
+
+
+    // By default we serialize the key-value pairs compactly: the entry count, then each live (key, value) pair.
+    // Loading clears the map and re-inserts every pair through add(), so the keys are rehashed.
+    template<typename _T, typename _U, typename _H, typename _E>
+    Stream & operator<< (Stream & s, HashMap<_T, _U, _H, _E> & map)
+    {
+        typedef typename HashMap<_T, _U, _H, _E>::Entry HashMapEntry;
+
+        int entry_count = map.entry_count;
+        s << entry_count;
+
+        if (s.isLoading()) {
+            map.clear();
+            if(entry_count <= 0) {
+                return s;
+            }
+            // Leave map.entry_count at 0: add() below counts every pair itself (pre-seeding it made add() count twice).
+            map.size_mask = nextPowerOfTwo(U32(entry_count)) - 1;
+            map.table = malloc<HashMapEntry>(map.size_mask + 1);
+
+            for (int i = 0; i <= map.size_mask; i++) {
+                map.table[i].next_in_chain = -2;	// mark empty
+            }
+
+            _T key;
+            _U value;
+            for (int i = 0; i < entry_count; i++) {
+                s << key << value;
+                map.add(key, value);
+            }
+        }
+        else {
+            int i = 0;
+            map.findNext(i);
+            while (i != map.size_mask+1) {
+                HashMapEntry & e = map.entry(i);
+                
+                s << e.key << e.value;
+                
+                i++;
+                map.findNext(i);
+            }
+            //foreach(i, map) {
+            //    s << map[i].key << map[i].value;
+            //}
+        }
+
+        return s;
+    }
+
+    // This requires more storage, but saves us from rehashing the elements: every table slot is streamed as it is.
+    template<typename _T, typename _U, typename _H, typename _E>
+    Stream & rawSerialize(Stream & s, HashMap<_T, _U, _H, _E> & map)
+    {
+        typedef typename HashMap<_T, _U, _H, _E>::Entry HashMapEntry;
+
+        if (s.isLoading()) {
+            map.clear();
+        }
+
+        s << map.size_mask;
+
+        if (map.size_mask != -1) {
+            s << map.entry_count;
+
+            if (s.isLoading()) {
+                map.table = malloc<HashMapEntry>(map.size_mask+1);	// the table is released with free(), so it must not come from new[]
+            }
+
+            for (int i = 0; i <= map.size_mask; i++) {
+                HashMapEntry & e = map.table[i];
+                if (s.isLoading()) new (&e) HashMapEntry();	// default-construct the slot before reading into it
+                s << e.next_in_chain << e.hash_value;
+                s << e.key << e.value;
+            }
+        }
+
+        return s;
+    }
+
+    // Swap the members of the two given hash maps (count, mask and table pointer; no entry is copied).
+    template<typename _T, typename _U, typename _H, typename _E>
+    void swap(HashMap<_T, _U, _H, _E> & a, HashMap<_T, _U, _H, _E> & b)
+    {
+        swap(a.entry_count, b.entry_count);
+        swap(a.size_mask, b.size_mask);
+        swap(a.table, b.table);
+    }
+
+
+    // Hash key with H, steering clear of the value reserved for tombstones.
+    template<typename T, typename U, typename H, typename E>
+    uint HashMap<T, U, H, E>::compute_hash(const T& key) const
+    {
+        H hasher;
+        const uint raw = hasher(key);
+        // TOMBSTONE_HASH marks removed slots, so a real key must never carry it: flip one bit if it does.
+        if (raw == TOMBSTONE_HASH) return raw ^ 0x8000;
+        return raw;
+    }
+
+    // Find the index of the matching entry. If no match (or no table has been allocated), then return -1.
+    template<typename T, typename U, typename H, typename E>
+    int	HashMap<T, U, H, E>::findIndex(const T& key) const
+    {
+        if (table == NULL) return -1;
+
+        E equal;
+
+        uint hash_value = compute_hash(key);
+        int	index = hash_value & size_mask;     // natural slot: where this key's chain would start
+
+        const Entry * e = &entry(index);
+        if (e->isEmpty()) return -1;
+        if (e->isTombstone() == false && int(e->hash_value & size_mask) != index) {
+            // occupied by a collider
+            return -1;
+        }
+
+        for (;;)
+        {
+            nvCheck(e->isTombstone() || (e->hash_value & size_mask) == (hash_value & size_mask));
+
+            if (e->hash_value == hash_value && equal(e->key, key))     // compare the cached hash first, then the keys
+            {
+                // Found it.
+                return index;
+            }
+            nvDebugCheck(e->isTombstone() || !equal(e->key, key));   // keys are equal, but hash differs!
+
+            // Keep looking through the chain.
+            index = e->next_in_chain;
+            if (index == -1) break;	// end of chain
+
+            nvCheck(index >= 0 && index <= size_mask);
+            e = &entry(index);
+
+            nvCheck(e->isEmpty() == false || e->isTombstone());
+        }
+        return -1;
+    }
+
+    // Collapse the tombstone at 'index' and return the index of the newly cleared element.
+    template<typename T, typename U, typename H, typename E>
+    int HashMap<T, U, H, E>::removeTombstone(int index) {
+        Entry* e = &entry(index);
+        nvCheck(e->isTombstone());
+        nvCheck(!e->isEndOfChain());
+
+        // Move the next element of the chain into the
+        // tombstone slot, and return the vacated element.
+        int new_blank_index = e->next_in_chain;
+        Entry* new_blank = &entry(new_blank_index);
+        new (e) Entry(*new_blank);      // copy-construct over the tombstone (its key/value were already destroyed)
+        new_blank->clear();             // destroy the moved-from copy and mark its slot empty
+        return new_blank_index;
+    }
+
+    // Helpers: slot accessors; the bounds and NULL-table checks are debug-only.
+    template<typename T, typename U, typename H, typename E>
+    typename HashMap<T, U, H, E>::Entry & HashMap<T, U, H, E>::entry(int index)
+    {
+        nvDebugCheck(table != NULL);
+        nvDebugCheck(index >= 0 && index <= size_mask);
+        return table[index];
+    }
+    template<typename T, typename U, typename H, typename E>
+    const typename HashMap<T, U, H, E>::Entry & HashMap<T, U, H, E>::entry(int index) const
+    {
+        nvDebugCheck(table != NULL);
+        nvDebugCheck(index >= 0 && index <= size_mask);
+        return table[index];
+    }
+
+
+    // Resize the hash table to the given size (Rehash the contents of the current table).  The arg is the number of
+    // hash table entries, not the number of elements we should actually contain (which will be less than this).
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::setRawCapacity(int new_size)
+    {
+        if (new_size <= 0) {
+            // Special case: a non-positive size empties the map and frees the table.
+            clear();
+            return;
+        }
+
+        // Force new_size to be a power of two.
+        new_size = nextPowerOfTwo(U32(new_size));
+
+        HashMap<T, U, H, E> new_hash;
+        new_hash.table = malloc<Entry>(new_size);   // raw storage: no Entry constructor runs here
+        nvDebugCheck(new_hash.table != NULL);
+
+        new_hash.entry_count = 0;
+        new_hash.size_mask = new_size - 1;
+        for (int i = 0; i < new_size; i++)
+        {
+            new_hash.entry(i).next_in_chain = -2;	// mark empty
+        }
+
+        // Copy stuff to new_hash
+        if (table != NULL)
+        {
+            for (int i = 0, n = size_mask; i <= n; i++)
+            {
+                Entry * e = &entry(i);
+                if (e->isEmpty() == false && e->isTombstone() == false)
+                {
+                    // Insert old entry into new hash.
+                    new_hash.add(e->key, e->value);
+                    e->clear();	// placement delete of old element
+                }
+            }
+
+            // Delete our old data buffer.
+            free(table);
+        }
+
+        // Steal new_hash's data.
+        entry_count = new_hash.entry_count;
+        size_mask = new_hash.size_mask;
+        table = new_hash.table;
+        new_hash.entry_count = 0;
+        new_hash.size_mask = -1;
+        new_hash.table = NULL;      // so new_hash's destructor does not free the table we now own
+    }
+
+    // Move the enumerator forward, if needed, to the next slot holding a live entry.
+    template<typename T, typename U, typename H, typename E>
+    void HashMap<T, U, H, E>::findNext(PseudoIndex & i) const {
+        // Falling out of the loop leaves i at size_mask + 1, the "done" position.
+        for (; i <= size_mask; i++) {
+            const Entry & slot = entry(i);
+            if (!slot.isEmpty() && !slot.isTombstone()) {
+                return;
+            }
+        }
+    }
+
+} // nv namespace
+
+#endif // NV_CORE_HASHMAP_INL
diff --git a/thirdparty/thekla_atlas/nvcore/Memory.cpp b/thirdparty/thekla_atlas/nvcore/Memory.cpp
new file mode 100644
index 0000000000..302a2d84cb
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Memory.cpp
@@ -0,0 +1,153 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "Memory.h"
+#include "Debug.h"
+#include "Utils.h"
+
+#include <stdlib.h>
+
+#ifdef NV_OS_LINUX
+#include <malloc.h>
+#endif
+
+#define USE_EFENCE 0
+
+#if USE_EFENCE
+extern "C" void *EF_malloc(size_t size);
+extern "C" void *EF_realloc(void * oldBuffer, size_t newSize);
+extern "C" void EF_free(void * address);
+#endif
+
+using namespace nv;
+
+#if NV_OVERRIDE_ALLOC
+
+void * malloc(size_t size)  // allocator override; only compiled when NV_OVERRIDE_ALLOC is nonzero
+{
+#if USE_EFENCE
+    return EF_malloc(size);     // route through the EF_* allocator declared at the top of this file
+#else
+    return ::malloc(size);      // NOTE(review): this definition is itself the global malloc, so this call looks self-recursive - confirm before enabling
+#endif
+}
+
+void * debug_malloc(size_t size, const char * file, int line)  // same as malloc(); the call-site arguments are accepted but unused
+{
+    NV_UNUSED(file);
+    NV_UNUSED(line);
+#if USE_EFENCE
+    return EF_malloc(size);
+#else
+    return ::malloc(size);
+#endif
+}
+
+void free(void * ptr)   // allocator override matching malloc() above; only compiled when NV_OVERRIDE_ALLOC is nonzero
+{
+#if USE_EFENCE
+    return EF_free(const_cast<void *>(ptr));
+#else
+    ::free(const_cast<void *>(ptr));    // ptr is already non-const, so the cast is a no-op
+#endif
+}
+
+void * realloc(void * ptr, size_t size)    // allocator override; only compiled when NV_OVERRIDE_ALLOC is nonzero
+{
+    nvDebugCheck(ptr != NULL || size != 0); // undefined realloc behavior.
+#if USE_EFENCE
+    return EF_realloc(ptr, size);
+#else
+    return ::realloc(ptr, size);
+#endif
+}
+
+
+/* No need to override this unless we want line info.
+void * operator new (size_t size) throw()
+{
+    return malloc(size);
+}
+
+void operator delete (void *p) throw()
+{
+    free(p);
+}
+
+void * operator new [] (size_t size) throw()
+{
+    return malloc(size);
+}
+
+void operator delete [] (void * p) throw()
+{
+    free(p);
+}
+*/
+
+#if 0 // Code from Apple:
+void* operator new(std::size_t sz) throw (std::bad_alloc)
+{
+        void *result = std::malloc (sz == 0 ? 1 : sz);
+        if (result == NULL)
+                throw std::bad_alloc();
+        gNewCounter++;
+        return result;
+}
+void operator delete(void* p) throw()
+{
+        if (p == NULL)
+                return;
+        std::free (p);
+        gDeleteCounter++;
+}
+
+/* These are the 'nothrow' versions of the above operators.
+   The system version will try to call a std::new_handler if they
+   fail, but your overriding versions are not required to do this.  */
+void* operator new(std::size_t sz, const std::nothrow_t&) throw()
+{
+        try {
+                void * result = ::operator new (sz);  // calls our overridden operator new
+                return result;
+        } catch (std::bad_alloc &) {
+          return NULL;
+        }
+}
+void operator delete(void* p, const std::nothrow_t&) throw()
+{
+        ::operator delete (p);
+}
+
+#endif // 0
+
+#endif // NV_OVERRIDE_ALLOC
+
+void * nv::aligned_malloc(size_t size, size_t alignment)
+{
+    // Allocates 'size' bytes aligned to 'alignment'; release the result with nv::aligned_free().
+    // alignment must be a power of two, multiple of sizeof(void*)
+    nvDebugCheck(isPowerOfTwo(alignment));
+    nvDebugCheck((alignment & (sizeof(void*) - 1)) == 0);
+#if NV_OS_WIN32 || NV_OS_DURANGO
+    return _aligned_malloc(size, alignment);
+#elif NV_OS_DARWIN && !NV_OS_IOS
+    void * ptr = NULL;
+    if (posix_memalign(&ptr, alignment, size) != 0) ptr = NULL;    // report failure as NULL rather than trusting ptr to be left untouched
+    return ptr;
+#elif NV_OS_LINUX
+    return memalign(alignment, size);
+#else // NV_OS_ORBIS || NV_OS_IOS
+    // @@ IC: iOS appears to be 16 byte aligned, should we check alignment and assert if we request a higher alignment factor?
+    return ::malloc(size);  // note: 'alignment' is not honoured on this path
+#endif
+}
+
+void nv::aligned_free(void * ptr)  // releases memory obtained from nv::aligned_malloc
+{
+#if NV_OS_WIN32 || NV_OS_DURANGO
+    _aligned_free(ptr);     // pairs with _aligned_malloc on these platforms
+#else
+    ::free(ptr);            // every other aligned_malloc branch returns memory released with free()
+#endif
+}
+
diff --git a/thirdparty/thekla_atlas/nvcore/Memory.h b/thirdparty/thekla_atlas/nvcore/Memory.h
new file mode 100644
index 0000000000..1f71b60947
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Memory.h
@@ -0,0 +1,72 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_MEMORY_H
+#define NV_CORE_MEMORY_H
+
+#include "nvcore.h"
+
+#include <stdlib.h> // malloc(), realloc() and free()
+#include <string.h> // memset
+//#include <stddef.h> // size_t
+
+//#include <new>	// new and delete
+
+#define TRACK_MEMORY_LEAKS 0
+#if TRACK_MEMORY_LEAKS
+#include <vld.h>
+#endif
+
+
+#if NV_CC_GNUC
+#   define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
+#else
+#   define NV_ALIGN_16 __declspec(align(16))
+#endif
+
+
+#define NV_OVERRIDE_ALLOC 0
+
+#if NV_OVERRIDE_ALLOC
+
+// Custom memory allocator entry points (declared only when NV_OVERRIDE_ALLOC is nonzero; it is defined as 0 above).
+extern "C" {
+    NVCORE_API void * malloc(size_t size);
+    NVCORE_API void * debug_malloc(size_t size, const char * file, int line);
+    NVCORE_API void free(void * ptr);
+    NVCORE_API void * realloc(void * ptr, size_t size);
+}
+
+/*
+#ifdef _DEBUG
+#define new new(__FILE__, __LINE__)
+#define malloc(i) debug_malloc(i, __FILE__, __LINE__)
+#endif
+*/
+
+#endif
+
+namespace nv {
+    NVCORE_API void * aligned_malloc(size_t size, size_t alignment);
+    NVCORE_API void aligned_free(void * );
+
+    // C++ helpers: typed wrappers over the C allocator. They handle raw bytes only; no constructor or destructor runs.
+    template <typename T> NV_FORCEINLINE T * malloc(size_t count) {    // storage for 'count' objects of type T
+        return (T *)::malloc(sizeof(T) * count);
+    }
+
+    template <typename T> NV_FORCEINLINE T * realloc(T * ptr, size_t count) {  // 'count' is in elements, not bytes
+        return (T *)::realloc(ptr, sizeof(T) * count);
+    }
+
+    template <typename T> NV_FORCEINLINE void free(const T * ptr) {    // accepts pointers to const; constness is cast away
+        ::free((void *)ptr);
+    }
+
+    template <typename T> NV_FORCEINLINE void zero(T & data) {         // fills the sizeof(T) bytes of 'data' with zeros
+        memset(&data, 0, sizeof(T));
+    }
+
+} // nv namespace
+
+#endif // NV_CORE_MEMORY_H
diff --git a/thirdparty/thekla_atlas/nvcore/Ptr.h b/thirdparty/thekla_atlas/nvcore/Ptr.h
new file mode 100644
index 0000000000..b43039274b
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Ptr.h
@@ -0,0 +1,322 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#ifndef NV_CORE_PTR_H
+#define NV_CORE_PTR_H
+
+#include "nvcore.h"
+#include "Debug.h"
+
+#include "RefCounted.h"
+
+namespace nv
+{
+    class WeakProxy;
+
+    /** Simple auto pointer template class.
+    *
+    * This is very similar to the standard auto_ptr class, but with some 
+    * additional limitations to make its use less error prone:
+    * - Copy constructor and assignment operator are disabled.
+    * - reset method is removed.
+    * The owned object is destroyed with delete when the AutoPtr is destroyed or reassigned.
+    * The semantics of the standard auto_ptr are not clear and change depending
+    * on the std implementation. For a discussion of the problems of auto_ptr read:
+    * http://www.awprofessional.com/content/images/020163371X/autoptrupdate\auto_ptr_update.html
+    */
+    template <class T>
+    class AutoPtr
+    {
+        NV_FORBID_COPY(AutoPtr);
+        NV_FORBID_HEAPALLOC();
+    public:
+
+        /// Ctor. Takes ownership of p, which may be NULL.
+        AutoPtr(T * p = NULL) : m_ptr(p) { }
+        /// Ctor from a pointer of another type Q, converted to T * with static_cast.
+        template <class Q>
+        AutoPtr(Q * p) : m_ptr(static_cast<T *>(p)) { }
+
+        /// Dtor. Deletes owned pointer.
+        ~AutoPtr() {
+            delete m_ptr;
+            m_ptr = NULL;
+        }
+
+        /// Delete owned pointer and assign new one. Assigning the pointer already held does nothing.
+        void operator=( T * p ) {
+            if (p != m_ptr)
+            {
+                delete m_ptr;
+                m_ptr = p;
+            }
+        }
+        /// Same assignment for a pointer of another type Q, converted to T * with static_cast.
+        template <class Q>
+        void operator=( Q * p ) {
+            if (p != m_ptr)
+            {
+                delete m_ptr;
+                m_ptr = static_cast<T *>(p);
+            }
+        }
+
+        /// Member access. The pointer must not be NULL (checked with nvDebugCheck).
+        T * operator -> () const {
+            nvDebugCheck(m_ptr != NULL);
+            return m_ptr;
+        }
+
+        /// Get reference. The pointer must not be NULL (checked with nvDebugCheck).
+        T & operator*() const {
+            nvDebugCheck(m_ptr != NULL);
+            return *m_ptr;
+        }
+
+        /// Get pointer. Ownership stays with the AutoPtr.
+        T * ptr() const { return m_ptr; }
+
+        /// Relinquish ownership of the underlying pointer and return that pointer.
+        T * release() {
+            T * tmp = m_ptr;
+            m_ptr = NULL;
+            return tmp;
+        }
+
+        /// Equality comparison against a raw const pointer.
+        friend bool operator == (const AutoPtr<T> & ap, const T * const p) {
+            return (ap.ptr() == p);
+        }
+
+        /// Inequality comparison against a raw const pointer.
+        friend bool operator != (const AutoPtr<T> & ap, const T * const p) {
+            return (ap.ptr() != p);
+        }
+
+        /// Equality comparison with the raw const pointer on the left-hand side.
+        friend bool operator == (const T * const p, const AutoPtr<T> & ap) {
+            return (ap.ptr() == p);
+        }
+
+        /// Inequality comparison with the raw const pointer on the left-hand side.
+        friend bool operator != (const T * const p, const AutoPtr<T> & ap) {
+            return (ap.ptr() != p);
+        }
+
+    private:
+        T * m_ptr;
+    };
+
+
+    /// Intrusive reference-counting smart pointer: every copy calls addRef() on the object, every drop calls release().
+    template <class BaseClass>
+    class SmartPtr {
+    public:
+
+        // BaseClass must implement addRef() and release().
+        typedef SmartPtr<BaseClass> ThisType;
+
+        /// Default ctor. Holds NULL.
+        SmartPtr() : m_ptr(NULL) 
+        {
+        }
+
+        /// Converting copy ctor from a SmartPtr of another type. Shares the object and adds a reference.
+        template <class OtherBase>
+        SmartPtr( const SmartPtr<OtherBase> & tc )
+        {
+            m_ptr = static_cast<BaseClass *>( tc.ptr() );
+            if (m_ptr) {
+                m_ptr->addRef();
+            }
+        }
+
+        /// Copy ctor. Shares the object and adds a reference.
+        SmartPtr( const ThisType & bc )
+        {
+            m_ptr = bc.ptr();
+            if (m_ptr) {
+                m_ptr->addRef();
+            }
+        }
+
+        /// Ctor from a raw pointer; adds a reference unless the pointer is NULL. SmartPtr(NULL) is valid.
+        explicit SmartPtr( BaseClass * bc )
+        {
+            m_ptr = bc;
+            if (m_ptr) {
+                m_ptr->addRef();
+            }
+        }
+
+        /// Dtor. Releases the reference held, if any.
+        ~SmartPtr()
+        {
+            set(NULL);
+        }
+
+
+        /// -> operator. The pointer must not be NULL (checked with nvCheck).
+        BaseClass * operator -> () const
+        {
+            nvCheck( m_ptr != NULL );
+            return m_ptr;
+        }
+
+        /// * operator. The pointer must not be NULL (checked with nvCheck).
+        BaseClass & operator*() const
+        {
+            nvCheck( m_ptr != NULL );
+            return *m_ptr;
+        }
+
+        /// Get pointer. The reference count is not changed.
+        BaseClass * ptr() const
+        {
+            return m_ptr;
+        }
+
+        /// Other type assignment.
+        template <class OtherBase>
+        void operator = ( const SmartPtr<OtherBase> & tc )
+        {
+            set( static_cast<BaseClass *>(tc.ptr()) );
+        }
+
+        /// This type assignment.
+        void operator = ( const ThisType & bc )
+        {
+            set( bc.ptr() );
+        }
+
+        /// Pointer assignment.
+        void operator = ( BaseClass * bc )
+        {
+            set( bc );
+        }
+
+
+        /// Other type equality comparison.
+        template <class OtherBase>
+        bool operator == ( const SmartPtr<OtherBase> & other ) const
+        {
+            return m_ptr == other.ptr();
+        }
+
+        /// This type equality comparison.
+        bool operator == ( const ThisType & bc ) const
+        {
+            return m_ptr == bc.ptr();
+        }
+
+        /// Const pointer equality comparison.
+        bool operator == ( const BaseClass * const bc ) const
+        {
+            return m_ptr == bc;
+        }
+
+        /// Other type inequality comparison.
+        template <class OtherBase>
+        bool operator != ( const SmartPtr<OtherBase> & other ) const
+        {
+            return m_ptr != other.ptr();
+        }
+
+        /// This type inequality comparison.
+        bool operator != ( const ThisType & bc ) const
+        {
+            return m_ptr != bc.ptr();
+        }
+
+        /// Const pointer inequality comparison.
+        bool operator != (const BaseClass * const bc) const
+        {
+            return m_ptr != bc;
+        }
+
+        /// This type less-than comparison. Compares the raw pointer values.
+        bool operator < (const ThisType & p) const
+        {
+            return m_ptr < p.ptr();
+        }
+
+        bool isValid() const {
+            return isValidPtr(m_ptr); // isValidPtr is declared elsewhere; its exact criteria are not visible in this header.
+        }
+
+    private:
+
+        // Set this pointer. The new object is referenced before the old one is released, so re-assigning the object already held is safe.
+        void set( BaseClass * p )
+        {
+            if (p) p->addRef();
+            if (m_ptr) m_ptr->release();
+            m_ptr = p;
+        }
+
+    private:
+
+        BaseClass * m_ptr;
+
+    };
+
+
+    /// Weak pointer template class. Observes an object through its WeakProxy and never adds a reference to the object itself.
+    template <class T>
+    class WeakPtr {
+    public:
+
+        WeakPtr() {}
+
+        WeakPtr(T * p)  { operator=(p); }
+        WeakPtr(const SmartPtr<T> & p) { operator=(p.ptr()); }
+
+        // The implicitly generated copy constructor and copy assignment (from WeakPtr<T>) are OK.
+
+        void operator=(T * p)
+        {
+            if (p) {
+                m_proxy = p->getWeakProxy(); // T must provide getWeakProxy(); the proxy is shared and reference counted.
+                nvDebugCheck(m_proxy != NULL);
+                nvDebugCheck(m_proxy->ptr() == p);
+            }
+            else {
+                m_proxy = NULL;
+            }
+        }
+
+        void operator=(const SmartPtr<T> & ptr) { operator=(ptr.ptr()); }
+
+        bool operator==(const SmartPtr<T> & p) const { return ptr() == p.ptr(); }
+        bool operator!=(const SmartPtr<T> & p) const { return ptr() != p.ptr(); }
+
+        bool operator==(const WeakPtr<T> & p) const { return ptr() == p.ptr(); }
+        bool operator!=(const WeakPtr<T> & p) const { return ptr() != p.ptr(); }
+
+        bool operator==(T * p) const { return ptr() == p; }
+        bool operator!=(T * p) const { return ptr() != p; }
+
+        T * operator->() const // The observed pointer must not be NULL (checked with nvDebugCheck).
+        {
+            T * p = ptr();
+            nvDebugCheck(p != NULL);
+            return p;
+        }
+
+        T * ptr() const // Returns the pointer stored in the proxy, or NULL when no proxy is attached.
+        {
+            if (m_proxy != NULL) {
+                return static_cast<T *>(m_proxy->ptr());
+            }
+            return NULL;
+        }
+
+    private:
+
+        mutable SmartPtr<WeakProxy> m_proxy;
+
+    };
+
+
+} // nv namespace
+
+#endif // NV_CORE_PTR_H
diff --git a/thirdparty/thekla_atlas/nvcore/RadixSort.cpp b/thirdparty/thekla_atlas/nvcore/RadixSort.cpp
new file mode 100644
index 0000000000..3f44620c99
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/RadixSort.cpp
@@ -0,0 +1,285 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "RadixSort.h"
+
+#include "Utils.h"
+
+#include <string.h> // memset
+
+using namespace nv;
+
+static inline void FloatFlip(uint32 & f) { // Rewrites float bits so that unsigned integer order matches float order.
+    //uint32 mask = -int32(f >> 31) | 0x80000000; // Michael Herf.
+    int32 mask = (int32(f) >> 31) | 0x80000000; // Warren Hunt, Manchor Ko. Sign set: all bits (needs an arithmetic >>); sign clear: sign bit only.
+    f ^= mask;
+}
+
+static inline void IFloatFlip(uint32 & f) { // Inverse of FloatFlip: restores the original float bit pattern.
+    uint32 mask = ((f >> 31) - 1) | 0x80000000; // Michael Herf. Top bit set: flip the sign bit only; top bit clear: flip every bit.
+    //uint32 mask = (int32(f ^ 0x80000000) >> 31) | 0x80000000; // Warren Hunt, Manchor Ko. @@ Correct, but fails in release on gcc-4.2.1
+    f ^= mask;
+}
+
+
+template<typename T> 
+void createHistograms(const T * buffer, uint count, uint * histogram)
+{
+    const uint bucketCount = sizeof(T); // (8 * sizeof(T)) / log2(radix)
+    // Builds one 256-bin histogram per byte of T over buffer[0..count). 'histogram' must hold 256 * sizeof(T) counters.
+    // Init bucket pointers.
+    uint * h[bucketCount];
+    for (uint i = 0; i < bucketCount; i++) {
+#if NV_BIG_ENDIAN
+        h[sizeof(T)-1-i] = histogram + 256 * i;
+#else
+        h[i] = histogram + 256 * i;
+#endif
+    }
+
+    // Clear histograms.
+    memset(histogram, 0, 256 * bucketCount * sizeof(uint));
+
+    // @@ Add support for signed integers.
+
+    // Build histograms. Each iteration consumes exactly one element, byte by byte in memory order.
+    const uint8 * p = (const uint8 *)buffer; // @@ Does this break aliasing rules?
+    const uint8 * pe = p + count * sizeof(T);
+
+    while (p != pe) { // The unrolled body only handles sizeof(T) == 4 or sizeof(T) == 8.
+        h[0][*p++]++, h[1][*p++]++, h[2][*p++]++, h[3][*p++]++;
+        if (bucketCount == 8) h[4][*p++]++, h[5][*p++]++, h[6][*p++]++, h[7][*p++]++;
+    }
+}
+
+/*
+template <>
+void createHistograms<float>(const float * buffer, uint count, uint * histogram)
+{
+    // Init bucket pointers.
+    uint32 * h[4];
+    for (uint i = 0; i < 4; i++) {
+#if NV_BIG_ENDIAN
+        h[3-i] = histogram + 256 * i;
+#else
+        h[i] = histogram + 256 * i;
+#endif
+    }
+
+    // Clear histograms.
+    memset(histogram, 0, 256 * 4 * sizeof(uint32));
+
+    // Build histograms.
+    for (uint i = 0; i < count; i++) {
+        uint32 fi = FloatFlip(buffer[i]);
+
+        h[0][fi & 0xFF]++;
+        h[1][(fi >> 8) & 0xFF]++;
+        h[2][(fi >> 16) & 0xFF]++;
+        h[3][fi >> 24]++;
+    }
+}
+*/
+
+RadixSort::RadixSort() : m_size(0), m_ranks(NULL), m_ranks2(NULL), m_validRanks(false)
+{   // Empty sorter: no buffers are allocated until the first sort.
+}
+
+RadixSort::RadixSort(uint reserve_count) : m_size(0), m_ranks(NULL), m_ranks2(NULL), m_validRanks(false)
+{   // Allocates both rank buffers for reserve_count elements up front.
+    checkResize(reserve_count);
+}
+
+RadixSort::~RadixSort()
+{
+    // Both rank buffers are plain allocations made in resize(); either may still be NULL.
+    free(m_ranks);
+    free(m_ranks2);
+}
+
+
+void RadixSort::resize(uint count)
+{
+    m_ranks = realloc<uint>(m_ranks, count);   // result buffer, 'count' entries; a NULL return is not checked
+    m_ranks2 = realloc<uint>(m_ranks2, count); // scratch buffer of the same size, swapped with m_ranks by radixSort
+}
+
+inline void RadixSort::checkResize(uint count)
+{
+    // Same element count as the previous sort: keep the buffers and any ranks already computed.
+    if (count == m_size) return;
+    // Reallocate only when growing; a smaller count reuses the existing buffers.
+    if (m_size < count) resize(count);
+    m_validRanks = false;
+    m_size = count;
+}
+
+template <typename T> inline void RadixSort::insertionSort(const T * input, uint count)
+{
+    // Insertion sort over the rank array. On return m_ranks[0..count) holds the indices of
+    // 'input' in ascending order of value and m_validRanks is set.
+    //
+    // input  Values to rank. They are only read, never moved.
+    // count  Number of values. m_ranks must have room for at least 'count' entries.
+    //
+    // When m_validRanks is already set, the existing ranks are the starting order, which
+    // keeps re-sorting nearly sorted data cheap. Otherwise the identity order is used.
+    // An entry is only moved past strictly greater values, so entries that compare equal
+    // keep the relative order they had in the starting ranks.
+
+    if (count == 0) {
+        // Nothing to rank, and the buffers may not even be allocated.
+        return;
+    }
+
+    const bool fromIdentity = !m_validRanks;
+    if (fromIdentity) {
+        m_ranks[0] = 0;
+    }
+
+    for (uint i = 1; i < count; ++i)
+    {
+        // Index being inserted. Unsigned, because it is used to index 'input'.
+        const uint rank = fromIdentity ? i : m_ranks[i];
+
+        // Shift greater entries one slot to the right until the insertion point is found.
+        uint j = i;
+        while (j != 0 && input[rank] < input[m_ranks[j-1]])
+        {
+            m_ranks[j] = m_ranks[j-1];
+            --j;
+        }
+
+        // Unconditional store: when nothing was shifted this writes slot i itself,
+        // which also takes care of seeding that slot in the identity case.
+        m_ranks[j] = rank;
+    }
+
+    // The ranks now describe 'input'; later sorts may start from them.
+    m_validRanks = true;
+}
+
+template <typename T> inline void RadixSort::radixSort(const T * input, uint count)
+{
+    const uint P = sizeof(T); // pass count
+    // Byte-wise LSD radix sort of input[0..count) treated as unsigned keys; leaves the sorted indices in m_ranks.
+    // Allocate histograms & offsets on the stack
+    uint histogram[256 * P];
+    uint * link[256];
+
+    createHistograms(input, count, histogram);
+
+    // Radix sort, j is the pass number (0=LSB, P-1=MSB)
+    for (uint j = 0; j < P; j++)
+    {
+        // Pointer to this bucket.
+        const uint * h = &histogram[j * 256];
+
+        const uint8 * inputBytes = (const uint8*)input; // @@ Is this aliasing legal? (Byte-wise reads are, assuming uint8 is unsigned char.)
+
+#if NV_BIG_ENDIAN
+        inputBytes += P - 1 - j;
+#else
+        inputBytes += j;
+#endif
+
+        if (h[inputBytes[0]] == count) {
+            // Skip this pass, all values are the same. (The bin of the first element's byte holds every element.)
+            continue;
+        }
+
+        // Create offsets: link[b] points at the first output slot for byte value b.
+        link[0] = m_ranks2;
+        for (uint i = 1; i < 256; i++) link[i] = link[i-1] + h[i-1];
+
+        // Perform Radix Sort
+        if (!m_validRanks)
+        {
+            for (uint i = 0; i < count; i++)
+            {
+                *link[inputBytes[i*P]]++ = i;
+            }
+            m_validRanks = true;
+        }
+        else
+        {
+            for (uint i = 0; i < count; i++)
+            {
+                const uint idx = m_ranks[i];
+                *link[inputBytes[idx*P]]++ = idx;
+            }
+        }
+
+        // Swap pointers for next pass. Valid indices - the most recent ones - are in m_ranks after the swap.
+        swap(m_ranks, m_ranks2);
+    }
+
+    // All values were equal, generate linear ranks.
+    if (!m_validRanks)
+    {
+        for (uint i = 0; i < count; i++)
+        {
+            m_ranks[i] = i;
+        }
+        m_validRanks = true;
+    }
+}
+
+
+RadixSort & RadixSort::sort(const uint32 * input, uint count)
+{
+    // Ranks 32-bit unsigned keys; the sorted index list is read back through ranks().
+    // A NULL or empty input leaves the sorter untouched.
+    if (input != NULL && count != 0) {
+        checkResize(count); // make room for 'count' ranks
+        if (count >= 32) {
+            radixSort<uint32>(input, count);
+        }
+        else {
+            insertionSort(input, count); // small inputs use the insertion sort
+        }
+    }
+    return *this;
+}
+
+
+RadixSort & RadixSort::sort(const uint64 * input, uint count)
+{
+    // Ranks 64-bit unsigned keys; the sorted index list is read back through ranks().
+    // A NULL or empty input leaves the sorter untouched.
+    if (input != NULL && count != 0) {
+        checkResize(count); // make room for 'count' ranks
+        if (count >= 64) {
+            radixSort(input, count);
+        }
+        else {
+            insertionSort(input, count); // small inputs use the insertion sort
+        }
+    }
+    return *this;
+}
+
+RadixSort& RadixSort::sort(const float * input, uint count)
+{
+    if (input == NULL || count == 0) return *this;
+    // Ranks float keys. A NULL or empty input returns above without touching the sorter.
+    // Resize lists if needed
+    checkResize(count);
+
+    if (count < 32) {
+        insertionSort(input, count);
+    }
+    else {
+        // @@ Avoid touching the input multiple times.
+        for (uint i = 0; i < count; i++) {
+            FloatFlip((uint32 &)input[i]);
+        }
+        // NOTE(review): the casts drop const and rewrite the caller's buffer in place, so 'input' must be writable memory.
+        radixSort<uint32>((const uint32 *)input, count);
+        // Undo the bit flip so the caller sees the original values again.
+        for (uint i = 0; i < count; i++) {
+            IFloatFlip((uint32 &)input[i]);
+        }
+    }
+
+    return *this;
+}
diff --git a/thirdparty/thekla_atlas/nvcore/RadixSort.h b/thirdparty/thekla_atlas/nvcore/RadixSort.h
new file mode 100644
index 0000000000..82325ebb24
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/RadixSort.h
@@ -0,0 +1,75 @@
+#pragma once
+#ifndef NV_CORE_RADIXSORT_H
+#define NV_CORE_RADIXSORT_H
+
+// Based on Pierre Terdiman's and Michael Herf's source code.
+// http://www.codercorner.com/RadixSortRevisited.htm
+// http://www.stereopsis.com/radix.html
+
+#include "nvcore.h"
+#include "Array.h"
+
+namespace nv
+{
+
+    class NVCORE_CLASS RadixSort
+    {   // Sorts keys indirectly: the input is left in place and a list of indices in sorted order is produced.
+        NV_FORBID_COPY(RadixSort);
+    public:
+        // Constructor/Destructor
+        RadixSort();
+        RadixSort(uint reserve_count);
+        ~RadixSort();
+
+        // Invalidate ranks, so the next sort does not start from the previous order.
+        RadixSort & reset() { m_validRanks = false; return *this; }
+
+        // Sorting methods. Each returns *this so that calls can be chained.
+        RadixSort & sort(const uint32 * input, uint count);
+        RadixSort & sort(const uint64 * input, uint count);
+        RadixSort & sort(const float * input, uint count);
+
+        // Helpers. Same as above, taking the pointer and count from an Array.
+        RadixSort & sort(const Array<uint32> & input);
+        RadixSort & sort(const Array<uint64> & input);
+        RadixSort & sort(const Array<float> & input);
+
+        // Access to results. m_ranks is a list of indices in sorted order, i.e. in the order you may further process your data
+        inline const uint * ranks() const { nvDebugCheck(m_validRanks); return m_ranks; }
+        inline uint * ranks() { nvDebugCheck(m_validRanks); return m_ranks; }
+        inline uint rank(uint i) const { nvDebugCheck(m_validRanks); return m_ranks[i]; }
+
+        // Query whether the sort has been performed.
+        inline bool valid() const { return m_validRanks; }
+
+    private:
+        uint m_size;        // element count of the most recent sort (or reservation)
+        uint * m_ranks;     // sorted indices
+        uint * m_ranks2;    // scratch buffer used by the radix passes
+        bool m_validRanks;  // true once m_ranks holds a usable order
+
+        // Internal methods
+        template <typename T> void insertionSort(const T * input, uint count);
+        template <typename T> void radixSort(const T * input, uint count);
+
+        void checkResize(uint nb);
+        void resize(uint nb);
+    };
+
+    inline RadixSort & RadixSort::sort(const Array<uint32> & input) { // Array convenience overload for uint32 keys.
+        return sort(input.buffer(), input.count());
+    }
+
+    inline RadixSort & RadixSort::sort(const Array<uint64> & input) { // Array convenience overload for uint64 keys.
+        return sort(input.buffer(), input.count());
+    }
+
+    inline RadixSort & RadixSort::sort(const Array<float> & input) { // Array convenience overload for float keys.
+        return sort(input.buffer(), input.count());
+    }
+
+} // nv namespace
+
+
+
+#endif // NV_CORE_RADIXSORT_H
diff --git a/thirdparty/thekla_atlas/nvcore/RefCounted.h b/thirdparty/thekla_atlas/nvcore/RefCounted.h
new file mode 100644
index 0000000000..b8d68edee3
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/RefCounted.h
@@ -0,0 +1,149 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#ifndef NV_CORE_REFCOUNTED_H
+#define NV_CORE_REFCOUNTED_H
+
+#include "nvcore.h"
+#include "Debug.h"
+
+#define NV_DECLARE_PTR(Class) \
+    template <class T> class SmartPtr; \
+    typedef SmartPtr<class Class> Class ## Ptr; \
+    typedef SmartPtr<const class Class> Class ## ConstPtr // Declares ClassPtr and ClassConstPtr; the caller supplies the final ';'.
+
+
+namespace nv
+{
+    /// Weak proxy: a reference-counted cell holding an untyped pointer to an object.
+    /// The object clears the pointer when it dies, so holders of the proxy can tell.
+    class WeakProxy
+    {
+        NV_FORBID_COPY(WeakProxy);
+    public:
+        /// Ctor. Stores ptr; the reference count starts at zero.
+        WeakProxy(void * ptr) : m_count(0), m_ptr(ptr) { }
+
+        /// Dtor. Checks that every reference has been released.
+        ~WeakProxy()
+        {
+            nvCheck( m_count == 0 );
+        }
+
+        /// Add one reference and return the new count.
+        uint addRef() const
+        {
+            return ++m_count;
+        }
+
+        /// Drop one reference and return the remaining count.
+        /// The proxy deletes itself when the count reaches zero.
+        uint release() const
+        {
+            nvCheck( m_count > 0 );
+
+            if( --m_count != 0 ) {
+                return m_count;
+            }
+            delete this;
+            return 0;
+        }
+
+        /// WeakPtr's call this to determine if their pointer is valid or not.
+        bool isAlive() const {
+            return ptr() != NULL;
+        }
+
+        /// Clears the tracked pointer. Only the actual object should call this.
+        void notifyObjectDied() {
+            m_ptr = NULL;
+        }
+
+        /// Return the tracked pointer; NULL after notifyObjectDied().
+        void * ptr() const {
+            return m_ptr;
+        }
+
+    private:
+        mutable int m_count; // mutable so the const addRef()/release() can update it
+        void * m_ptr;
+    };
+
+
+    /// Reference counted base class to be used with SmartPtr and WeakPtr.
+    class RefCounted
+    {
+        NV_FORBID_COPY(RefCounted);
+    public:
+
+        /// Ctor. The count starts at zero and no weak proxy exists yet.
+        RefCounted() : m_count(0), m_weak_proxy(NULL)
+        {
+        }
+
+        /// Virtual dtor. Expects the count to be zero and detaches the weak proxy, if one was created.
+        virtual ~RefCounted()
+        {
+            nvCheck( m_count == 0 );
+            releaseWeakProxy();
+        }
+
+
+        /// Increase reference count and return the new value.
+        uint addRef() const
+        {
+            m_count++;
+            return m_count;
+        }
+
+
+        /// Decrease reference count and delete the object when it reaches 0. Returns the remaining count.
+        uint release() const
+        {
+            nvCheck( m_count > 0 );
+
+            m_count--;
+            if( m_count == 0 ) {
+                delete this;
+                return 0;
+            }
+            return m_count;
+        }
+
+        /// Get weak proxy. Created on first use; the object keeps one reference to it.
+        WeakProxy * getWeakProxy() const
+        {
+            if (m_weak_proxy == NULL) {
+                m_weak_proxy = new WeakProxy((void *)this);
+                m_weak_proxy->addRef();
+            }
+            return m_weak_proxy;
+        }
+
+        /// Release the weak proxy: mark the object as dead, then drop the object's own reference.
+        void releaseWeakProxy() const
+        {
+            if (m_weak_proxy != NULL) {
+                m_weak_proxy->notifyObjectDied();
+                m_weak_proxy->release();
+                m_weak_proxy = NULL;
+            }
+        }
+
+        /// Get reference count.
+        int refCount() const
+        {
+            return m_count;
+        }
+
+
+    private:
+
+        mutable int m_count;               // mutable so the const addRef()/release() can update it
+        mutable WeakProxy * m_weak_proxy;  // created lazily by getWeakProxy()
+
+    };
+
+} // nv namespace
+
+
+#endif // NV_CORE_REFCOUNTED_H
diff --git a/thirdparty/thekla_atlas/nvcore/StdStream.h b/thirdparty/thekla_atlas/nvcore/StdStream.h
new file mode 100644
index 0000000000..f65d6dab59
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/StdStream.h
@@ -0,0 +1,474 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+//#pragma once
+//#ifndef NV_CORE_STDSTREAM_H
+//#define NV_CORE_STDSTREAM_H
+
+#include "nvcore.h"
+#include "Stream.h"
+#include "Array.h"
+
+#include <stdio.h> // fopen
+#include <string.h> // memcpy
+
+namespace nv
+{
+
+    // Portable fopen wrapper: returns the opened FILE, or NULL when the file cannot be opened.
+    inline FILE * fileOpen(const char * fileName, const char * mode)
+    {
+        nvCheck(fileName != NULL);
+#if !(NV_CC_MSVC && _MSC_VER >= 1400)
+        return fopen(fileName, mode);
+#else
+        FILE * handle = NULL;
+        if (fopen_s(&handle, fileName, mode) != 0) {
+            return NULL;
+        }
+        return handle;
+#endif
+    }
+
+
+    /// Base stdio stream: Stream operations implemented on top of a FILE handle.
+    class NVCORE_CLASS StdStream : public Stream
+    {
+        NV_FORBID_COPY(StdStream);
+    public:
+
+        /// Ctor. Wraps fp (which may be NULL); when autoclose is set the destructor closes it.
+        StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
+
+        /// Dtor. Closes the file only when autoclose was requested.
+        virtual ~StdStream()
+        {
+            if( m_fp != NULL && m_autoclose ) {
+#if NV_OS_WIN32
+                _fclose_nolock( m_fp );
+#else
+                fclose( m_fp );
+#endif
+            }
+        }
+
+
+        /** @name Stream implementation. */
+        //@{
+        virtual void seek( uint pos ) // Absolute seek from the start of the file.
+        {
+            nvDebugCheck(m_fp != NULL);
+            nvDebugCheck(pos <= size());
+#if NV_OS_WIN32
+            _fseek_nolock(m_fp, pos, SEEK_SET);
+#else
+            fseek(m_fp, pos, SEEK_SET);
+#endif
+        }
+        /// Current position in the file. A failed ftell is not detected.
+        virtual uint tell() const
+        {
+            nvDebugCheck(m_fp != NULL);
+#if NV_OS_WIN32
+            return _ftell_nolock(m_fp);
+#else
+            return (uint)ftell(m_fp);
+#endif
+        }
+        /// File size, measured by seeking to the end and then restoring the current position.
+        virtual uint size() const
+        {
+            nvDebugCheck(m_fp != NULL);
+#if NV_OS_WIN32
+            uint pos = _ftell_nolock(m_fp);
+            _fseek_nolock(m_fp, 0, SEEK_END);
+            uint end = _ftell_nolock(m_fp);
+            _fseek_nolock(m_fp, pos, SEEK_SET);
+#else
+            uint pos = (uint)ftell(m_fp);
+            fseek(m_fp, 0, SEEK_END);
+            uint end = (uint)ftell(m_fp);
+            fseek(m_fp, pos, SEEK_SET);
+#endif
+            return end;
+        }
+        /// True when there is no file, or when the stdio error indicator is set.
+        virtual bool isError() const
+        {
+            return m_fp == NULL || ferror( m_fp ) != 0;
+        }
+        /// Clears the stdio error and end-of-file indicators.
+        virtual void clearError()
+        {
+            nvDebugCheck(m_fp != NULL);
+            clearerr(m_fp);
+        }
+
+        // @@ The original implementation used feof, which only returns true after an attempt to read *past* the end of the stream.
+        // After reading the last byte of a file, isAtEnd would still return false even though the stream pointer is at the end.
+        // That was not the intent and was inconsistent with MemoryInputStream, so the position is compared against the end using ftell and fseek.
+        virtual bool isAtEnd() const
+        {
+            if (m_fp == NULL) return true;
+            //nvDebugCheck(m_fp != NULL);
+            //return feof( m_fp ) != 0;
+#if NV_OS_WIN32
+            uint pos = _ftell_nolock(m_fp);
+            _fseek_nolock(m_fp, 0, SEEK_END);
+            uint end = _ftell_nolock(m_fp);
+            _fseek_nolock(m_fp, pos, SEEK_SET);
+#else
+            uint pos = (uint)ftell(m_fp);
+            fseek(m_fp, 0, SEEK_END);
+            uint end = (uint)ftell(m_fp);
+            fseek(m_fp, pos, SEEK_SET);
+#endif
+            return pos == end;
+        }
+
+        /// Always true.
+        virtual bool isSeekable() const { return true; }
+        //@}
+
+    protected:
+
+        FILE * m_fp;        // wrapped file handle; NULL when opening failed
+        bool m_autoclose;   // whether the destructor closes m_fp
+
+    };
+
+
+    /// Standard output stream: a StdStream that writes.
+    class NVCORE_CLASS StdOutputStream : public StdStream
+    {
+        NV_FORBID_COPY(StdOutputStream);
+    public:
+
+        /// Construct stream by file name. The file is opened in "wb" mode and closed by the destructor.
+        StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
+
+        /// Construct stream by file handle.
+        StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
+        {
+        }
+
+        /** @name Stream implementation. */
+        //@{
+        /// Write len bytes from data. Returns the number of bytes actually written.
+        virtual uint serialize( void * data, uint len )
+        {
+            nvDebugCheck(data != NULL);
+            nvDebugCheck(m_fp != NULL);
+#if NV_OS_WIN32
+            return (uint)_fwrite_nolock(data, 1, len, m_fp);
+#elif NV_OS_LINUX
+            return (uint)fwrite_unlocked(data, 1, len, m_fp);
+#elif NV_OS_DARWIN
+            // Byte-at-a-time write. Stop at the first failed byte so the count is a real byte count, as on the other platforms.
+            for (uint i = 0; i < len; i++) {
+                if (putc_unlocked(((char *)data)[i], m_fp) == EOF) return i;
+            }
+            return len;
+#else
+            return (uint)fwrite(data, 1, len, m_fp);
+#endif
+        }
+
+        virtual bool isLoading() const
+        {
+            return false;
+        }
+
+        virtual bool isSaving() const
+        {
+            return true;
+        }
+        //@}
+
+    };
+
+
+    /// Standard input stream: a StdStream that reads.
+    class NVCORE_CLASS StdInputStream : public StdStream
+    {
+        NV_FORBID_COPY(StdInputStream);
+    public:
+
+        /// Construct stream by file name. The file is opened in "rb" mode and closed by the destructor.
+        StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
+
+        /// Construct stream by file handle.
+        StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
+        {
+        }
+
+        /** @name Stream implementation. */
+        //@{
+        /// Read up to len bytes into data. Returns the number of bytes actually read.
+        virtual uint serialize( void * data, uint len )
+        {
+            nvDebugCheck(data != NULL);
+            nvDebugCheck(m_fp != NULL);
+#if NV_OS_WIN32
+            return (uint)_fread_nolock(data, 1, len, m_fp);
+#elif NV_OS_LINUX
+            return (uint)fread_unlocked(data, 1, len, m_fp);
+#elif NV_OS_DARWIN
+            // This is rather lame. Not sure if it's faster than the locked version.
+            // getc_unlocked() returns EOF at end of file and on a read error; both end the read.
+            for (uint i = 0; i < len; i++) {
+                const int c = getc_unlocked(m_fp);
+                if (c == EOF) return i; // the EOF marker itself is never stored in the buffer
+                ((char *)data)[i] = (char)c;
+            }
+            return len;
+#else
+            return (uint)fread(data, 1, len, m_fp);
+#endif
+            
+        }
+
+        virtual bool isLoading() const
+        {
+            return true;
+        }
+
+        virtual bool isSaving() const
+        {
+            return false;
+        }
+        //@}
+    };
+
+
+
+    /// Memory input stream: reads from a caller-owned block of memory. The memory is neither copied nor freed.
+    class NVCORE_CLASS MemoryInputStream : public Stream
+    {
+        NV_FORBID_COPY(MemoryInputStream);
+    public:
+
+        /// Ctor. Reads start at mem and may cover at most size bytes.
+        MemoryInputStream( const uint8 * mem, uint size ) : m_mem(mem), m_ptr(mem), m_size(size) { }
+
+        /** @name Stream implementation. */
+        //@{
+        /// Read up to len bytes into data. Returns the number of bytes copied; 0 when the stream is in an error state.
+        virtual uint serialize( void * data, uint len )
+        {
+            nvDebugCheck(data != NULL);
+            nvDebugCheck(!isError());
+            if (isError()) return 0; // no memory, or position out of range: 'left' below would wrap around
+            uint left = m_size - tell();
+            if (len > left) len = left;
+
+            memcpy( data, m_ptr, len );
+            m_ptr += len;
+
+            return len;
+        }
+
+        virtual void seek( uint pos ) // Absolute seek; a position past the end puts the stream in the error state.
+        {
+            nvDebugCheck(!isError());
+            m_ptr = m_mem + pos;
+            nvDebugCheck(!isError());
+        }
+
+        virtual uint tell() const // Offset of the read position from the start of the block.
+        {
+            nvDebugCheck(m_ptr >= m_mem);
+            return uint(m_ptr - m_mem);
+        }
+
+        virtual uint size() const
+        {
+            return m_size;
+        }
+
+        virtual bool isError() const // True when there is no memory or the read position lies outside the block.
+        {
+            return m_mem == NULL || m_ptr > m_mem + m_size || m_ptr < m_mem;
+        }
+
+        virtual void clearError()
+        {
+            // Nothing to do.
+        }
+
+        virtual bool isAtEnd() const
+        {
+            return m_ptr == m_mem + m_size;
+        }
+
+        /// Always true.
+        virtual bool isSeekable() const
+        {
+            return true;
+        }
+
+        virtual bool isLoading() const
+        {
+            return true;
+        }
+
+        virtual bool isSaving() const
+        {
+            return false;
+        }
+        //@}
+
+        const uint8 * ptr() const { return m_ptr; } // Current read position.
+
+
+    private:
+
+        const uint8 * m_mem;  // start of the block
+        const uint8 * m_ptr;  // current read position
+        uint m_size;          // block size in bytes
+
+    };
+
+
+    /// Buffer output stream: appends everything written to a caller-owned byte array.
+    class NVCORE_CLASS BufferOutputStream : public Stream
+    {
+        NV_FORBID_COPY(BufferOutputStream);
+    public:
+        /// Ctor. Only a reference to buffer is kept, so the array must outlive the stream.
+        BufferOutputStream(Array<uint8> & buffer) : m_buffer(buffer) { }
+        /// Appends len bytes from data to the array and returns len.
+        virtual uint serialize( void * data, uint len )
+        {
+            nvDebugCheck(data != NULL);
+            m_buffer.append((uint8 *)data, len);
+            return len;
+        }
+
+        virtual void seek( uint /*pos*/ ) { /*Not implemented*/ }
+        virtual uint tell() const { return m_buffer.size(); } // the write position is always the end of the array
+        virtual uint size() const { return m_buffer.size(); }
+
+        virtual bool isError() const { return false; }
+        virtual void clearError() {}
+
+        virtual bool isAtEnd() const { return true; }
+        virtual bool isSeekable() const { return false; }
+        virtual bool isLoading() const { return false; }
+        virtual bool isSaving() const { return true; }
+
+    private:
+        Array<uint8> & m_buffer;
+    };
+
+
+    /// Protected stream: forwards every call to a wrapped Stream; serialize() and seek() also check its error state afterwards.
+    class NVCORE_CLASS ProtectedStream : public Stream
+    {
+        NV_FORBID_COPY(ProtectedStream);
+    public:
+
+        /// Ctor. Wraps s without taking ownership.
+        ProtectedStream( Stream & s ) : m_s(&s), m_autodelete(false)
+        { 
+        }
+
+        /// Ctor. Wraps s, which must not be NULL; when autodelete is set the destructor deletes it.
+        ProtectedStream( Stream * s, bool autodelete = true ) : 
+        m_s(s), m_autodelete(autodelete) 
+        {
+            nvDebugCheck(m_s != NULL);
+        }
+
+        /// Dtor. Deletes the wrapped stream only when autodelete was requested.
+        virtual ~ProtectedStream()
+        {
+            if( m_autodelete ) {
+                delete m_s;
+            }
+        }
+
+        /** @name Stream implementation. */
+        //@{
+        /// Forward to the wrapped stream's serialize(), then check its error state.
+        virtual uint serialize( void * data, uint len )
+        {
+            nvDebugCheck(data != NULL);
+            len = m_s->serialize( data, len );
+
+            if( m_s->isError() ) {
+#if NV_OS_ORBIS
+                //SBtodoORBIS disabled (no exceptions)
+#else
+                throw; // NOTE(review): no exception is being handled here, so this rethrow ends in std::terminate() - confirm the intent.
+#endif
+            }
+
+            return len;
+        }
+
+        virtual void seek( uint pos ) // Forward to the wrapped stream's seek(), then check its error state.
+        {
+            m_s->seek( pos );
+
+            if( m_s->isError() ) {
+#if NV_OS_ORBIS
+                //SBtodoORBIS disabled (no exceptions)
+#else
+                throw; // NOTE(review): same as in serialize() - a rethrow with no active exception terminates the program.
+#endif
+            }
+        }
+
+        virtual uint tell() const
+        {
+            return m_s->tell();
+        }
+
+        virtual uint size() const
+        {
+            return m_s->size();
+        }
+
+        virtual bool isError() const
+        {
+            return m_s->isError();
+        }
+
+        virtual void clearError()
+        {
+            m_s->clearError();
+        }
+
+        virtual bool isAtEnd() const
+        {
+            return m_s->isAtEnd();
+        }
+
+        virtual bool isSeekable() const
+        {
+            return m_s->isSeekable();
+        }
+
+        virtual bool isLoading() const
+        {
+            return m_s->isLoading();
+        }
+
+        virtual bool isSaving() const
+        {
+            return m_s->isSaving();
+        }
+        //@}
+
+
+    private:
+
+        Stream * const m_s;        // wrapped stream
+        bool const m_autodelete;   // whether the destructor deletes m_s
+
+    };
+
+} // nv namespace
+
+
+//#endif // NV_CORE_STDSTREAM_H
diff --git a/thirdparty/thekla_atlas/nvcore/StrLib.cpp b/thirdparty/thekla_atlas/nvcore/StrLib.cpp
new file mode 100644
index 0000000000..7ec6c70136
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/StrLib.cpp
@@ -0,0 +1,796 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "StrLib.h"
+
+#include "Memory.h"
+#include "Utils.h" // swap
+
+#include <math.h>   // log
+#include <stdio.h>  // vsnprintf
+#include <string.h> // strlen, strcmp, etc.
+
+#if NV_CC_MSVC
+#include <stdarg.h> // vsnprintf
+#endif
+
+using namespace nv;
+
+namespace
+{
+    // Thin wrapper: allocate 'size' chars through malloc<char>.
+    static char * strAlloc(uint size)
+    {
+        char * buffer = malloc<char>(size);
+        return buffer;
+    }
+
+    // Thin wrapper: resize 'str' to 'size' chars through realloc<char>.
+    static char * strReAlloc(char * str, uint size)
+    {
+        char * buffer = realloc<char>(str, size);
+        return buffer;
+    }
+
+    // Thin wrapper: release 'str' through free<char>.
+    static void strFree(const char * str)
+    {
+        free<char>(str);
+    }
+
+    /*static char * strDup( const char * str )
+    {
+        nvDebugCheck( str != NULL );
+        uint len = uint(strlen( str ) + 1);
+        char * dup = strAlloc( len );
+        memcpy( dup, str, len );
+        return dup;
+    }*/
+
+    // Integer to string helper. Writes the digits of 'i' in radix 'r', most
+    // significant digit first, starting at 'a'. Returns the position one past
+    // the last digit written; no terminator is written.
+    static char * i2a( uint i, char *a, uint r )
+    {
+        const char * const digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        const uint higher = i / r;
+        if( higher > 0 ) {
+            // Emit the more significant digits first.
+            a = i2a( higher, a, r );
+        }
+        *a++ = digits[i % r];
+        return a;
+    }
+
+    // Locale independent character helpers (ASCII ranges only).
+    static inline char toUpper( char c ) {
+        const bool isLower = (c >= 'a' && c <= 'z');
+        return isLower ? (c+'A'-'a') : (c);
+    }
+    static inline char toLower( char c ) {
+        const bool isUpper = (c >= 'A' && c <= 'Z');
+        return isUpper ? (c+'a'-'A') : (c);
+    }
+    static inline bool isAlpha( char c ) {
+        const bool lower = (c >= 'a' && c <= 'z');
+        const bool upper = (c >= 'A' && c <= 'Z');
+        return lower || upper;
+    }
+    static inline bool isDigit( char c ) {
+        return '0' <= c && c <= '9';
+    }
+    static inline bool isAlnum( char c ) {
+        const bool lower = (c >= 'a' && c <= 'z');
+        const bool upper = (c >= 'A' && c <= 'Z');
+        const bool digit = (c >= '0' && c <= '9');
+        return lower || upper || digit;
+    }
+
+}
+
+// Length of a C string, converted to uint through U32. 'str' must not be NULL.
+uint nv::strLen(const char * str)
+{
+    nvDebugCheck(str != NULL);
+    const size_t length = strlen(str);
+    return U32(length);
+}
+
+// Three-way comparison of two C strings (strcmp result). Both must be non-NULL.
+int nv::strDiff(const char * s1, const char * s2)
+{
+    nvDebugCheck(s1 != NULL);
+    nvDebugCheck(s2 != NULL);
+    const int order = strcmp(s1, s2);
+    return order;
+}
+
+// Case-insensitive three-way comparison of two C strings. Both must be
+// non-NULL. Returns the result of _stricmp (MSVC) or strcasecmp (elsewhere).
+int nv::strCaseDiff(const char * s1, const char * s2)
+{
+    nvDebugCheck(s1 != NULL);
+    nvDebugCheck(s2 != NULL); // Previously checked s1 twice, leaving s2 unvalidated.
+#if NV_CC_MSVC
+    return _stricmp(s1, s2);
+#else
+    return strcasecmp(s1, s2);
+#endif
+}
+
+// Exact string equality that tolerates NULL: identical pointers (including two
+// NULLs) compare equal, and a NULL never equals a non-NULL string.
+bool nv::strEqual(const char * s1, const char * s2)
+{
+    const bool samePointer = (s1 == s2);
+    if (samePointer || s1 == NULL || s2 == NULL) {
+        return samePointer;
+    }
+    return strcmp(s1, s2) == 0;
+}
+
+// Case-insensitive equality that tolerates NULL: identical pointers (including
+// two NULLs) compare equal, and a NULL never equals a non-NULL string.
+bool nv::strCaseEqual(const char * s1, const char * s2)
+{
+    const bool samePointer = (s1 == s2);
+    if (samePointer || s1 == NULL || s2 == NULL) {
+        return samePointer;
+    }
+    return strCaseDiff(s1, s2) == 0;
+}
+
+// True when the first strlen(prefix) characters of 'str' equal 'prefix'.
+bool nv::strBeginsWith(const char * str, const char * prefix)
+{
+    const size_t prefixLength = strlen(prefix);
+    return strncmp(str, prefix, prefixLength) == 0;
+}
+
+// True when 'str' ends with 'suffix'. A suffix longer than 'str' never matches.
+bool nv::strEndsWith(const char * str, const char * suffix)
+{
+    const uint strLength = strLen(str);
+    const uint suffixLength = strLen(suffix);
+    if (suffixLength > strLength) {
+        return false;
+    }
+    const char * tail = str + strLength - suffixLength;
+    return strncmp(tail, suffix, suffixLength) == 0;
+}
+
+// Copy 'src' into 'dst', a buffer of 'size' chars, always leaving 'dst'
+// null-terminated when size > 0.
+// On MSVC this defers to strcpy_s. Elsewhere the copy used to ignore 'size'
+// (plain strcpy) and could write past the end of 'dst'; it is now bounded:
+// a source that does not fit trips the debug check and is truncated.
+// memmove is used so that overlapping 'dst' and 'src' are tolerated.
+void nv::strCpy(char * dst, uint size, const char * src)
+{
+    nvDebugCheck(dst != NULL);
+    nvDebugCheck(src != NULL);
+#if NV_CC_MSVC && _MSC_VER >= 1400
+    strcpy_s(dst, size, src);
+#else
+    if (size == 0) {
+        return; // No room even for the terminator.
+    }
+    const size_t srcLength = strlen(src);
+    nvDebugCheck(srcLength < size);
+    const size_t count = (srcLength < size) ? srcLength : size_t(size - 1);
+    memmove(dst, src, count);
+    dst[count] = '\0';
+#endif
+}
+
+// Copy at most 'len' characters of 'src' into 'dst', a buffer of 'size' chars,
+// and null-terminate the result. On non-MSVC builds the output is truncated to
+// size-1 characters when it does not fit.
+void nv::strCpy(char * dst, uint size, const char * src, uint len)
+{
+    nvDebugCheck(dst != NULL);
+    nvDebugCheck(src != NULL);
+#if NV_CC_MSVC && _MSC_VER >= 1400
+    strncpy_s(dst, size, src, len);
+#else
+    if (size == 0) {
+        return; // Nothing can be written; previously this stored to dst[-1].
+    }
+    // Equivalent to min(len+1, size) but immune to len+1 wrapping around.
+    const uint n = (len < size) ? len + 1 : size;
+    strncpy(dst, src, n);
+    dst[n-1] = '\0';
+#endif
+}
+
+// Append 'src' to the string in 'dst', a buffer of 'size' chars in total.
+// On MSVC this defers to strcat_s. Elsewhere the append used to ignore 'size'
+// (plain strcat) and could write past the end of 'dst'; it is now bounded:
+// a source that does not fit trips the debug check and is truncated.
+void nv::strCat(char * dst, uint size, const char * src)
+{
+    nvDebugCheck(dst != NULL);
+    nvDebugCheck(src != NULL);
+#if NV_CC_MSVC && _MSC_VER >= 1400
+    strcat_s(dst, size, src);
+#else
+    const size_t used = strlen(dst);
+    nvDebugCheck(used + strlen(src) < size);
+    if (used + 1 < size) {
+        // strncat writes at most (size - used - 1) chars plus the terminator.
+        strncat(dst, src, size - used - 1);
+    }
+#endif
+}
+
+// Return a pointer to the first character of 'str' that is not a space (' ').
+// Only the space character is skipped; tabs and newlines are not.
+NVCORE_API const char * nv::strSkipWhiteSpace(const char * str)
+{
+    nvDebugCheck(str != NULL);
+    const char * cursor = str;
+    for (; *cursor == ' '; ++cursor) {
+    }
+    return cursor;
+}
+
+// Mutable overload: return a pointer to the first character of 'str' that is
+// not a space (' '). Only the space character is skipped.
+NVCORE_API char * nv::strSkipWhiteSpace(char * str)
+{
+    nvDebugCheck(str != NULL);
+    char * cursor = str;
+    for (; *cursor == ' '; ++cursor) {
+    }
+    return cursor;
+}
+
+
+/**
+ * Wildcard pattern matching. Returns true when the whole of 'str' matches 'pat'.
+ * Pattern syntax, as implemented below:
+ *   '*'    matches any run of characters, including an empty one.
+ *   '?'    matches exactly one character.
+ *   [set]  matches one character that is listed in the set or that falls
+ *          inside an 'a-z' style range (ranges are accepted in either order).
+ *   NV_PATH_SEPARATOR makes the pattern character that follows it literal.
+ * NOTE(review): because NV_PATH_SEPARATOR acts as the escape character, a
+ * pattern containing that separator does not match the same text literally -
+ * confirm this is intended wherever patterns may contain path separators.
+ * The origin of this routine is not recorded.
+ */
+bool nv::strMatch(const char * str, const char * pat)
+{
+    nvDebugCheck(str != NULL);
+    nvDebugCheck(pat != NULL);
+
+    char c2;
+
+    while (true) {
+        // Pattern exhausted: match only if the string is exhausted too.
+        if (*pat==0) {
+            if (*str==0) return true;
+            else         return false;
+        }
+        // String exhausted: only a '*' can still match (an empty run).
+        if ((*str==0) && (*pat!='*')) return false;
+        if (*pat=='*') {
+            pat++;
+            // A trailing '*' matches whatever is left.
+            if (*pat==0) return true;
+            // Try the rest of the pattern against every suffix of the string.
+            while (true) {
+                if (strMatch(str, pat)) return true;
+                if (*str==0) return false;
+                str++;
+            }
+        }
+        if (*pat=='?') goto match;
+        if (*pat=='[') {
+            pat++;
+            // Scan the set until an entry matches *str; reaching ']' or the
+            // end of the pattern first means no entry matched.
+            while (true) {
+                if ((*pat==']') || (*pat==0)) return false;
+                if (*pat==*str) break;
+                if (pat[1] == '-') {
+                    // Range entry 'lo-hi'; the bounds may be given in either order.
+                    c2 = pat[2];
+                    if (c2==0) return false;
+                    if ((*pat<=*str) && (c2>=*str)) break;
+                    if ((*pat>=*str) && (c2<=*str)) break;
+                    pat+=2;
+                }
+                pat++;
+            }
+            // Skip the remainder of the set up to its closing ']'.
+            while (*pat!=']') {
+                if (*pat==0) {
+                    // Unterminated set: step back so the shared pat++ below
+                    // lands on the terminator.
+                    pat--;
+                    break;
+                }
+                pat++;
+            }
+            goto match;
+        }
+
+        // Escape: the character after NV_PATH_SEPARATOR is compared literally.
+        if (*pat == NV_PATH_SEPARATOR) {
+            pat++;
+            if (*pat==0) return false;
+        }
+        if (*pat!=*str) return false;
+
+match:
+        pat++;
+        str++;
+    }
+}
+
+// True when every character of 'str' is a decimal digit, so the empty string
+// also yields true. Signs, decimal points and exponents are not accepted.
+bool nv::isNumber(const char * str) {
+    for (const char * cursor = str; *cursor != '\0'; ++cursor) {
+        if (!isDigit(*cursor)) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+/** Construct a null builder: no buffer is allocated (m_size == 0, m_str == NULL). */
+StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
+{
+}
+
+/**
+ * Preallocate a buffer of 'size_hint' chars holding an empty string.
+ * 'size_hint' must be greater than zero (only checked in debug builds); the
+ * terminator is written into the first char of the buffer.
+ */
+StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint)
+{
+    nvDebugCheck(m_size > 0);
+    m_str = strAlloc(m_size);
+    *m_str = '\0';
+}
+
+/** Copy ctor. Starts as a null builder, then delegates to copy(const StringBuilder &). */
+StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
+{
+    copy(s);
+}
+
+/** Copy a C string. A NULL 's' leaves the builder null (no allocation). */
+StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
+{
+    if (s != NULL) {
+        copy(s);
+    }
+}
+
+/**
+ * Copy up to 'len' characters of 's' by delegating to copy(s, len).
+ * Unlike the single-argument overload, 's' is not checked for NULL here;
+ * copy(s, len) requires it to be non-NULL.
+ */
+StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL)
+{
+    copy(s, len);
+}
+
+/** Release the owned buffer. m_str may be NULL here (null builder). */
+StringBuilder::~StringBuilder()
+{
+    strFree(m_str);
+}
+
+
+/**
+ * Replace the contents with the printf-style expansion of 'fmt'.
+ * Variadic front end for formatList(); the buffer is grown there as needed.
+ * Returns *this.
+ */
+StringBuilder & StringBuilder::format( const char * fmt, ... )
+{
+    nvDebugCheck(fmt != NULL);
+    va_list arg;
+    va_start( arg, fmt );
+
+    formatList( fmt, arg );
+
+    va_end( arg );
+
+    return *this;
+}
+
+
+/**
+ * Replace the contents with the printf-style expansion of 'fmt' using the
+ * arguments in 'arg'. Formats into the current buffer (allocating 64 chars if
+ * the builder is null) and retries with a larger buffer until the output fits.
+ * 'arg' itself is never consumed: every attempt works on a va_copy of it.
+ * Returns *this.
+ */
+StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
+{
+    nvDebugCheck(fmt != NULL);
+
+    // Null builder: start with a small default buffer.
+    if (m_size == 0) {
+        m_size = 64;
+        m_str = strAlloc( m_size );
+    }
+
+    va_list tmp;
+    va_copy(tmp, arg);
+#if NV_CC_MSVC && _MSC_VER >= 1400
+    int n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp);
+#else
+    int n = vsnprintf(m_str, m_size, fmt, tmp);
+#endif
+    va_end(tmp);
+
+    // Retry while the call failed (n < 0) or the output did not fit (n >= m_size).
+    while( n < 0 || n >= int(m_size) ) {
+        if( n > -1 ) {
+            // n is taken as the required length: grow to exactly n + 1.
+            m_size = n + 1;
+        }
+        else {
+            // No length was reported: double the buffer and try again.
+            m_size *= 2;
+        }
+
+        m_str = strReAlloc(m_str, m_size);
+
+        va_copy(tmp, arg);
+#if NV_CC_MSVC && _MSC_VER >= 1400
+        n = vsnprintf_s(m_str, m_size, _TRUNCATE, fmt, tmp);
+#else
+        n = vsnprintf(m_str, m_size, fmt, tmp);
+#endif
+        va_end(tmp);
+    }
+
+    nvDebugCheck(n < int(m_size));
+
+    // Make sure it's null terminated.
+    nvDebugCheck(m_str[n] == '\0');
+    //str[n] = '\0';
+
+    return *this;
+}
+
+
+// Append a single character by forwarding it as a one-char string to
+// append(const char *, uint). Returns *this.
+StringBuilder & StringBuilder::append( char c )
+{
+    return append(&c, 1);
+}
+
+// Append the whole C string 's' (which must not be NULL). Returns *this.
+StringBuilder & StringBuilder::append( const char * s )
+{
+    const uint len = U32(strlen( s ));
+    return append(s, len);
+}
+
+// Append up to 'len' characters of 's' after the current contents, growing the
+// buffer so that the existing text, the new text and the terminator fit.
+// Returns *this.
+StringBuilder & StringBuilder::append(const char * s, uint len)
+{
+    nvDebugCheck(s != NULL);
+
+    const uint oldLength = length();
+    reserve(oldLength + len + 1);
+
+    char * const tail = m_str + oldLength;
+    strCpy(tail, len + 1, s, len);
+
+    return *this;
+}
+
+// Append the contents of another builder. Returns *this.
+// A null 'str' appends nothing (it used to hand a NULL pointer to strCpy).
+// Appending a builder to itself goes through a temporary copy, because growing
+// this buffer may move the very memory the source pointer refers to.
+StringBuilder & StringBuilder::append(const StringBuilder & str)
+{
+    if (str.m_str == NULL) {
+        return *this;
+    }
+    if (&str == this) {
+        const StringBuilder snapshot(str);
+        return append(snapshot.m_str, snapshot.length());
+    }
+    return append(str.m_str, str.length());
+}
+
+
+/**
+ * Append the printf-style expansion of 'fmt' to the current contents.
+ * Variadic front end for appendFormatList(). Returns *this.
+ */
+StringBuilder & StringBuilder::appendFormat( const char * fmt, ... )
+{
+    nvDebugCheck( fmt != NULL );
+
+    va_list arg;
+    va_start( arg, fmt );
+
+    appendFormatList( fmt, arg );
+
+    va_end( arg );
+
+    return *this;
+}
+
+
+/**
+ * Append the printf-style expansion of 'fmt' using the arguments in 'arg'.
+ * A null builder is formatted in place; otherwise the text is formatted into
+ * a temporary builder and then appended. Returns *this.
+ */
+StringBuilder & StringBuilder::appendFormatList( const char * fmt, va_list arg )
+{
+    nvDebugCheck( fmt != NULL );
+
+    // Work on a copy so the temporary-builder path does not touch 'arg'.
+    va_list tmp;
+    va_copy(tmp, arg);
+
+    if (m_size == 0) {
+        // Nothing to preserve: format straight into this builder.
+        formatList(fmt, arg);
+    }
+    else {
+        StringBuilder tmp_str;
+        tmp_str.formatList( fmt, tmp );
+        append( tmp_str.str() );
+    }
+
+    va_end(tmp);
+
+    return *this;
+}
+
+// Append 'n' space characters after the current contents, allocating or
+// growing the buffer as needed. Returns *this.
+StringBuilder & StringBuilder::appendSpace(uint n)
+{
+    uint oldLength = 0;
+
+    if (m_str == NULL) {
+        // Null builder: allocate exactly n spaces plus the terminator.
+        m_size = n + 1;
+        m_str = strAlloc(m_size);
+    }
+    else {
+        oldLength = strLen(m_str);
+        const uint needed = oldLength + n + 1;
+        if (m_size < needed) {
+            m_size = needed;
+            m_str = strReAlloc(m_str, m_size);
+        }
+    }
+
+    memset(m_str + oldLength, ' ', n);
+    m_str[oldLength + n] = '\0';
+
+    return *this;
+}
+
+
+/**
+ * Replace the contents with the textual form of 'i' in the given base
+ * (2 to 36, digits above 9 use upper-case letters). Negative values get a
+ * leading '-'. Returns *this.
+ *
+ * The buffer size is computed by counting digits with integer arithmetic.
+ * The former floating-point log() estimate was undefined for zero and for
+ * negative values, and negating INT_MIN overflowed.
+ */
+StringBuilder & StringBuilder::number( int i, int base )
+{
+    nvCheck( base >= 2 );
+    nvCheck( base <= 36 );
+
+    const uint radix = uint(base);
+    const bool negative = (i < 0);
+    // Negate in unsigned arithmetic so the most negative int does not overflow.
+    const uint magnitude = negative ? (0u - uint(i)) : uint(i);
+
+    uint digits = 1;
+    for (uint rest = magnitude / radix; rest != 0; rest /= radix) {
+        digits++;
+    }
+
+    // Optional sign + digits + terminator.
+    reserve(digits + (negative ? 2u : 1u));
+
+    char * out = m_str;
+    if( negative ) {
+        *out++ = '-';
+    }
+    *i2a(magnitude, out, radix) = 0;
+
+    return *this;
+}
+
+
+/**
+ * Replace the contents with the textual form of 'i' in the given base
+ * (2 to 36, digits above 9 use upper-case letters). Returns *this.
+ *
+ * The buffer size is computed by counting digits with integer arithmetic.
+ * The former floating-point log() estimate reserved too little room (for
+ * example one char for "9" plus its terminator) and was undefined for zero.
+ */
+StringBuilder & StringBuilder::number( uint i, int base )
+{
+    nvCheck( base >= 2 );
+    nvCheck( base <= 36 );
+
+    const uint radix = uint(base);
+
+    uint digits = 1;
+    for (uint rest = i / radix; rest != 0; rest /= radix) {
+        digits++;
+    }
+
+    // Digits + terminator.
+    reserve(digits + 1);
+
+    *i2a(i, m_str, radix) = 0;
+
+    return *this;
+}
+
+
+/**
+ * Make sure the buffer holds at least 'size_hint' chars (which must be
+ * non-zero), preserving the current contents. The buffer never shrinks.
+ * Returns *this.
+ *
+ * When the builder was null, the fresh buffer is initialised to the empty
+ * string: length() and append() call strLen() on the buffer as soon as
+ * m_size is non-zero, so it must always be null-terminated.
+ */
+StringBuilder & StringBuilder::reserve( uint size_hint )
+{
+    nvCheck(size_hint != 0);
+    if (size_hint > m_size) {
+        const bool wasNull = (m_str == NULL);
+        m_str = strReAlloc(m_str, size_hint);
+        m_size = size_hint;
+        if (wasNull) {
+            m_str[0] = '\0';
+        }
+    }
+    return *this;
+}
+
+
+/**
+ * Replace the contents with a copy of the C string 's' (must not be NULL),
+ * growing the buffer if needed. Returns *this.
+ */
+StringBuilder & StringBuilder::copy(const char * s)
+{
+    nvCheck( s != NULL );
+
+    // Byte count including the terminator.
+    const uint byteCount = uint(strlen( s )) + 1;
+    reserve(byteCount);
+    memcpy(m_str, s, byteCount);
+
+    return *this;
+}
+
+/**
+ * Replace the contents with up to 'len' characters of 's' (must not be NULL),
+ * growing the buffer to at least len + 1 chars. Returns *this.
+ */
+StringBuilder & StringBuilder::copy(const char * s, uint len)
+{
+    nvCheck( s != NULL );
+
+    const uint needed = len + 1;
+    reserve(needed);
+    strCpy(m_str, needed, s, len);
+
+    return *this;
+}
+
+
+/**
+ * Replace the contents with a copy of another builder. Copying a null builder
+ * resets this one; otherwise the buffer grows to the source's capacity and the
+ * text is copied. Returns *this.
+ */
+StringBuilder & StringBuilder::copy( const StringBuilder & s )
+{
+    if (&s == this) {
+        // Self-copy: nothing to do, and it avoids copying a buffer onto itself.
+        return *this;
+    }
+
+    if (s.m_str == NULL) {
+        nvCheck( s.m_size == 0 );
+        reset();
+    }
+    else {
+        reserve( s.m_size );
+        strCpy( m_str, s.m_size, s.m_str );
+    }
+    return *this;
+}
+
+// True when the contents end with 'str'. m_str is passed straight to strlen,
+// so the builder must hold a string.
+bool StringBuilder::endsWith(const char * str) const
+{
+    const uint suffixLength = uint(strlen(str));
+    const uint ownLength = uint(strlen(m_str));
+    if (suffixLength > ownLength) {
+        return false;
+    }
+    const char * tail = m_str + ownLength - suffixLength;
+    return strncmp(tail, str, suffixLength) == 0;
+}
+
+// True when the first strlen(str) characters of the contents equal 'str'.
+bool StringBuilder::beginsWith(const char * str) const
+{
+    const size_t prefixLength = strlen(str);
+    const int order = strncmp(m_str, str, prefixLength);
+    return order == 0;
+}
+
+// Return a pointer to the last occurrence of 'c' in the contents, or NULL when
+// it does not occur. The terminator itself is never examined.
+char * StringBuilder::reverseFind(char c)
+{
+    for (int index = (int)strlen(m_str) - 1; index >= 0; index--) {
+        if (m_str[index] == c) {
+            return m_str + index;
+        }
+    }
+    return NULL;
+}
+
+
+/** Free the buffer and return to the null state (m_size == 0, m_str == NULL). */
+void StringBuilder::reset()
+{
+    strFree( m_str );
+    m_str = NULL;
+    m_size = 0;
+}
+
+/**
+ * Give up ownership of the buffer: returns the current buffer pointer (NULL
+ * for a null builder) and leaves the builder null without freeing anything.
+ */
+char * StringBuilder::release()
+{
+    char * const owned = m_str;
+    m_str = NULL;
+    m_size = 0;
+    return owned;
+}
+
+// Take ownership of 'str'. The capacity is set to strLen(str) + 1; passing
+// NULL leaves the builder null.
+// The buffer owned so far is freed first (it used to be leaked), unless 'str'
+// is that same buffer.
+// NOTE(review): 'str' is later released with strFree(), so it presumably has
+// to come from a compatible allocator (e.g. release()) - confirm with callers.
+void StringBuilder::acquire(char * str)
+{
+    if (str != m_str) {
+        strFree(m_str);
+    }
+
+    if (str) {
+        m_size = strLen(str) + 1;
+        m_str = str;
+    }
+    else {
+        m_size = 0;
+        m_str = NULL;
+    }
+}
+
+// Swap two builders by exchanging their capacity and buffer pointer; no
+// string data is copied.
+void nv::swap(StringBuilder & a, StringBuilder & b) {
+    swap(a.m_size, b.m_size);
+    swap(a.m_str, b.m_str);
+}
+
+
+/// Get the file name from this path. Forwards m_str to the static
+/// fileName(const char *), which requires a non-NULL string.
+const char * Path::fileName() const
+{
+    return fileName(m_str);
+}
+
+
+/// Get the extension from this path. Forwards m_str to the static
+/// extension(const char *), which requires a non-NULL string.
+const char * Path::extension() const
+{
+    return extension(m_str);
+}
+
+
+/// Replace every '\\' and '/' in 'path' with 'pathSeparator', in place.
+/// A NULL 'path' is ignored.
+/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
+    if (path == NULL) {
+        return;
+    }
+    for (char * cursor = path; *cursor != '\0'; ++cursor) {
+        if (*cursor == '\\' || *cursor == '/') {
+            *cursor = pathSeparator;
+        }
+    }
+}
+
+/// Convert all path separators of this path ('\\' and '/') to 'pathSeparator'.
+/// Does nothing on a null path.
+void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
+{
+    if (isNull()) {
+        return;
+    }
+    translatePath(m_str, pathSeparator);
+}
+
+/// Append 'pathSeparator' unless the path already ends with '\\' or '/'.
+/// The path must not be null; an empty path still gets the separator.
+/// The last character is m_str[l-1]. The check used to read m_str[l], which is
+/// always the terminator, so a separator was appended unconditionally.
+void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)
+{
+    nvCheck(!isNull());
+
+    const uint l = length();
+    const bool endsWithSeparator = (l > 0) && (m_str[l-1] == '\\' || m_str[l-1] == '/');
+
+    if (!endsWithSeparator) {
+        char separatorString[] = { pathSeparator, '\0' };
+        append(separatorString);
+    }
+}
+
+
+/**
+* Strip the file name from a path, in place.
+* The string is cut right after the last '/' or '\\' found at an index greater
+* than zero. When there is none (this includes a separator that only appears
+* as the very first character) the string becomes empty.
+*/
+void Path::stripFileName()
+{
+    nvCheck( m_str != NULL );
+
+    // Walk back from the last character to the nearest separator.
+    int index = (int)strlen(m_str) - 1;
+    for (; index > 0; index--) {
+        const char c = m_str[index];
+        if (c == '/' || c == '\\') {
+            break;
+        }
+    }
+
+    if (index != 0) {
+        m_str[index+1] = 0;
+    }
+    else {
+        m_str[0] = 0;
+    }
+}
+
+
+/// Strip the extension from a path name, in place: the string is cut at the
+/// last '.', unless NV_PATH_SEPARATOR is met first while scanning backwards or
+/// the '.' is the very first character.
+void Path::stripExtension()
+{
+    nvCheck( m_str != NULL );
+
+    int index = (int)strlen(m_str) - 1;
+    for (; index > 0 && m_str[index] != '.'; ) {
+        index--;
+        if( m_str[index] == NV_PATH_SEPARATOR ) {
+            return; // no extension
+        }
+    }
+
+    if (index > 0) {
+        m_str[index] = 0;
+    }
+}
+
+
+/// Get the path separator: the compile-time value of NV_PATH_SEPARATOR.
+// static
+char Path::separator()
+{
+    return NV_PATH_SEPARATOR;
+}
+
+// static
+// Return a pointer, inside 'str', to the character following the last '\\' or
+// '/'. Without any separator the whole string is returned.
+const char * Path::fileName(const char * str)
+{
+    nvCheck( str != NULL );
+
+    // Start at the terminator and step back while the previous character is
+    // not a separator.
+    const char * name = str + (int)strlen(str);
+    while (name != str && name[-1] != '\\' && name[-1] != '/') {
+        name--;
+    }
+
+    return name;
+}
+
+// static
+// Return a pointer, inside 'str', to the last '.' of the final path component
+// (the dot is included). When there is no extension - a separator is met
+// first, or no '.' is found after index 0 - a pointer to the terminator
+// (an empty string) is returned.
+const char * Path::extension(const char * str)
+{
+    nvCheck( str != NULL );
+
+    const int end = (int)strlen( str );
+    int dot = end;
+    while (dot > 0 && str[dot] != '.') {
+        dot--;
+        const char c = str[dot];
+        if (c == '\\' || c == '/') {
+            return &str[end]; // no extension
+        }
+    }
+
+    return (dot == 0) ? &str[end] : &str[dot];
+}
+
+
+
+/// Clone this string: builds a new String from the character data through the
+/// const char * constructor instead of sharing this string's buffer.
+String String::clone() const
+{
+    return String(data);
+}
+
+// Point this String at a freshly allocated copy of 'str' and take one
+// reference on it. NULL yields the null string. The data held before the call
+// is not released here.
+void String::setString(const char * str)
+{
+    if (str != NULL) {
+        allocString( str );
+        addRef();
+        return;
+    }
+    data = NULL;
+}
+
+// Point this String at a freshly allocated copy of the first 'length'
+// characters of 'str' (must not be NULL) and take one reference on it.
+// The data held before the call is not released here.
+void String::setString(const char * str, uint length)
+{
+    nvDebugCheck(str != NULL);
+
+    allocString(str, length);
+    addRef();
+}
+
+// Point this String at a freshly allocated copy of the builder's contents and
+// take one reference on it. A null builder yields the null string. The data
+// held before the call is not released here.
+void String::setString(const StringBuilder & str)
+{
+    const char * source = str.str();
+    if (source == NULL) {
+        data = NULL;
+        return;
+    }
+    allocString(source);
+    addRef();
+}
+
+// Increment the reference count stored in front of the character data.
+// No-op for the null string.
+void String::addRef()
+{
+    if (data == NULL) {
+        return;
+    }
+    setRefCount(getRefCount() + 1);
+}
+
+// Decrement the reference count; when it reaches zero the allocation (which
+// starts two bytes before 'data') is freed and this String becomes null.
+// No-op for the null string.
+void String::release()
+{
+    if (data == NULL) {
+        return;
+    }
+
+    const uint16 previous = getRefCount();
+    setRefCount(previous - 1);
+
+    const bool lastReference = (previous - 1 == 0);
+    if (lastReference) {
+        free(data - 2);
+        data = NULL;
+    }
+}
+
+// Allocate storage for a reference count (2 bytes), 'len' characters and a
+// terminator, point 'data' just past the count, reset the count to zero and
+// copy up to 'len' characters of 'str' into place.
+void String::allocString(const char * str, uint len)
+{
+    char * block = malloc<char>(2 + len + 1);
+
+    setData( block );
+    setRefCount( 0 );
+
+    // Copy string.
+    char * text = const_cast<char *>(data);
+    strCpy(text, len+1, str, len);
+
+    // Add terminating character.
+    text[len] = '\0';
+}
+
+// Swap two Strings by exchanging their data pointers; reference counts are
+// not touched.
+void nv::swap(String & a, String & b) {
+    swap(a.data, b.data);
+}
diff --git a/thirdparty/thekla_atlas/nvcore/StrLib.h b/thirdparty/thekla_atlas/nvcore/StrLib.h
new file mode 100644
index 0000000000..ae4b5d12a0
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/StrLib.h
@@ -0,0 +1,433 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_STRING_H
+#define NV_CORE_STRING_H
+
+#include "Debug.h"
+#include "Hash.h" // hash
+
+//#include <string.h> // strlen, etc.
+
+#if NV_OS_WIN32
+#define NV_PATH_SEPARATOR '\\'
+#else
+#define NV_PATH_SEPARATOR '/'
+#endif
+
+namespace nv
+{
+
+    NVCORE_API uint strHash(const char * str, uint h) NV_PURE;
+
+    /// String hash based on Bernstein's hash: for every character up to the
+    /// terminator, h = (33 * h) ^ character. 'h' is the seed (5381 by default).
+    inline uint strHash(const char * data, uint h = 5381)
+    {
+        for (const char * c = data; *c != 0; ++c) {
+            h = (33 * h) ^ uint(*c);
+        }
+        return h;
+    }
+
+    /// Hash functor for C strings: hashes the characters (via strHash with its
+    /// default seed), not the pointer value.
+    template <> struct Hash<const char *> {
+        uint operator()(const char * str) const { return strHash(str); }
+    };
+
+    NVCORE_API uint strLen(const char * str) NV_PURE;                       // Asserts on NULL strings.
+
+    NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE;       // Asserts on NULL strings.
+    NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE;   // Asserts on NULL strings.
+    NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE;     // Accepts NULL strings.
+    NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
+
+    /// Equality functor for C strings: compares the characters (via strEqual,
+    /// which accepts NULL), not the pointer values.
+    template <> struct Equal<const char *> {
+        bool operator()(const char * a, const char * b) const { return strEqual(a, b); }
+    };
+
+    NVCORE_API bool strBeginsWith(const char * dst, const char * prefix) NV_PURE;
+    NVCORE_API bool strEndsWith(const char * dst, const char * suffix) NV_PURE;
+
+
+    NVCORE_API void strCpy(char * dst, uint size, const char * src);
+    NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
+    NVCORE_API void strCat(char * dst, uint size, const char * src);
+
+    NVCORE_API const char * strSkipWhiteSpace(const char * str);
+    NVCORE_API char * strSkipWhiteSpace(char * str);
+
+    NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
+
+    NVCORE_API bool isNumber(const char * str) NV_PURE;
+
+    /* @@ Implement these two functions and modify StringBuilder to use them?
+    NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
+    NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
+
+    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
+    template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
+        va_list args;
+        va_start(args, fmt);
+        strFormatList(buffer, count, fmt, args);
+        va_end(args);
+    }
+    template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
+        va_list tmp;
+        va_copy(tmp, args);
+        strFormatList(buffer, count, fmt, tmp);
+        va_end(tmp);
+    }*/
+
+    /// Copy 'src' into a fixed-size char array; the array length is deduced
+    /// and passed to strCpy as the destination size.
+    template <int count> void strCpySafe(char (&buffer)[count], const char *src) {
+        strCpy(buffer, count, src);
+    }
+
+    /// Append 'src' to the string held in a fixed-size char array; the array
+    /// length is deduced and passed to strCat as the destination size.
+    template <int count> void strCatSafe(char (&buffer)[count], const char * src) {
+        strCat(buffer, count, src);
+    }
+
+
+
+    /// String builder: a growable, heap-allocated, null-terminated char buffer.
+    /// A default-constructed builder is "null": it owns no buffer at all
+    /// (m_size == 0, m_str == NULL).
+    class NVCORE_CLASS StringBuilder
+    {
+    public:
+
+        StringBuilder();
+        explicit StringBuilder( uint size_hint );
+        StringBuilder(const char * str);
+        StringBuilder(const char * str, uint len);
+        StringBuilder(const StringBuilder & other);
+
+        ~StringBuilder();
+
+        // printf-style formatting; replaces the current contents.
+        StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
+        StringBuilder & formatList( const char * format, va_list arg );
+
+        // Appending; every overload returns *this.
+        StringBuilder & append(char c);
+        StringBuilder & append(const char * str);
+        StringBuilder & append(const char * str, uint len);
+        StringBuilder & append(const StringBuilder & str);
+        StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
+        StringBuilder & appendFormatList(const char * format, va_list arg);
+
+        StringBuilder & appendSpace(uint n);
+
+        // Replace the contents with the textual form of a number.
+        StringBuilder & number( int i, int base = 10 );
+        StringBuilder & number( uint i, int base = 10 );
+
+        StringBuilder & reserve(uint size_hint);
+        StringBuilder & copy(const char * str);
+        StringBuilder & copy(const char * str, uint len);
+        StringBuilder & copy(const StringBuilder & str);
+
+        StringBuilder & toLower();
+        StringBuilder & toUpper();
+
+        bool endsWith(const char * str) const;
+        bool beginsWith(const char * str) const;
+
+        char * reverseFind(char c);
+
+        void reset();
+
+        /// True when no buffer is allocated (capacity of zero).
+        bool isNull() const { return m_size == 0; }
+
+        // const char * accessors
+        //operator const char * () const { return m_str; }
+        //operator char * () { return m_str; }
+        const char * str() const { return m_str; }
+        char * str() { return m_str; }
+
+        char * release();       // Release ownership of string.
+        void acquire(char *);   // Take ownership of string.
+
+        /// Implement value semantics.
+        StringBuilder & operator=( const StringBuilder & s ) {
+            return copy(s);
+        }
+
+        /// Implement value semantics.
+        StringBuilder & operator=( const char * s ) {
+            return copy(s);
+        }
+
+        /// Equal operator: exact, character-by-character comparison.
+        /// This used to call strMatch() with this string as the pattern, so
+        /// '*', '?', '[' and NV_PATH_SEPARATOR in it were interpreted as
+        /// wildcard syntax instead of being compared literally. strEqual()
+        /// also accepts null builders.
+        bool operator==( const StringBuilder & s ) const {
+            return strEqual(s.m_str, m_str);
+        }
+
+        /// Return the exact length.
+        uint length() const { return isNull() ? 0 : strLen(m_str); }
+
+        /// Return the size of the string container.
+        uint capacity() const { return m_size; }
+
+        /// Return the hash of the string.
+        uint hash() const { return isNull() ? 0 : strHash(m_str); }
+
+        // Swap strings.
+        friend void swap(StringBuilder & a, StringBuilder & b);
+
+    protected:
+
+        /// Size of the string container.
+        uint m_size;
+
+        /// String.
+        char * m_str;
+
+    };
+
+
+    /// Path string. @@ This should be called PathBuilder.
+    /// A StringBuilder with helpers for file-system paths. Both '/' and '\\'
+    /// are recognised as separators by most helpers; NV_PATH_SEPARATOR is the
+    /// default separator used when writing one.
+    class NVCORE_CLASS Path : public StringBuilder
+    {
+    public:
+        Path() : StringBuilder() {}
+        explicit Path(int size_hint) : StringBuilder(size_hint) {}
+        Path(const char * str) : StringBuilder(str) {}
+        Path(const Path & path) : StringBuilder(path) {}
+
+        // Accessors operating on this path; they forward to the statics below.
+        const char * fileName() const;
+        const char * extension() const;
+
+        // Rewrite every separator of this path to 'pathSeparator'.
+        void translatePath(char pathSeparator = NV_PATH_SEPARATOR);
+
+        // Append 'pathSeparator' to this path.
+        void appendSeparator(char pathSeparator = NV_PATH_SEPARATOR);
+
+        // In-place truncation helpers.
+        void stripFileName();
+        void stripExtension();
+
+        // statics
+        NVCORE_API static char separator();
+        NVCORE_API static const char * fileName(const char *);
+        NVCORE_API static const char * extension(const char *);
+
+        NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
+    };
+
+
+    /// String class: a reference-counted, shared character string.
+    /// 'data' points at the characters; a 16-bit reference count is stored in
+    /// the two bytes immediately before them. Copying a String shares the
+    /// buffer and bumps the count; the buffer is freed when the last
+    /// reference is released.
+    class NVCORE_CLASS String
+    {
+    public:
+
+        /// Constructs a null string. @sa isNull()
+        String()
+        {
+            data = NULL;
+        }
+
+        /// Constructs a shared copy of str.
+        String(const String & str)
+        {
+            data = str.data;
+            if (data != NULL) addRef();
+        }
+
+        /// Constructs a shared string from a standard string.
+        String(const char * str)
+        {
+            setString(str);
+        }
+
+        /// Constructs a shared string from a standard string.
+        String(const char * str, int length)
+        {
+            setString(str, length);
+        }
+
+        /// Constructs a shared string from a StringBuilder.
+        String(const StringBuilder & str)
+        {
+            setString(str);
+        }
+
+        /// Dtor.
+        ~String()
+        {
+            release();
+        }
+
+        String clone() const;
+
+        /// Release the current string and allocate a new one.
+        const String & operator=( const char * str )
+        {
+            release();
+            setString( str );
+            return *this;
+        }
+
+        /// Release the current string and allocate a new one.
+        const String & operator=( const StringBuilder & str )
+        {
+            release();
+            setString( str );
+            return *this;
+        }
+
+        /// Implement value semantics.
+        String & operator=( const String & str )
+        {
+            if (str.data != data)
+            {
+                release();
+                data = str.data;
+                addRef();
+            }
+            return *this;
+        }
+
+        // The comparison operators below perform an exact, character-by-character
+        // comparison through strEqual(), which also accepts null strings. They
+        // used to call strMatch() with this string as the pattern, so '*', '?',
+        // '[' and NV_PATH_SEPARATOR in it were interpreted as wildcard syntax
+        // instead of being compared literally.
+
+        /// Equal operator.
+        bool operator==( const String & str ) const
+        {
+            return strEqual(str.data, data);
+        }
+
+        /// Equal operator.
+        bool operator==( const char * str ) const
+        {
+            return strEqual(str, data);
+        }
+
+        /// Not equal operator.
+        bool operator!=( const String & str ) const
+        {
+            return !strEqual(str.data, data);
+        }
+
+        /// Not equal operator.
+        bool operator!=( const char * str ) const
+        {
+            return !strEqual(str, data);
+        }
+
+        /// Returns true if this string is the null string.
+        bool isNull() const { return data == NULL; }
+
+        /// Return the exact length.
+        uint length() const { nvDebugCheck(data != NULL); return strLen(data); }
+
+        /// Return the hash of the string.
+        uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
+
+        /// const char * cast operator.
+        operator const char * () const { return data; }
+
+        /// Get string pointer.
+        const char * str() const { return data; }
+
+
+    private:
+
+        // Add reference count.
+        void addRef();
+
+        // Decrease reference count.
+        void release();
+
+        // Read the 16-bit count stored in the two bytes before 'data'.
+        uint16 getRefCount() const
+        {
+            nvDebugCheck(data != NULL);
+            return *reinterpret_cast<const uint16 *>(data - 2);
+        }
+
+        // Write the 16-bit count stored in the two bytes before 'data'.
+        void setRefCount(uint16 count) {
+            nvDebugCheck(data != NULL);
+            nvCheck(count < 0xFFFF);
+            *reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
+        }
+
+        // 'str' is the start of an allocation; the characters begin after the
+        // two bytes reserved for the reference count.
+        void setData(const char * str) {
+            data = str + 2;
+        }
+
+        void allocString(const char * str)
+        {
+            allocString(str, strLen(str));
+        }
+
+        void allocString(const char * str, uint length);
+
+        void setString(const char * str);
+        void setString(const char * str, uint length);
+        void setString(const StringBuilder & str);
+
+        // Swap strings.
+        friend void swap(String & a, String & b);
+
+    private:
+
+        // Character data, or NULL for the null string.
+        const char * data;
+
+    };
+
+    /// Hash functor for String: forwards to String::hash(), which requires a
+    /// non-null string.
+    template <> struct Hash<String> {
+        uint operator()(const String & str) const { return str.hash(); }
+    };
+
+
+    // Like AutoPtr, but for const char strings.
+    // Owns a character array and releases it with delete [] on destruction or
+    // reassignment, so the pointer handed over must be compatible with
+    // delete []. Copying and heap allocation are forbidden through the
+    // NV_FORBID_* macros.
+    class AutoString
+    {
+        NV_FORBID_COPY(AutoString);
+        NV_FORBID_HEAPALLOC();
+    public:
+
+        // Ctor. Takes ownership of p (may be NULL).
+        AutoString(const char * p = NULL) : m_ptr(p) { }
+
+#if NV_CC_CPP11
+        // Move ctor. Steals the pointer and leaves the source empty.
+        AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
+#endif
+        
+        // Dtor. Deletes owned pointer.
+        ~AutoString() {
+            delete [] m_ptr;
+            m_ptr = NULL;
+        }
+
+        // Delete owned pointer and assign new one.
+        // Assigning the pointer that is already owned is a no-op.
+        void operator=(const char * p) {
+            if (p != m_ptr) 
+            {
+                delete [] m_ptr;
+                m_ptr = p;
+            }
+        }
+
+        // Get pointer.
+        const char * ptr() const { return m_ptr; }
+        operator const char *() const { return m_ptr; }
+
+        // Relinquish ownership of the underlying pointer and returns that pointer.
+        const char * release() {
+            const char * tmp = m_ptr;
+            m_ptr = NULL;
+            return tmp;
+        }
+
+        // comparison operators.
+        // These compare pointer values, not string contents.
+        friend bool operator == (const AutoString & ap, const char * const p) {
+            return (ap.ptr() == p);
+        }
+        friend bool operator != (const AutoString & ap, const char * const p) {
+            return (ap.ptr() != p);
+        }
+        friend bool operator == (const char * const p, const AutoString & ap) {
+            return (ap.ptr() == p);
+        }
+        friend bool operator != (const char * const p, const AutoString & ap) {
+            return (ap.ptr() != p);
+        }
+
+    private:
+        // Owned character array, or NULL.
+        const char * m_ptr;
+    };
+
+} // nv namespace
+
+#endif // NV_CORE_STRING_H
diff --git a/thirdparty/thekla_atlas/nvcore/Stream.h b/thirdparty/thekla_atlas/nvcore/Stream.h
new file mode 100644
index 0000000000..c35c0d0c78
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Stream.h
@@ -0,0 +1,164 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_STREAM_H
+#define NV_CORE_STREAM_H
+
+#include "nvcore.h"
+#include "Debug.h"
+
+namespace nv
+{
+
+    /// Abstract base class for binary streams; the same interface is used for loading and saving.
+    class NVCORE_CLASS Stream {
+    public:
+
+        enum ByteOrder {
+            LittleEndian = false,
+            BigEndian = true,
+        };
+
+        /// Get the byte order of the system (decided at compile time from NV_LITTLE_ENDIAN).
+        static ByteOrder getSystemByteOrder() { 
+#if NV_LITTLE_ENDIAN
+            return LittleEndian;
+#else
+            return BigEndian;
+#endif
+        }
+
+
+        /// Ctor. The stream byte order starts out as little endian, whatever the host is.
+        Stream() : m_byteOrder(LittleEndian) { }
+
+        /// Virtual destructor.
+        virtual ~Stream() {}
+
+        /// Set the byte order used by the multi-byte operator<< overloads.
+        void setByteOrder(ByteOrder bo) { m_byteOrder = bo; }
+
+        /// Get the byte order used by the multi-byte operator<< overloads.
+        ByteOrder byteOrder() const { return m_byteOrder; }
+
+
+        /// Serialize len bytes at data. NOTE(review): the result is presumably the number of bytes processed - confirm in the implementations.
+        virtual uint serialize( void * data, uint len ) = 0;
+
+        /// Move to the given position in the archive.
+        virtual void seek( uint pos ) = 0;
+
+        /// Return the current position in the archive.
+        virtual uint tell() const = 0;
+
+        /// Return the current size of the archive.
+        virtual uint size() const = 0;
+
+        /// Determine if there has been any error. The operator<< overloads ignore serialize()'s result, so this is how callers detect failures.
+        virtual bool isError() const = 0;
+
+        /// Clear errors.
+        virtual void clearError() = 0;
+
+        /// Return true if the stream is at the end.
+        virtual bool isAtEnd() const = 0;
+
+        /// Return true if the stream is seekable.
+        virtual bool isSeekable() const = 0;
+
+        /// Return true if this is an input stream.
+        virtual bool isLoading() const = 0;
+
+        /// Return true if this is an output stream.
+        virtual bool isSaving() const = 0;
+
+        /// Move offset bytes forward from the current position (a seek relative to tell()).
+        void advance(uint offset) { seek(tell() + offset); }
+
+
+        // Serialization operators. One-byte values go straight to serialize(); wider ones honour the stream byte order.
+        friend Stream & operator<<( Stream & s, bool & c ) {
+            // Always go through a one-byte temporary instead of aliasing the bool:
+            //  - the stream holds a single 0/1 byte whatever sizeof(bool) is (it was
+            //    4 on the old Darwin ABI that used to be special-cased here), and
+            //  - when loading, a byte other than 0/1 is never stored directly into
+            //    a bool object, which would be undefined behaviour.
+            uint8 b = c ? 1 : 0;
+            s.serialize( &b, 1 );
+            // Expected to be a no-op when saving (b untouched); normalises the value when loading.
+            c = (b != 0);
+            return s;
+        }
+        friend Stream & operator<<( Stream & s, char & c ) {
+            nvStaticCheck(sizeof(char) == 1);
+            s.serialize( &c, 1 );
+            return s;
+        }
+        friend Stream & operator<<( Stream & s, uint8 & c ) {
+            nvStaticCheck(sizeof(uint8) == 1);
+            s.serialize( &c, 1 );
+            return s;
+        }
+        friend Stream & operator<<( Stream & s, int8 & c ) {
+            nvStaticCheck(sizeof(int8) == 1);
+            s.serialize( &c, 1 );
+            return s;
+        }
+        friend Stream & operator<<( Stream & s, uint16 & c ) {
+            nvStaticCheck(sizeof(uint16) == 2);
+            return s.byteOrderSerialize( &c, 2 );
+        }
+        friend Stream & operator<<( Stream & s, int16 & c ) {
+            nvStaticCheck(sizeof(int16) == 2);
+            return s.byteOrderSerialize( &c, 2 );
+        }
+        friend Stream & operator<<( Stream & s, uint32 & c ) {
+            nvStaticCheck(sizeof(uint32) == 4);
+            return s.byteOrderSerialize( &c, 4 );
+        }
+        friend Stream & operator<<( Stream & s, int32 & c ) {
+            nvStaticCheck(sizeof(int32) == 4);
+            return s.byteOrderSerialize( &c, 4 );
+        }
+        friend Stream & operator<<( Stream & s, uint64 & c ) {
+            nvStaticCheck(sizeof(uint64) == 8);
+            return s.byteOrderSerialize( &c, 8 );
+        }
+        friend Stream & operator<<( Stream & s, int64 & c ) {
+            nvStaticCheck(sizeof(int64) == 8);
+            return s.byteOrderSerialize( &c, 8 );
+        }
+        friend Stream & operator<<( Stream & s, float & c ) {
+            nvStaticCheck(sizeof(float) == 4);
+            return s.byteOrderSerialize( &c, 4 );
+        }
+        friend Stream & operator<<( Stream & s, double & c ) {
+            nvStaticCheck(sizeof(double) == 8);
+            return s.byteOrderSerialize( &c, 8 );
+        }
+
+    protected:
+
+        /// Serialize the len bytes at v in the stream byte order.
+        Stream & byteOrderSerialize( void * v, uint len ) {
+            if( m_byteOrder == getSystemByteOrder() ) {
+                serialize( v, len );
+            }
+            else { // Byte orders differ: hand the bytes over one at a time, last to first.
+                for( uint i = len; i > 0; i-- ) {
+                    serialize( (uint8 *)v + i - 1, 1 );
+                }
+            }
+            return *this;
+        }
+
+
+    private:
+
+        ByteOrder m_byteOrder;
+
+    };
+
+} // nv namespace
+
+#endif // NV_CORE_STREAM_H
diff --git a/thirdparty/thekla_atlas/nvcore/Utils.h b/thirdparty/thekla_atlas/nvcore/Utils.h
new file mode 100644
index 0000000000..f20e42cda8
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/Utils.h
@@ -0,0 +1,315 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_UTILS_H
+#define NV_CORE_UTILS_H
+
+#include "Debug.h" // nvDebugCheck
+
+#include <new> // for placement new
+
+
+// Just in case. Grrr.
+#undef min
+#undef max
+
+#define NV_INT8_MIN    (-128)
+#define NV_INT8_MAX    127
+#define NV_UINT8_MAX    255
+#define NV_INT16_MIN    (-32767-1)
+#define NV_INT16_MAX    32767
+#define NV_UINT16_MAX   0xffff
+#define NV_INT32_MIN    (-2147483647-1)
+#define NV_INT32_MAX    2147483647
+#define NV_UINT32_MAX   0xffffffff
+#define NV_INT64_MAX    POSH_I64(9223372036854775807)
+#define NV_INT64_MIN    (-POSH_I64(9223372036854775807)-1)
+#define NV_UINT64_MAX   POSH_U64(0xffffffffffffffff)
+
+#define NV_HALF_MAX     65504.0F
+#define NV_FLOAT_MAX    3.402823466e+38F
+
+#define NV_INTEGER_TO_FLOAT_MAX  16777216     // 2^24: largest integer such that it and all smaller integers can be stored exactly in a 32bit float (24-bit significand); 2^24+1 already rounds.
+
+
+namespace nv
+{
+    // Less error prone than casting. From CB:
+    // http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
+
+    // These intentionally look like casts.
+
+    // uint64 casts. Signed sources are guarded by nvDebugCheck(x >= 0); everything else converts implicitly.
+    template <typename T> inline uint64 U64(T x) { return x; }
+    //template <> inline uint64 U64<uint64>(uint64 x) { return x; }
+    template <> inline uint64 U64<int64>(int64 x) { nvDebugCheck(x >= 0); return static_cast<uint64>(x); }
+    //template <> inline uint64 U64<uint32>(uint32 x) { return x; }
+    template <> inline uint64 U64<int32>(int32 x) { nvDebugCheck(x >= 0); return static_cast<uint64>(x); }
+    //template <> inline uint64 U64<uint16>(uint16 x) { return x; }
+    template <> inline uint64 U64<int16>(int16 x) { nvDebugCheck(x >= 0); return static_cast<uint64>(x); }
+    //template <> inline uint64 U64<uint8>(uint8 x) { return x; }
+    template <> inline uint64 U64<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint64>(x); }
+
+    // int64 casts. Among the integer sources only uint64 can fall outside the int64 range, so only it is checked.
+    template <typename T> inline int64 I64(T x) { return x; }
+    template <> inline int64 I64<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT64_MAX); return static_cast<int64>(x); }
+    //template <> inline int64 I64<int64>(int64 x) { return x; }
+    //template <> inline int64 I64<uint32>(uint32 x) { return x; }
+    //template <> inline int64 I64<int32>(int32 x) { return x; }
+    //template <> inline int64 I64<uint16>(uint16 x) { return x; }
+    //template <> inline int64 I64<int16>(int16 x) { return x; }
+    //template <> inline int64 I64<uint8>(uint8 x) { return x; }
+    //template <> inline int64 I64<int8>(int8 x) { return x; }
+
+    // uint32 casts. Wider sources are checked against NV_UINT32_MAX, signed sources against 0.
+    template <typename T> inline uint32 U32(T x) { return x; }
+    template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return static_cast<uint32>(x); }
+    template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return static_cast<uint32>(x); }
+    //template <> inline uint32 U32<uint32>(uint32 x) { return x; }
+    template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return static_cast<uint32>(x); }
+    //template <> inline uint32 U32<uint16>(uint16 x) { return x; }
+    template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return static_cast<uint32>(x); }
+    //template <> inline uint32 U32<uint8>(uint8 x) { return x; }
+    template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint32>(x); }
+
+    // int32 casts. Every checked source must fit in [NV_INT32_MIN, NV_INT32_MAX] (the int64 check used to accept values up to NV_UINT32_MAX).
+    template <typename T> inline int32 I32(T x) { return x; }
+    template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return static_cast<int32>(x); }
+    template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_INT32_MAX); return static_cast<int32>(x); }
+    template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return static_cast<int32>(x); }
+    //template <> inline int32 I32<int32>(int32 x) { return x; }
+    //template <> inline int32 I32<uint16>(uint16 x) { return x; }
+    //template <> inline int32 I32<int16>(int16 x) { return x; }
+    //template <> inline int32 I32<uint8>(uint8 x) { return x; }
+    //template <> inline int32 I32<int8>(int8 x) { return x; }
+
+    // uint16 casts. Wider sources are checked against NV_UINT16_MAX, signed sources against 0.
+    template <typename T> inline uint16 U16(T x) { return x; }
+    template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
+    template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
+    template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
+    template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return static_cast<uint16>(x); }
+    //template <> inline uint16 U16<uint16>(uint16 x) { return x; }
+    template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return static_cast<uint16>(x); }
+    //template <> inline uint16 U16<uint8>(uint8 x) { return x; }
+    template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint16>(x); }
+
+    // int16 casts. Every checked source must fit in [NV_INT16_MIN, NV_INT16_MAX] (the signed checks used to accept values up to NV_UINT16_MAX).
+    template <typename T> inline int16 I16(T x) { return x; }
+    template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return static_cast<int16>(x); }
+    template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return static_cast<int16>(x); }
+    template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return static_cast<int16>(x); }
+    template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return static_cast<int16>(x); }
+    template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return static_cast<int16>(x); }
+    //template <> inline int16 I16<int16>(int16 x) { return x; }
+    //template <> inline int16 I16<uint8>(uint8 x) { return x; }
+    //template <> inline int16 I16<int8>(int8 x) { return x; }
+
+    // uint8 casts. Wider sources are checked against NV_UINT8_MAX, signed sources against 0.
+    template <typename T> inline uint8 U8(T x) { return x; }
+    template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
+    template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
+    template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
+    template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
+    template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
+    template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return static_cast<uint8>(x); }
+    //template <> inline uint8 U8<uint8>(uint8 x) { return x; }
+    template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return static_cast<uint8>(x); }
+    //template <> inline uint8 U8<float>(float x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
+
+    // int8 casts. Every checked source must fit in [NV_INT8_MIN, NV_INT8_MAX] (the signed checks used to accept values up to NV_UINT8_MAX).
+    template <typename T> inline int8 I8(T x) { return x; }
+    template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return static_cast<int8>(x); }
+    //template <> inline int8 I8<int8>(int8 x) { return x; }
+
+    // float casts. 32- and 64-bit integers are checked against NV_INTEGER_TO_FLOAT_MAX before converting.
+    template <typename T> inline float F32(T x) { return x; }
+    template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
+    template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
+    template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
+    template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return static_cast<float>(x); }
+    // 8- and 16-bit integers always fit, so they go through the unchecked generic template:
+    //template <> inline float F32<uint16>(uint16 x) { return (float)x; }
+    //template <> inline float F32<int16>(int16 x) { return (float)x; }
+    //template <> inline float F32<uint8>(uint8 x) { return (float)x; }
+    //template <> inline float F32<int8>(int8 x) { return (float)x; }
+
+
+    /// Exchange the values of a and b through one temporary copy of a.
+    template <typename T> 
+    inline void swap(T & a, T & b)
+    {
+        T saved(a);
+        a = b; 
+        b = saved;
+    }
+
+    /// Return the larger of a and b (b when neither is less than the other). For floating point values, b is returned when a is NaN.
+    template <typename T> 
+    inline T max(const T & a, const T & b)  // Returns a copy; a const T & return was considered and dropped.
+    {
+        if (b < a) return a;
+        return b;
+    }
+
+	/// Return the largest of the four arguments, combining the maxima of (a, b) and (c, d).
+	template <typename T> 
+	inline T max4(const T & a, const T & b, const T & c, const T & d)
+	{
+		const T firstPair = max(a, b), secondPair = max(c, d);
+		return max(firstPair, secondPair);
+	}
+
+    /// Return the largest of the three arguments.
+    template <typename T> 
+    inline T max3(const T & a, const T & b, const T & c)
+    {
+        const T tailMax = max(b, c);
+        return max(a, tailMax);
+    }
+
+    /// Return the smaller of a and b (b when a is not less than b).
+    template <typename T> 
+    inline T min(const T & a, const T & b)  // Returns a copy; a const T & return was considered and dropped.
+    {
+        if (a < b) return a;
+        return b;
+    }
+
+    /// Return the smallest of the three arguments.
+    template <typename T> 
+    inline T min3(const T & a, const T & b, const T & c)
+    {
+        const T tailMin = min(b, c);
+        return min(a, tailMin);
+    }
+
+    /// Clamp x to the range [a, b]: raise it to at least a, then cap it at b.
+    template <typename T> 
+    inline T clamp(const T & x, const T & a, const T & b)
+    {
+        const T raised = max(x, a);
+        return min(raised, b);
+    }
+
+    /** Round x up to the nearest power of two. 
+    * @see http://graphics.stanford.edu/~seander/bithacks.html
+    * @warning Behaviour for 0 is undefined.
+    * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
+    * @note nextPowerOfTwo(x) = 2 << log2(x-1)
+    */
+    inline uint32 nextPowerOfTwo(uint32 x)
+    {
+        nvDebugCheck( x != 0 );
+#if 1	// On modern CPUs this is supposed to be as fast as using the bsr instruction.
+        // Copy the highest set bit of (x - 1) into every lower position...
+        uint32 smeared = x - 1;
+        for (uint32 shift = 1; shift < 32; shift <<= 1) {
+            smeared |= smeared >> shift;
+        }
+        // ...so that adding one carries into the next power of two.
+        return smeared + 1;
+#else
+        uint candidate = 1;
+        while( candidate < x ) {
+            candidate <<= 1;
+        }
+        return candidate;
+#endif
+    }
+
+    inline uint64 nextPowerOfTwo(uint64 x)
+    {
+        nvDebugCheck(x != 0);
+        // Same bit-smearing as the uint32 overload, widened to 64 bits. The previous doubling
+        // loop kept its counter in a 32-bit uint, which wrapped to 0 and never ended for x > 2^31.
+        x--;
+        for (uint shift = 1; shift < 64; shift <<= 1) x |= x >> shift;
+        return x + 1;
+    }
+
+    // True when clearing the lowest set bit of n leaves nothing; note that 0 is reported as a power of two as well.
+    template <typename T>
+    inline bool isPowerOfTwo(T n)
+    {
+        return !(n & (n-1));
+    }
+
+
+    /// Delete every element of a container that exposes PseudoIndex, start(), isDone() and advance().
+    /// Only the pointed-to objects are deleted; the container entries themselves are left as they are.
+    template <typename T>
+    void deleteAll(T & container)
+    {
+        typename T::PseudoIndex cursor = container.start();
+        for (; !container.isDone(cursor); container.advance(cursor)) {
+            delete container[cursor];
+        }
+    }
+
+
+
+    // @@ Specialize these methods for numeric, pointer, and pod types.
+
+    /// Default-construct, in place, the elements of ptr at indices [old_size, new_size).
+    template <typename T>
+    void construct_range(T * restrict ptr, uint new_size, uint old_size) {
+        for (uint slot = old_size; slot < new_size; ++slot) new(ptr + slot) T; // placement new
+        // Nothing is constructed when new_size <= old_size.
+    }
+
+    /// Copy-construct, in place, the elements of ptr at indices [old_size, new_size) from elem.
+    template <typename T>
+    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T & elem) {
+        for (uint slot = old_size; slot < new_size; ++slot) new(ptr + slot) T(elem); // placement new
+        // Nothing is constructed when new_size <= old_size.
+    }
+
+    /// Copy-construct, in place, ptr[i] from src[i] for every i in [old_size, new_size).
+    template <typename T>
+    void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
+        for (uint slot = old_size; slot < new_size; ++slot) new(ptr + slot) T(src[slot]); // placement new
+        // src is indexed with the same index as ptr, not from 0.
+    }
+
+    /// Run the destructor of the elements of ptr at indices [new_size, old_size); no memory is released.
+    template <typename T>
+    void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
+        for (uint slot = new_size; slot < old_size; ++slot) ptr[slot].~T(); // Explicit call to the destructor
+        // Nothing is destroyed when old_size <= new_size.
+    }
+
+    /// Assign value to the first count elements of dst.
+    template <typename T>
+    void fill(T * restrict dst, uint count, const T & value) {
+        // Plain assignment: the destination elements must already be constructed.
+        for (uint slot = 0; slot < count; ++slot) dst[slot] = value;
+    }
+
+    /// Assign the first count elements of src to dst, front to back.
+    template <typename T>
+    void copy_range(T * restrict dst, const T * restrict src, uint count) {
+        // Both pointers are declared restrict, i.e. the two ranges are promised not to overlap.
+        for (uint slot = 0; slot < count; ++slot) dst[slot] = src[slot];
+    }
+
+    /// Linear search for element in ptr[begin, end). On the first match its position is stored in *index (when index is not NULL).
+    /// @return true when a match was found, false otherwise (*index is then left untouched).
+    template <typename T>
+    bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
+        for (uint pos = begin; pos < end; ++pos) {
+            if (!(ptr[pos] == element)) continue;
+            if (index != NULL) *index = pos;
+            return true;
+        }
+        return false;
+    }
+
+} // nv namespace
+
+#endif // NV_CORE_UTILS_H
diff --git a/thirdparty/thekla_atlas/nvcore/nvcore.h b/thirdparty/thekla_atlas/nvcore/nvcore.h
new file mode 100644
index 0000000000..a3deb66be2
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/nvcore.h
@@ -0,0 +1,357 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_CORE_H
+#define NV_CORE_H
+
+// Function linkage: NVCORE_API decorates exported functions, NVCORE_CLASS exported classes.
+#if NVCORE_SHARED
+#ifdef NVCORE_EXPORTS // presumably defined only while building the library itself - confirm in the build scripts
+#define NVCORE_API DLL_EXPORT
+#define NVCORE_CLASS DLL_EXPORT_CLASS
+#else // code that links against the shared library
+#define NVCORE_API DLL_IMPORT
+#define NVCORE_CLASS DLL_IMPORT // NOTE(review): not symmetric with DLL_EXPORT_CLASS above - confirm this is intended
+#endif
+#else // NVCORE_SHARED
+#define NVCORE_API // static build: both macros expand to nothing
+#define NVCORE_CLASS
+#endif // NVCORE_SHARED
+
+
+// Platform definitions
+#include <posh.h>
+
+// OS: the chain below defines one or more of these (plus NV_OS_OSX, NV_OS_FREEBSD, NV_OS_OPENBSD and NV_OS_DURANGO) to 1:
+// NV_OS_WIN32
+// NV_OS_WIN64
+// NV_OS_MINGW
+// NV_OS_CYGWIN
+// NV_OS_LINUX
+// NV_OS_UNIX
+// NV_OS_DARWIN
+// NV_OS_XBOX
+// NV_OS_ORBIS
+// NV_OS_IOS
+
+#define NV_OS_STRING POSH_OS_STRING
+
+#if defined POSH_OS_LINUX // Only the first matching branch is taken, so the order of the tests matters.
+#   define NV_OS_LINUX 1
+#   define NV_OS_UNIX 1
+#elif defined POSH_OS_ORBIS
+#   define NV_OS_ORBIS 1
+#elif defined POSH_OS_FREEBSD
+#   define NV_OS_FREEBSD 1
+#   define NV_OS_UNIX 1
+#elif defined POSH_OS_OPENBSD
+#   define NV_OS_OPENBSD 1
+#   define NV_OS_UNIX 1
+#elif defined POSH_OS_CYGWIN32
+#   define NV_OS_CYGWIN 1
+#elif defined POSH_OS_MINGW
+#   define NV_OS_MINGW 1
+#   define NV_OS_WIN32 1
+#elif defined POSH_OS_OSX
+#   define NV_OS_OSX 1      // IC: Adding this, because iOS defines NV_OS_DARWIN too.
+#   define NV_OS_DARWIN 1
+#   define NV_OS_UNIX 1
+#elif defined POSH_OS_IOS
+#   define NV_OS_DARWIN 1 //ACS should we keep this on IOS?
+#   define NV_OS_UNIX 1
+#   define NV_OS_IOS 1
+#elif defined POSH_OS_UNIX // generic fallback for Unix flavours not matched above
+#   define NV_OS_UNIX 1
+#elif defined POSH_OS_WIN64 // tested before POSH_OS_WIN32 - presumably posh defines both on 64-bit Windows; confirm
+#   define NV_OS_WIN32 1
+#   define NV_OS_WIN64 1
+#elif defined POSH_OS_WIN32
+#   define NV_OS_WIN32 1
+#elif defined POSH_OS_XBOX
+#   define NV_OS_XBOX 1
+#elif defined POSH_OS_DURANGO
+#   define NV_OS_DURANGO 1
+#else
+#   error "Unsupported OS"
+#endif
+
+
+// Is this a console OS? (i.e. connected to a TV) NV_OS_CONSOLE is left undefined on every other platform.
+#if NV_OS_ORBIS || NV_OS_XBOX || NV_OS_DURANGO
+#   define NV_OS_CONSOLE 1
+#endif 
+
+
+// Threading: NV_OS_USE_PTHREAD and NV_OS_HAS_TLS_QUALIFIER are always defined, to 0 or 1.
+// some platforms don't implement __thread or similar for thread-local-storage
+#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
+#   define NV_OS_USE_PTHREAD 1
+#   if NV_OS_IOS
+#       define NV_OS_HAS_TLS_QUALIFIER 0
+#   else
+#       define NV_OS_HAS_TLS_QUALIFIER 1
+#   endif
+#else // neither Unix-like nor Orbis
+#   define NV_OS_USE_PTHREAD 0
+#   define NV_OS_HAS_TLS_QUALIFIER 1
+#endif
+
+
+// CPUs: exactly one of these is defined to 1 by the chain below.
+// NV_CPU_X86
+// NV_CPU_X86_64
+// NV_CPU_PPC
+// NV_CPU_ARM
+
+#define NV_CPU_STRING   POSH_CPU_STRING
+
+#if defined POSH_CPU_X86_64 // 64-bit x86 deliberately does not also set NV_CPU_X86 (see the disabled define below)
+//#   define NV_CPU_X86 1
+#   define NV_CPU_X86_64 1
+#elif defined POSH_CPU_X86
+#   define NV_CPU_X86 1
+#elif defined POSH_CPU_PPC
+#   define NV_CPU_PPC 1
+#elif defined POSH_CPU_STRONGARM // the only ARM flavour recognised here
+#   define NV_CPU_ARM 1
+#else
+#   error "Unsupported CPU"
+#endif
+
+
+// Compiler: NV_CC_STRING names the compiler; clang sets both NV_CC_CLANG and NV_CC_GNUC.
+// NV_CC_GNUC
+// NV_CC_MSVC
+// NV_CC_CLANG
+
+#if defined POSH_COMPILER_CLANG
+#   define NV_CC_CLANG  1
+#   define NV_CC_GNUC   1    // Clang is compatible with GCC.
+#   define NV_CC_STRING "clang"
+#elif defined POSH_COMPILER_GCC
+#   define NV_CC_GNUC   1
+#   define NV_CC_STRING "gcc"
+#elif defined POSH_COMPILER_MSVC
+#   define NV_CC_MSVC   1
+#   define NV_CC_STRING "msvc"
+#else
+#   error "Unsupported compiler"
+#endif
+
+#if NV_CC_MSVC
+#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
+#else
+// NV_CC_CPP11 means "deleted functions, rvalue references and static_assert are usable", not full C++11 conformance.
+// @@ ES: __has_feature doesn't work in gcc. These 3 features are available in GCC >= 4.4.
+#ifdef __clang__
+#define NV_CC_CPP11 (__has_feature(cxx_deleted_functions) && __has_feature(cxx_rvalue_references) && __has_feature(cxx_static_assert))
+#elif defined __GNUC__ 
+#define NV_CC_CPP11 ( __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) // NOTE(review): version test only, the selected -std= mode is not consulted
+#endif
+#endif
+
+// Endianness: forwarded unchanged from posh.
+#define NV_LITTLE_ENDIAN    POSH_LITTLE_ENDIAN
+#define NV_BIG_ENDIAN       POSH_BIG_ENDIAN
+#define NV_ENDIAN_STRING    POSH_ENDIAN_STRING
+
+
+// Define the right printf prefix for size_t arguments:
+#if POSH_64BIT_POINTER
+#  define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
+#else
+#  define NV_SIZET_PRINTF_PREFIX // 32-bit pointers: no length modifier
+#endif
+
+
+// cmake config
+#include "nvconfig.h"
+
+#if NV_OS_DARWIN // Darwin takes its fixed-width types from <stdint.h>, everything else from posh.
+#include <stdint.h>
+//#include <inttypes.h>
+
+// Type definitions:
+typedef uint8_t     uint8;
+typedef int8_t      int8;
+
+typedef uint16_t    uint16;
+typedef int16_t     int16;
+
+typedef uint32_t    uint32;
+typedef int32_t     int32;
+
+typedef uint64_t    uint64;
+typedef int64_t     int64;
+
+// POSH gets this wrong due to __LP64__
+#undef POSH_I64_PRINTF_PREFIX
+#define POSH_I64_PRINTF_PREFIX "ll"
+
+#else
+
+// Type definitions:
+typedef posh_u8_t   uint8;
+typedef posh_i8_t   int8;
+
+typedef posh_u16_t  uint16;
+typedef posh_i16_t  int16;
+
+typedef posh_u32_t  uint32;
+typedef posh_i32_t  int32;
+
+//#if NV_OS_DARWIN
+// OSX-64 is supposed to be LP64 (longs and pointers are 64 bits), thus uint64 is defined as 
+// unsigned long. However, some OSX headers define it as unsigned long long, producing errors,
+// even though both types are 64 bit. Ideally posh should handle that, but it has not been
+// updated in ages, so here I'm just falling back to the standard C99 types defined in inttypes.h
+//#include <inttypes.h>
+//typedef posh_u64_t  uint64_t;
+//typedef posh_i64_t  int64_t;
+//#else
+typedef posh_u64_t  uint64;
+typedef posh_i64_t  int64;
+//#endif
+#if NV_OS_DARWIN // Never true here: this is the #else branch of the NV_OS_DARWIN test above.
+// To avoid duplicate definitions.
+#define _UINT64
+#endif
+#endif
+
+// Aliases: uint is always the 32-bit unsigned type.
+typedef uint32      uint;
+
+
+// Version string: "<os>/<compiler>/<cpu>/<endian>-endian - <build date>-<build time>", concatenated at compile time.
+#define NV_VERSION_STRING \
+    NV_OS_STRING "/" NV_CC_STRING "/" NV_CPU_STRING"/" \
+    NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
+
+
+// Disable copy constructor and assignment operator. Use inside the class body, followed by a semicolon.
+#if NV_CC_CPP11
+#define NV_FORBID_COPY(C) \
+    C( const C & ) = delete; \
+    C &operator=( const C & ) = delete
+#else // Pre-C++11: declared private and never defined. Note that this leaves the class in a private: section.
+#define NV_FORBID_COPY(C) \
+    private: \
+    C( const C & ); \
+    C &operator=( const C & )
+#endif
+
+// Disable dynamic allocation on the heap by declaring operator new / new[] private. Leaves the class in a private: section.
+// See Prohibiting Heap-Based Objects in More Effective C++.
+#define NV_FORBID_HEAPALLOC() \
+    private: \
+    void *operator new(size_t size); \
+    void *operator new[](size_t size)
+    //static void *operator new(size_t size);
+    //static void *operator new[](size_t size);
+
+// Token pasting and stringizing macros. The NV_DO_* / NV_STRING2 level exists so that arguments are macro-expanded first.
+#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
+#define NV_DO_STRING_JOIN2(arg1, arg2) arg1 ## arg2
+#define NV_STRING_JOIN3(arg1, arg2, arg3) NV_DO_STRING_JOIN3(arg1, arg2, arg3)
+#define NV_DO_STRING_JOIN3(arg1, arg2, arg3) arg1 ## arg2 ## arg3
+#define NV_STRING2(x) #x
+#define NV_STRING(x) NV_STRING2(x)
+
+#if NV_CC_MSVC // do { ... } while(false) wrapper for statement-like macros; MSVC warning 4127 is disabled around the while(false).
+#define NV_MULTI_LINE_MACRO_BEGIN do {  
+#define NV_MULTI_LINE_MACRO_END \
+    __pragma(warning(push)) \
+    __pragma(warning(disable:4127)) \
+    } while(false) \
+    __pragma(warning(pop))  
+#else
+#define NV_MULTI_LINE_MACRO_BEGIN do {
+#define NV_MULTI_LINE_MACRO_END } while(false)
+#endif
+
+#if NV_CC_CPP11 // Compile-time assertion: use the native static_assert when it is available.
+#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
+#else // Fallback: a false condition gives a negative array size, which no compiler accepts (a size of 0 is allowed as an extension by GCC and clang).
+#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x) ? 1 : -1]
+#endif
+#define NV_COMPILER_CHECK(x) nvStaticCheck(x)   // I like this name best.
+
+// Make sure every fixed-width typedef has the expected size (the 64-bit pair used to be missing, with the 32-bit checks listed twice).
+NV_COMPILER_CHECK(sizeof(int8) == 1);
+NV_COMPILER_CHECK(sizeof(uint8) == 1);
+NV_COMPILER_CHECK(sizeof(int16) == 2);
+NV_COMPILER_CHECK(sizeof(uint16) == 2);
+NV_COMPILER_CHECK(sizeof(int32) == 4);
+NV_COMPILER_CHECK(sizeof(uint32) == 4);
+NV_COMPILER_CHECK(sizeof(int64) == 8);
+NV_COMPILER_CHECK(sizeof(uint64) == 8);
+
+#include <stddef.h> // for size_t
+template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N]; // Declaration only: its return type is a char array with N elements.
+#define NV_ARRAY_SIZE(x) sizeof(ArraySizeHelper(x)) // Element count of a real array; a pointer argument fails to compile, unlike the variant below.
+//#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+#if 0 // Disabled in The Witness.
+#if NV_CC_MSVC
+#define NV_MESSAGE(x) message(__FILE__ "(" NV_STRING(__LINE__) ") : " x)
+#else
+#define NV_MESSAGE(x) message(x)
+#endif
+#else // Active branch: NV_MESSAGE(x) expands to nothing.
+#define NV_MESSAGE(x) 
+#endif
+
+
+// Startup initialization macro: runs some_code from the constructor of a static object whose names are derived from __LINE__.
+#define NV_AT_STARTUP(some_code) \
+    namespace { \
+        static struct NV_STRING_JOIN2(AtStartup_, __LINE__) { \
+            NV_STRING_JOIN2(AtStartup_, __LINE__)() { some_code; } \
+        } \
+        NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
+    }
+
+// Tell the compiler that the parameter is intentionally unused, to suppress compiler warnings.
+#if NV_CC_MSVC
+#define NV_UNUSED(a) ((a)=(a)) // Self-assignment, so a must be assignable.
+#else
+#define NV_UNUSED(a) _Pragma(NV_STRING(unused(a))) // Expands to "#pragma unused(a)". NOTE(review): confirm every supported GNU-style compiler honours it.
+#endif
+
+// Null index. @@ Move this somewhere else... it's only used by nvmesh.
+//const unsigned int NIL = unsigned int(~0);
+#define NIL uint(~0)
+
+// Null pointer. Only defined here when no earlier header has provided NULL.
+#ifndef NULL
+#define NULL 0
+#endif
+
+// Platform includes: pick the Defs*.h header matching the compiler / OS pair detected above.
+#if NV_CC_MSVC
+#   if NV_OS_WIN32
+#       include "DefsVcWin32.h"
+#   elif NV_OS_XBOX
+#       include "DefsVcXBox.h"
+#   elif NV_OS_DURANGO
+#       include "DefsVcDurango.h"
+#   else
+#       error "MSVC: Platform not supported"
+#   endif
+#elif NV_CC_GNUC // also taken by clang, which sets NV_CC_GNUC as well
+#   if NV_OS_LINUX
+#       include "DefsGnucLinux.h"
+#   elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD // the BSDs share the Darwin definitions
+#       include "DefsGnucDarwin.h"
+#   elif NV_OS_ORBIS
+#       include "DefsOrbis.h"
+#   elif NV_OS_MINGW
+#       include "DefsGnucWin32.h"
+#   elif NV_OS_CYGWIN
+#       error "GCC: Cygwin not supported"
+#   else
+#       error "GCC: Platform not supported"
+#   endif
+#endif
+
+#endif // NV_CORE_H
diff --git a/thirdparty/thekla_atlas/nvcore/scanf.c b/thirdparty/thekla_atlas/nvcore/scanf.c
new file mode 100644
index 0000000000..bf9d293154
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvcore/scanf.c
@@ -0,0 +1,641 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * From: Id: vfscanf.c,v 1.13 1998/09/25 12:20:27 obrien Exp 
+ * From: static char sccsid[] = "@(#)strtol.c	8.1 (Berkeley) 6/4/93";
+ * From: static char sccsid[] = "@(#)strtoul.c	8.1 (Berkeley) 6/4/93";
+ */
+
+#include <stdio.h> 
+#include <stdlib.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#pragma warning(disable : 4244) // conversion from '*' to '*', possible loss of data
+#pragma warning(disable : 4018) // signed/unsigned mismatch
+#pragma warning(disable : 4267) // '=' : conversion from 'size_t' to 'int', possible loss of data
+
+#define strtoq _strtoi64
+#define strtouq _strtoui64
+#define bcopy(b1,b2,len) (memmove((b2), (b1), (len)), (void) 0)
+
+typedef int long long quad_t;
+typedef unsigned long long u_quad_t;
+typedef unsigned char u_char;
+
+#define	BUF		32 	/* Maximum length of numeric string. */
+
+/*
+ * Flags used during conversion.
+ */
+#define	LONG		0x01	/* l: long or double */
+#define	SHORT		0x04	/* h: short */
+#define	SUPPRESS	0x08	/* suppress assignment */
+#define	POINTER		0x10	/* weird %p pointer (`fake hex') */
+#define	NOSKIP		0x20	/* do not skip blanks */
+#define	QUAD		0x400
+
+/*
+ * The following are used in numeric conversions only:
+ * SIGNOK, NDIGITS, DPTOK, and EXPOK are for floating point;
+ * SIGNOK, NDIGITS, PFXOK, and NZDIGITS are for integral.
+ */
+#define	SIGNOK		0x40	/* +/- is (still) legal */
+#define	NDIGITS		0x80	/* no digits detected */
+
+#define	DPTOK		0x100	/* (float) decimal point is still legal */
+#define	EXPOK		0x200	/* (float) exponent (e+3, etc) still legal */
+
+#define	PFXOK		0x100	/* 0x prefix is (still) legal */
+#define	NZDIGITS	0x200	/* no zero digits detected */
+
+/*
+ * Conversion types.
+ */
+#define	CT_CHAR		0	/* %c conversion */
+#define	CT_CCL		1	/* %[...] conversion */
+#define	CT_STRING	2	/* %s conversion */
+#define	CT_INT		3	/* integer, i.e., strtoq or strtouq */
+typedef u_quad_t (*ccfntype)(const char *, char **, int);
+
+static const u_char *__sccl(char *, const u_char *);
+/* Scan NUL-terminated `inp' under format `fmt0', storing via `ap'; returns fields assigned, or -1 on input failure before any conversion. */
+int
+vsscanf(const char *inp, char const *fmt0, va_list ap)
+{
+	int inr;		/* number of input characters still unread */
+	const u_char *fmt = (const u_char *)fmt0;
+	int c;			/* character from format, or conversion */
+	size_t width;		/* field width, or 0 */
+	char *p;		/* points into all kinds of strings */
+	int n;			/* handy integer */
+	int flags;		/* flags as defined above */
+	char *p0;		/* saves original value of p when necessary */
+	int nassigned;		/* number of fields assigned */
+	int nconversions;	/* number of conversions */
+	int nread;		/* number of characters consumed from inp */
+	int base;		/* base argument to strtoq/strtouq */
+	ccfntype ccfn;		/* conversion function (strtoq/strtouq) */
+	char ccltab[256];	/* character class table for %[...] */
+	char buf[BUF];		/* buffer for numeric conversions */
+
+	/* `basefix' is used to avoid `if' tests in the integer scanner */
+	static const short basefix[17] =
+		{ 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+	inr = strlen(inp);
+
+	nassigned = 0;
+	nconversions = 0;
+	nread = 0;
+	base = 0;		/* XXX just to keep gcc happy */
+	ccfn = NULL;		/* XXX just to keep gcc happy */
+	for (;;) {
+		c = *fmt++;
+		if (c == 0)
+			return (nassigned);
+		if (isspace(c)) {
+			while (inr > 0 && isspace((u_char)*inp))	/* <ctype.h> needs an unsigned char value */
+				nread++, inr--, inp++;
+			continue;
+		}
+		if (c != '%')
+			goto literal;
+		width = 0;
+		flags = 0;
+		/*
+		 * switch on the format.  continue if done;
+		 * break once format type is derived.
+		 */
+again:		c = *fmt++;
+		switch (c) {
+		case '%':
+literal:
+			if (inr <= 0)
+				goto input_failure;
+			if (*inp != c)
+				goto match_failure;
+			inr--, inp++;
+			nread++;
+			continue;
+
+		case '*':
+			flags |= SUPPRESS;
+			goto again;
+		case 'l':
+			flags |= LONG;
+			goto again;
+		case 'q':
+			flags |= QUAD;
+			goto again;
+		case 'h':
+			flags |= SHORT;
+			goto again;
+
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9':
+			width = width * 10 + c - '0';
+			goto again;
+
+		/*
+		 * Conversions.
+		 *
+		 */
+		case 'd':
+			c = CT_INT;
+			ccfn = (ccfntype)strtoq;
+			base = 10;
+			break;
+
+		case 'i':
+			c = CT_INT;
+			ccfn = (ccfntype)strtoq;
+			base = 0;
+			break;
+
+		case 'o':
+			c = CT_INT;
+			ccfn = strtouq;
+			base = 8;
+			break;
+
+		case 'u':
+			c = CT_INT;
+			ccfn = strtouq;
+			base = 10;
+			break;
+
+		case 'x':
+			flags |= PFXOK;	/* enable 0x prefixing */
+			c = CT_INT;
+			ccfn = strtouq;
+			base = 16;
+			break;
+
+		case 's':
+			c = CT_STRING;
+			break;
+
+		case '[':
+			fmt = __sccl(ccltab, fmt);
+			flags |= NOSKIP;
+			c = CT_CCL;
+			break;
+
+		case 'c':
+			flags |= NOSKIP;
+			c = CT_CHAR;
+			break;
+
+		case 'p':	/* pointer format is like hex */
+			flags |= POINTER | PFXOK;
+			c = CT_INT;
+			ccfn = strtouq;
+			base = 16;
+			break;
+
+		case 'n':
+			nconversions++;
+			if (flags & SUPPRESS)	/* ??? */
+				continue;
+			if (flags & SHORT)
+				*va_arg(ap, short *) = nread;
+			else if (flags & LONG)
+				*va_arg(ap, long *) = nread;
+			else if (flags & QUAD)
+				*va_arg(ap, quad_t *) = nread;
+			else
+				*va_arg(ap, int *) = nread;
+			continue;
+		}
+
+		/*
+		 * We have a conversion that requires input.
+		 */
+		if (inr <= 0)
+			goto input_failure;
+
+		/*
+		 * Consume leading white space, except for formats
+		 * that suppress this.
+		 */
+		if ((flags & NOSKIP) == 0) {
+			while (isspace((u_char)*inp)) {
+				nread++;
+				if (--inr > 0)
+					inp++;
+				else
+					goto input_failure;
+			}
+			/*
+			 * Note that there is at least one character in
+			 * the buffer, so conversions that do not set NOSKIP
+			 * can no longer result in an input failure.
+			 */
+		}
+
+		/*
+		 * Do the conversion.
+		 */
+		switch (c) {
+
+		case CT_CHAR:
+			/* scan arbitrary characters (sets NOSKIP) */
+			if (width == 0)
+				width = 1;
+			if (flags & SUPPRESS) {
+				size_t sum = 0;
+				for (;;) {
+					if ((n = inr) < width) {
+						sum += n;
+						inr -= n;	/* the whole remaining input was consumed */
+						inp += n;
+						if (sum == 0)
+							goto input_failure;
+						break;
+					} else {
+						sum += width;
+						inr -= width;
+						inp += width;
+						break;
+					}
+				}
+				nread += sum;
+			} else {
+				if (width > (size_t)inr)
+					width = inr;	/* never copy past the end of the input */
+				bcopy(inp, va_arg(ap, char *), width);
+				inr -= width, inp += width, nread += width;
+				nassigned++;
+			}
+			nconversions++;
+			break;
+
+		case CT_CCL:
+			/* scan a (nonempty) character class (sets NOSKIP) */
+			if (width == 0)
+				width = (size_t)~0;	/* `infinity' */
+			/* take only those things in the class */
+			if (flags & SUPPRESS) {
+				n = 0;
+				while (ccltab[(unsigned char)*inp]) {
+					n++, inr--, inp++;
+					if (--width == 0)
+						break;
+					if (inr <= 0) {
+						if (n == 0)
+							goto input_failure;
+						break;
+					}
+				}
+				if (n == 0)
+					goto match_failure;
+			} else {
+				p0 = p = va_arg(ap, char *);
+				while (ccltab[(unsigned char)*inp]) {
+					inr--;
+					*p++ = *inp++;
+					if (--width == 0)
+						break;
+					if (inr <= 0) {
+						if (p == p0)
+							goto input_failure;
+						break;
+					}
+				}
+				n = p - p0;
+				if (n == 0)
+					goto match_failure;
+				*p = 0;
+				nassigned++;
+			}
+			nread += n;
+			nconversions++;
+			break;
+
+		case CT_STRING:
+			/* like CCL, but zero-length string OK, & no NOSKIP */
+			if (width == 0)
+				width = (size_t)~0;
+			if (flags & SUPPRESS) {
+				n = 0;
+				while (!isspace((u_char)*inp)) {
+					n++, inr--, inp++;
+					if (--width == 0)
+						break;
+					if (inr <= 0)
+						break;
+				}
+				nread += n;
+			} else {
+				p0 = p = va_arg(ap, char *);
+				while (!isspace((u_char)*inp)) {
+					inr--;
+					*p++ = *inp++;
+					if (--width == 0)
+						break;
+					if (inr <= 0)
+						break;
+				}
+				*p = 0;
+				nread += p - p0;
+				nassigned++;
+			}
+			nconversions++;
+			continue;
+
+		case CT_INT:
+			/* scan an integer as if by strtoq/strtouq */
+#ifdef hardway
+			if (width == 0 || width > sizeof(buf) - 1)
+				width = sizeof(buf) - 1;
+#else
+			/* size_t is unsigned, hence this optimisation */
+			if (--width > sizeof(buf) - 2)
+				width = sizeof(buf) - 2;
+			width++;
+#endif
+			flags |= SIGNOK | NDIGITS | NZDIGITS;
+			for (p = buf; width; width--) {
+				c = *inp;
+				/*
+				 * Switch on the character; `goto ok'
+				 * if we accept it as a part of number.
+				 */
+				switch (c) {
+
+				/*
+				 * The digit 0 is always legal, but is
+				 * special.  For %i conversions, if no
+				 * digits (zero or nonzero) have been
+				 * scanned (only signs), we will have
+				 * base==0.  In that case, we should set
+				 * it to 8 and enable 0x prefixing.
+				 * Also, if we have not scanned zero digits
+				 * before this, do not turn off prefixing
+				 * (someone else will turn it off if we
+				 * have scanned any nonzero digits).
+				 */
+				case '0':
+					if (base == 0) {
+						base = 8;
+						flags |= PFXOK;
+					}
+					if (flags & NZDIGITS)
+					    flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
+					else
+					    flags &= ~(SIGNOK|PFXOK|NDIGITS);
+					goto ok;
+
+				/* 1 through 7 always legal */
+				case '1': case '2': case '3':
+				case '4': case '5': case '6': case '7':
+					base = basefix[base];
+					flags &= ~(SIGNOK | PFXOK | NDIGITS);
+					goto ok;
+
+				/* digits 8 and 9 ok iff decimal or hex */
+				case '8': case '9':
+					base = basefix[base];
+					if (base <= 8)
+						break;	/* not legal here */
+					flags &= ~(SIGNOK | PFXOK | NDIGITS);
+					goto ok;
+
+				/* letters ok iff hex */
+				case 'A': case 'B': case 'C':
+				case 'D': case 'E': case 'F':
+				case 'a': case 'b': case 'c':
+				case 'd': case 'e': case 'f':
+					/* no need to fix base here */
+					if (base <= 10)
+						break;	/* not legal here */
+					flags &= ~(SIGNOK | PFXOK | NDIGITS);
+					goto ok;
+
+				/* sign ok only as first character */
+				case '+': case '-':
+					if (flags & SIGNOK) {
+						flags &= ~SIGNOK;
+						goto ok;
+					}
+					break;
+
+				/* x ok iff flag still set & 2nd char */
+				case 'x': case 'X':
+					if (flags & PFXOK && p == buf + 1) {
+						base = 16;	/* if %i */
+						flags &= ~PFXOK;
+						goto ok;
+					}
+					break;
+				}
+
+				/*
+				 * If we got here, c is not a legal character
+				 * for a number.  Stop accumulating digits.
+				 */
+				break;
+		ok:
+				/*
+				 * c is legal: store it and look at the next.
+				 */
+				*p++ = c;
+				inp++;		/* always advance so the push-back below stays symmetric */
+				if (--inr > 0)
+					continue;
+				break;		/* end of input */
+			}
+			/*
+			 * If we had only a sign, it is no good; push
+			 * back the sign.  If the number ends in `x',
+			 * it was [sign] '0' 'x', so push back the x
+			 * and treat it as [sign] '0'.
+			 */
+			if (flags & NDIGITS) {
+				if (p > buf) {
+					inp--;
+					inr++;
+				}
+				goto match_failure;
+			}
+			c = ((u_char *)p)[-1];
+			if (c == 'x' || c == 'X') {
+				--p;
+				inp--;
+				inr++;
+			}
+			if ((flags & SUPPRESS) == 0) {
+				u_quad_t res;
+
+				*p = 0;
+				res = (*ccfn)(buf, (char **)NULL, base);
+				if (flags & POINTER)
+					*va_arg(ap, void **) =
+						(void *)(uintptr_t)res;
+				else if (flags & SHORT)
+					*va_arg(ap, short *) = res;
+				else if (flags & LONG)
+					*va_arg(ap, long *) = res;
+				else if (flags & QUAD)
+					*va_arg(ap, quad_t *) = res;
+				else
+					*va_arg(ap, int *) = res;
+				nassigned++;
+			}
+			nread += p - buf;
+			nconversions++;
+			break;
+
+		}
+	}
+input_failure:
+	return (nconversions != 0 ? nassigned : -1);
+match_failure:
+	return (nassigned);
+}
+
+
+/*
+ * Fill in the given table from the scanset at the given format
+ * (just after `[').  Return a pointer to the character past the
+ * closing `]'.  The table has a 1 wherever characters should be
+ * considered part of the scanset.
+ */
+static const u_char *
+__sccl(char *tab, const u_char *fmt)
+{
+	int c, n, v;
+
+	/* first `clear' the whole table */
+	c = *fmt++;		/* first char hat => negated scanset */
+	if (c == '^') {
+		v = 1;		/* default => accept */
+		c = *fmt++;	/* get new first char */
+	} else
+		v = 0;		/* default => reject */
+
+	/* XXX: Will not work if sizeof(tab*) > sizeof(char) */
+	for (n = 0; n < 256; n++)
+		     tab[n] = v;	/* memset(tab, v, 256) */
+
+	if (c == 0)
+		return (fmt - 1);/* format ended before closing ] */
+
+	/*
+	 * Now set the entries corresponding to the actual scanset
+	 * to the opposite of the above.
+	 *
+	 * The first character may be ']' (or '-') without being special;
+	 * the last character may be '-'.
+	 */
+	v = 1 - v;
+	for (;;) {
+		tab[c] = v;		/* take character c */
+doswitch:
+		n = *fmt++;		/* and examine the next */
+		switch (n) {
+
+		case 0:			/* format ended too soon */
+			return (fmt - 1);
+
+		case '-':
+			/*
+			 * A scanset of the form
+			 *	[01+-]
+			 * is defined as `the digit 0, the digit 1,
+			 * the character +, the character -', but
+			 * the effect of a scanset such as
+			 *	[a-zA-Z0-9]
+			 * is implementation defined.  The V7 Unix
+			 * scanf treats `a-z' as `the letters a through
+			 * z', but treats `a-a' as `the letter a, the
+			 * character -, and the letter a'.
+			 *
+			 * For compatibility, the `-' is not considered
+			 * to define a range if the character following
+			 * it is either a close bracket (required by ANSI)
+			 * or is not numerically greater than the character
+			 * we just stored in the table (c).
+			 */
+			n = *fmt;
+			if (n == ']' || n <= c) {	/* `<=': with n == c the fill below would mark c+1 */
+				c = '-';
+				break;	/* resume the for(;;) */
+			}
+			fmt++;
+			/* fill in the range */
+			do {
+			    tab[++c] = v;
+			} while (c < n);
+			c = n;
+			/*
+			 * Alas, the V7 Unix scanf also treats formats
+			 * such as [a-c-e] as `the letters a through e'.
+			 * This too is permitted by the standard....
+			 */
+			goto doswitch;
+			/* NOTREACHED */
+
+		case ']':		/* end of scanset */
+			return (fmt);
+
+		default:		/* just another character */
+			c = n;
+			break;
+		}
+	}
+	/* NOTREACHED */
+}
+
+/*
+int 
+sscanf(const char *ibuf, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+	
+	va_start(ap, fmt);
+	ret = vsscanf(ibuf, fmt, ap);
+	va_end(ap);
+	
+	return(ret);
+}
+*/
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/thekla_atlas/nvimage/BitMap.cpp b/thirdparty/thekla_atlas/nvimage/BitMap.cpp
new file mode 100644
index 0000000000..8cc49644ea
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvimage/BitMap.cpp
@@ -0,0 +1,27 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "BitMap.h"
+
+using namespace nv;
+
+void BitMap::resize(uint w, uint h, bool initValue)
+{
+    BitArray tmp(w*h);
+    // Resize to w x h: bits inside the old extent are copied, every other bit takes initValue.
+    if (initValue) tmp.setAll();
+    else tmp.clearAll();
+    const uint copyW = (w < m_width) ? w : m_width;    // copy the overlap only: when shrinking, the old
+    const uint copyH = (h < m_height) ? h : m_height;  // extent would index outside (or alias rows of) tmp
+    for (uint y = 0; y < copyH; y++)
+    {
+        for (uint x = 0; x < copyW; x++)
+        {
+            // @@ Copying one bit at a time. This could be much faster.
+            if (bitAt(x, y) != initValue) tmp.toggleBitAt(y*w + x);
+        }
+    }
+
+    swap(m_bitArray, tmp);
+    m_width = w;
+    m_height = h;
+}
diff --git a/thirdparty/thekla_atlas/nvimage/BitMap.h b/thirdparty/thekla_atlas/nvimage/BitMap.h
new file mode 100644
index 0000000000..a285321176
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvimage/BitMap.h
@@ -0,0 +1,87 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_IMAGE_BITMAP_H
+#define NV_IMAGE_BITMAP_H
+
+#include "nvimage.h"
+
+#include "nvcore/BitArray.h"
+
+namespace nv 
+{
+    /// Bit map: a width x height grid of bits stored row-major in a BitArray. This should probably be called BitImage.
+    class NVIMAGE_CLASS BitMap
+    {
+    public:
+        BitMap() : m_width(0), m_height(0) {}
+        BitMap(uint w, uint h) : m_width(w), m_height(h), m_bitArray(w*h) {}
+        /// Dimensions, in bits.
+        uint width() const { return m_width; }
+        uint height() const { return m_height; }
+        /// Change the dimensions to w x h (implemented in BitMap.cpp).
+        void resize(uint w, uint h, bool initValue);
+        /// Read a bit by (x, y) coordinate, or by linear index (y * width + x).
+        bool bitAt(uint x, uint y) const
+        {
+            nvDebugCheck(x < m_width && y < m_height);
+            return m_bitArray.bitAt(y * m_width + x);
+        }
+        bool bitAt(uint idx) const
+        {
+            return m_bitArray.bitAt(idx);
+        }
+        /// Set a bit by (x, y) coordinate, or by linear index.
+        void setBitAt(uint x, uint y)
+        {
+            nvDebugCheck(x < m_width && y < m_height);
+            m_bitArray.setBitAt(y * m_width + x);
+        }
+        void setBitAt(uint idx)
+        {
+            m_bitArray.setBitAt(idx);
+        }
+        /// Clear a bit by (x, y) coordinate, or by linear index.
+        void clearBitAt(uint x, uint y)
+        {
+            nvDebugCheck(x < m_width && y < m_height);
+            m_bitArray.clearBitAt(y * m_width + x);
+        }
+        void clearBitAt(uint idx)
+        {
+            m_bitArray.clearBitAt(idx);
+        }
+        /// Forwards to BitArray::clearAll().
+        void clearAll()
+        {
+            m_bitArray.clearAll();
+        }
+        /// Forwards to BitArray::setAll().
+        void setAll()
+        {
+            m_bitArray.setAll();
+        }
+        /// Forwards to BitArray::toggleAll().
+        void toggleAll()
+        {
+            m_bitArray.toggleAll();
+        }
+        /// Swap only the bit storage of two bitmaps; nvCheck requires both to have the same dimensions.
+        friend void swap(BitMap & a, BitMap & b)
+        {
+            nvCheck(a.m_width == b.m_width);
+            nvCheck(a.m_height == b.m_height);
+            swap(a.m_bitArray, b.m_bitArray);
+        }
+
+    private:
+
+        uint m_width;
+        uint m_height;
+        BitArray m_bitArray;
+
+    };
+
+} // nv namespace
+
+#endif // NV_IMAGE_BITMAP_H
diff --git a/thirdparty/thekla_atlas/nvimage/Image.cpp b/thirdparty/thekla_atlas/nvimage/Image.cpp
new file mode 100644
index 0000000000..8c0cbcf4e3
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvimage/Image.cpp
@@ -0,0 +1,210 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Image.h"
+//#include "ImageIO.h"
+
+#include "nvmath/Color.h"
+
+#include "nvcore/Debug.h"
+#include "nvcore/Ptr.h"
+#include "nvcore/Utils.h" // swap
+#include "nvcore/Memory.h" // realloc, free
+
+#include <string.h> // memcpy
+
+
+using namespace nv;
+/// Construct an empty image: no pixel buffer, all dimensions zero (m_depth was previously left uninitialized).
+Image::Image() : m_width(0), m_height(0), m_depth(0), m_format(Format_RGB), m_data(NULL)
+{
+}
+/// Deep copy: allocates a buffer with img's dimensions and copies its format and pixels.
+Image::Image(const Image & img) : m_format(img.m_format), m_data(NULL)
+{
+    // m_data has to be NULL here because allocate() hands the stored pointer to realloc.
+    allocate(img.m_width, img.m_height, img.m_depth);
+    memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth);
+}
+/// Release the pixel buffer via Image::free().
+Image::~Image()
+{
+    free();
+}
+/// Deep-copy assignment. Uses img's depth (not the stale depth of *this) so the buffer matches the memcpy size.
+const Image & Image::operator=(const Image & img)
+{
+    allocate(img.m_width, img.m_height, img.m_depth);
+    m_format = img.m_format;
+    if (this != &img) memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); // memcpy must not overlap
+    return *this;
+}
+
+/// Resize the owned buffer to w * h * d pixels with realloc and record the new dimensions.
+void Image::allocate(uint w, uint h, uint d/*= 1*/)
+{
+    m_data = realloc<Color32>(m_data, w * h * d);
+    m_depth = d;
+    m_height = h;
+    m_width = w;
+}
+/// Free the current buffer and adopt `data` (w x h x d pixels) as this image's buffer.
+void Image::acquire(Color32 * data, uint w, uint h, uint d/*= 1*/)
+{
+    free();
+    m_data = data;
+    m_depth = d;
+    m_height = h;
+    m_width = w;
+}
+
+void Image::resize(uint w, uint h, uint d/*= 1*/) {
+    // Resize to w x h x d: the overlapping region is copied, new pixels become Color32(0,0,0,0), the format is kept.
+    Image img;
+    img.allocate(w, h, d);
+
+    Color32 background(0,0,0,0);
+
+    // Copy image.
+    uint x, y, z;
+    for(z = 0; z < min(d, m_depth); z++) {
+        for(y = 0; y < min(h, m_height); y++) {
+            for(x = 0; x < min(w, m_width); x++) {
+                img.pixel(x, y, z) = pixel(x, y, z);
+            }
+            for(; x < w; x++) {
+                img.pixel(x, y, z) = background;
+            }
+        }
+        for(; y < h; y++) {
+            for(x = 0; x < w; x++) {
+                img.pixel(x, y, z) = background;
+            }
+        }
+    }
+    for(; z < d; z++) {
+        for(y = 0; y < h; y++) {
+            for(x = 0; x < w; x++) {
+                img.pixel(x, y, z) = background;
+            }
+        }
+    }
+
+    swap(m_width, img.m_width);
+    swap(m_height, img.m_height);
+    swap(m_depth, img.m_depth);
+    // m_format is deliberately not swapped: img carries the default Format_RGB, which would discard this image's format.
+    swap(m_data, img.m_data);
+}
+
+/*bool Image::load(const char * name)
+{
+    free();
+
+    AutoPtr<Image> img(ImageIO::load(name));
+    if (img == NULL) {
+        return false;
+    }
+
+    swap(m_width, img->m_width);
+    swap(m_height, img->m_height);
+	swap(m_depth, img->m_depth);
+    swap(m_format, img->m_format);
+    swap(m_data, img->m_data);
+
+    return true;
+}*/
+/// Free the current buffer, then point at caller-provided memory interpreted as w x h x d Color32 pixels.
+void Image::wrap(void * data, uint w, uint h, uint d)
+{
+    free();
+    m_data = (Color32 *)data;
+    m_width = w;
+    m_height = h;
+	m_depth = d;
+}
+/// Forget the current buffer without freeing it and reset all dimensions to zero.
+void Image::unwrap()
+{
+    m_data = NULL;
+    m_width = 0;
+    m_height = 0;
+	m_depth = 0;
+}
+
+/// Release the pixel buffer with the global ::free and null the pointer; dimensions are left as they are.
+void Image::free()
+{
+    ::free(m_data);
+    m_data = NULL;
+}
+
+/// Image width (m_width).
+uint Image::width() const
+{
+    return m_width;
+}
+/// Image height (m_height).
+uint Image::height() const
+{
+    return m_height;
+}
+/// Image depth (m_depth).
+uint Image::depth() const
+{
+	return m_depth;
+}
+/// Read-only pointer to the first pixel of row h, computed as m_data + h * m_width (no depth offset is applied).
+const Color32 * Image::scanline(uint h) const
+{
+    nvDebugCheck(h < m_height);
+    return m_data + h * m_width;
+}
+/// Mutable pointer to the first pixel of row h, computed as m_data + h * m_width (no depth offset is applied).
+Color32 * Image::scanline(uint h)
+{
+    nvDebugCheck(h < m_height);
+    return m_data + h * m_width;
+}
+/// Read-only pointer to the start of the pixel buffer (NULL when nothing is allocated or wrapped).
+const Color32 * Image::pixels() const
+{
+    return m_data;
+}
+/// Mutable pointer to the start of the pixel buffer (NULL when nothing is allocated or wrapped).
+Color32 * Image::pixels()
+{
+    return m_data;
+}
+/// Read-only access to the pixel at linear index idx; nvDebugCheck requires idx < width * height * depth.
+const Color32 & Image::pixel(uint idx) const
+{
+    nvDebugCheck(idx < m_width * m_height * m_depth);
+    return m_data[idx];
+}
+/// Mutable access to the pixel at linear index idx; nvDebugCheck requires idx < width * height * depth.
+Color32 & Image::pixel(uint idx)
+{
+    nvDebugCheck(idx < m_width * m_height * m_depth);
+    return m_data[idx];
+}
+
+/// Current format tag (Format_RGB or Format_ARGB).
+Image::Format Image::format() const
+{
+    return m_format;
+}
+/// Set the format tag; the pixel data itself is not touched.
+void Image::setFormat(Image::Format f)
+{
+    m_format = f;
+}
+/// Assign color c to every pixel of the buffer (width * height * depth entries).
+void Image::fill(Color32 c)
+{
+    Color32 * texel = m_data;
+    for (uint remaining = m_width * m_height * m_depth; remaining != 0; --remaining)
+    {
+        *texel++ = c;
+    }
+}
+
diff --git a/thirdparty/thekla_atlas/nvimage/Image.h b/thirdparty/thekla_atlas/nvimage/Image.h
new file mode 100644
index 0000000000..4c5748cb00
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvimage/Image.h
@@ -0,0 +1,89 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_IMAGE_IMAGE_H
+#define NV_IMAGE_IMAGE_H
+
+#include "nvimage.h"
+#include "nvcore/Debug.h"
+
+namespace nv
+{
+    class Color32;
+
+    /// 32 bit RGBA image: a width x height x depth block of Color32 pixels plus a format tag.
+    class NVIMAGE_CLASS Image
+    {
+    public:
+        /// Format tag stored with the pixels; read and written through format() / setFormat().
+        enum Format 
+        {
+            Format_RGB,
+            Format_ARGB,
+        };
+        /// A default-constructed image has no pixel buffer; copy construction and assignment copy the pixels.
+        Image();
+        Image(const Image & img);
+        ~Image();
+
+        const Image & operator=(const Image & img);
+
+        /// allocate() resizes the buffer with realloc; acquire() frees the current buffer and adopts `data`.
+        void allocate(uint w, uint h, uint d = 1);
+        void acquire(Color32 * data, uint w, uint h, uint d = 1);
+        //bool load(const char * name);
+        /// Resize, copying the overlapping region; pixels outside it become Color32(0,0,0,0).
+        void resize(uint w, uint h, uint d = 1);
+        /// wrap() frees the current buffer and points at caller memory; unwrap() drops the pointer without freeing it.
+        void wrap(void * data, uint w, uint h, uint d = 1);
+        void unwrap();
+
+        uint width() const;
+        uint height() const;
+        uint depth() const;
+        /// Pointer to the first pixel of row h.
+        const Color32 * scanline(uint h) const;
+        Color32 * scanline(uint h);
+        /// Pointer to the start of the pixel buffer.
+        const Color32 * pixels() const;
+        Color32 * pixels();
+        /// Pixel by linear index.
+        const Color32 & pixel(uint idx) const;
+        Color32 & pixel(uint idx);
+        /// Pixel by coordinate; the linear index is (z * height + y) * width + x.
+        const Color32 & pixel(uint x, uint y, uint z = 0) const;
+        Color32 & pixel(uint x, uint y,  uint z = 0);
+
+        Format format() const;
+        void setFormat(Format f);
+        /// Assign color c to every pixel.
+        void fill(Color32 c);
+
+    private:
+        void free();
+
+    private:
+        uint m_width;
+        uint m_height;
+        uint m_depth;
+        Format m_format;
+        Color32 * m_data;
+    };
+
+    /// Read-only pixel lookup by coordinate; rows are contiguous and depth slices follow one another.
+    inline const Color32 & Image::pixel(uint x, uint y, uint z) const
+    {
+        nvDebugCheck(x < m_width && y < m_height && z < m_depth);
+        return pixel(x + m_width * (y + m_height * z));
+    }
+    /// Mutable pixel lookup by coordinate; rows are contiguous and depth slices follow one another.
+    inline Color32 & Image::pixel(uint x, uint y, uint z)
+    {
+        nvDebugCheck(x < m_width && y < m_height && z < m_depth);
+        return pixel(x + m_width * (y + m_height * z));
+    }
+
+} // nv namespace
+
+
+#endif // NV_IMAGE_IMAGE_H
diff --git a/thirdparty/thekla_atlas/nvimage/nvimage.h b/thirdparty/thekla_atlas/nvimage/nvimage.h
new file mode 100644
index 0000000000..5c89bd4726
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvimage/nvimage.h
@@ -0,0 +1,48 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_IMAGE_H
+#define NV_IMAGE_H
+
+#include "nvcore/nvcore.h"
+#include "nvcore/Debug.h" // nvDebugCheck
+#include "nvcore/Utils.h" // isPowerOfTwo
+
+// Function linkage
+#if NVIMAGE_SHARED
+#ifdef NVIMAGE_EXPORTS
+#define NVIMAGE_API DLL_EXPORT
+#define NVIMAGE_CLASS DLL_EXPORT_CLASS
+#else
+#define NVIMAGE_API DLL_IMPORT
+#define NVIMAGE_CLASS DLL_IMPORT
+#endif
+#else
+#define NVIMAGE_API
+#define NVIMAGE_CLASS
+#endif
+
+
+namespace nv {
+
+    // Some utility functions:
+    /// Row pitch in bits: w * bitsize rounded up to a multiple of alignmentInBits (a power of two).
+    inline uint computeBitPitch(uint w, uint bitsize, uint alignmentInBits)
+    {
+        nvDebugCheck(isPowerOfTwo(alignmentInBits));
+        const uint padded = w * bitsize + (alignmentInBits - 1); // bias so the division rounds up
+        return (padded / alignmentInBits) * alignmentInBits;
+    }
+    /// Row pitch in bytes: the bit pitch for an alignment of 8 * alignmentInBytes bits, divided by 8.
+    inline uint computeBytePitch(uint w, uint bitsize, uint alignmentInBytes)
+    {
+        const uint bitPitch = computeBitPitch(w, bitsize, 8*alignmentInBytes);
+        nvDebugCheck((bitPitch % 8) == 0);
+        // bitPitch is expected to be a whole number of bytes already; the +7 only guards the division.
+        return (bitPitch + 7) / 8;
+    }
+
+
+} // nv namespace
+
+#endif // NV_IMAGE_H
diff --git a/thirdparty/thekla_atlas/nvmath/Basis.cpp b/thirdparty/thekla_atlas/nvmath/Basis.cpp
new file mode 100644
index 0000000000..0824179633
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Basis.cpp
@@ -0,0 +1,270 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "Basis.h"
+
+using namespace nv;
+
+
+/// Normalize each basis vector independently with normalizeSafe, passing the zero vector as its fallback argument.
+void Basis::normalize(float epsilon /*= NV_EPSILON*/)
+{
+    normal = ::normalizeSafe(normal, Vector3(0.0f), epsilon);
+    tangent = ::normalizeSafe(tangent, Vector3(0.0f), epsilon);
+    bitangent = ::normalizeSafe(bitangent, Vector3(0.0f), epsilon);
+}
+
+
+/// Gram-Schmidt orthogonalization: the normal is kept, then tangent and bitangent are made orthogonal to it in turn.
+/// @note Works only if the vectors are close to orthogonal.
+void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/)
+{
+    // N' = |N|
+    // T' = |T - (N' dot T) N'|
+    // B' = |B - (N' dot B) N' - (T' dot B) T'|
+    // (|v| above denotes the normalized vector.)
+    normal = ::normalize(normal, epsilon);
+    // Remove the normal component from the tangent.
+    tangent -= normal * dot(normal, tangent);
+    tangent = ::normalize(tangent, epsilon);
+    // Remove the normal and tangent components from the bitangent.
+    bitangent -= normal * dot(normal, bitangent);
+    bitangent -= tangent * dot(tangent, bitangent);
+    bitangent = ::normalize(bitangent, epsilon);
+}
+
+
+
+
+/// Robust orthonormalization.
+/// Returns an orthonormal basis even when the original is degenerate.
+void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
+{
+    // Normalize all vectors.
+    normalize(epsilon);
+
+    if (lengthSquared(normal) < epsilon*epsilon)
+    {
+        // Build normal from tangent and bitangent.
+        normal = cross(tangent, bitangent);
+
+        if (lengthSquared(normal) < epsilon*epsilon)
+        {
+            // Arbitrary basis.
+            tangent   = Vector3(1, 0, 0);
+            bitangent = Vector3(0, 1, 0);
+            normal    = Vector3(0, 0, 1);
+            return;
+        }
+
+        normal = nv::normalize(normal, epsilon);
+    }
+
+    // Project tangents to normal plane.
+    tangent -= normal * dot(normal, tangent);
+    bitangent -= normal * dot(normal, bitangent);
+
+    if (lengthSquared(tangent) < epsilon*epsilon)
+    {
+        if (lengthSquared(bitangent) < epsilon*epsilon)
+        {
+            // Arbitrary basis.
+            buildFrameForDirection(normal);
+        }
+        else
+        {
+            // Build tangent from bitangent.
+            bitangent = nv::normalize(bitangent, epsilon);
+
+            tangent = cross(bitangent, normal);
+            nvDebugCheck(isNormalized(tangent, epsilon));
+        }
+    }
+    else
+    {
+        tangent = nv::normalize(tangent, epsilon);
+#if 0
+        bitangent -= tangent * dot(tangent, bitangent);
+
+        if (lengthSquared(bitangent) < epsilon*epsilon)
+        {
+            bitangent = cross(tangent, normal);
+            nvDebugCheck(isNormalized(bitangent, epsilon));
+        }
+        else
+        {
+            bitangent = nv::normalize(bitangent, epsilon);
+        }
+#else
+        if (lengthSquared(bitangent) < epsilon*epsilon)
+        {
+            // Build bitangent from tangent.
+            bitangent = cross(tangent, normal);
+            nvDebugCheck(isNormalized(bitangent, epsilon));
+        }
+        else
+        {
+            bitangent = nv::normalize(bitangent, epsilon);
+
+            // At this point tangent and bitangent are orthogonal to normal, but we don't know their relative orientation.
+            // Rebuild both symmetrically around their bisector so neither vector is favored.
+            Vector3 bisector;
+            if (lengthSquared(tangent + bitangent) < epsilon*epsilon)
+            {
+                bisector = tangent;
+            }
+            else
+            {
+                bisector = nv::normalize(tangent + bitangent);
+            }
+            Vector3 axis = nv::normalize(cross(bisector, normal));
+
+            //nvDebugCheck(isNormalized(axis, epsilon));
+            nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
+
+            if (dot(axis, tangent) > 0)
+            {
+                tangent = bisector + axis;
+                bitangent = bisector - axis;
+            }
+            else
+            {
+                tangent = bisector - axis;
+                bitangent = bisector + axis;
+            }
+
+            // Make sure the resulting tangents are still perpendicular to the normal.
+            tangent -= normal * dot(normal, tangent);
+            bitangent -= normal * dot(normal, bitangent);
+
+            // Double check.
+            nvDebugCheck(equal(dot(normal, tangent), 0.0f, epsilon));
+            nvDebugCheck(equal(dot(normal, bitangent), 0.0f, epsilon));
+
+            // Normalize.
+            tangent = nv::normalize(tangent);
+            bitangent = nv::normalize(bitangent);
+
+            // If tangent and bitangent are not orthogonal, then derive bitangent from tangent, just in case...
+            if (!equal(dot(tangent, bitangent), 0.0f, epsilon)) {
+                bitangent = cross(tangent, normal);
+                bitangent = nv::normalize(bitangent);
+            }
+        }
+#endif
+    }
+
+    /*// Check vector lengths.
+    if (!isNormalized(normal, epsilon))
+    {
+    nvDebug("%f %f %f\n", normal.x, normal.y, normal.z);
+    nvDebug("%f %f %f\n", tangent.x, tangent.y, tangent.z);
+    nvDebug("%f %f %f\n", bitangent.x, bitangent.y, bitangent.z);
+    }*/
+
+    nvDebugCheck(isNormalized(normal, epsilon));
+    nvDebugCheck(isNormalized(tangent, epsilon));
+    nvDebugCheck(isNormalized(bitangent, epsilon));
+
+    // Check vector angles.
+    nvDebugCheck(equal(dot(normal, tangent), 0.0f, epsilon));
+    nvDebugCheck(equal(dot(normal, bitangent), 0.0f, epsilon));
+    nvDebugCheck(equal(dot(tangent, bitangent), 0.0f, epsilon));
+
+    // Check vector orientation.
+    const float det = dot(cross(normal, tangent), bitangent);
+    nvDebugCheck(equal(det, 1.0f, epsilon) || equal(det, -1.0f, epsilon));
+}
+
+
+/// Build an arbitrary frame whose normal is the (normalized) direction d, optionally rotated by angle around d.
+void Basis::buildFrameForDirection(Vector3::Arg d, float angle/*= 0*/)
+{
+    nvCheck(isNormalized(d));
+    normal = d;
+
+    // Start the tangent from the coordinate axis along which the normal has its smallest absolute component.
+    if (fabsf(normal.x) < fabsf(normal.y) && fabsf(normal.x) < fabsf(normal.z))
+    {
+        tangent = Vector3(1, 0, 0);
+    }
+    else if (fabsf(normal.y) < fabsf(normal.z))
+    {
+        tangent = Vector3(0, 1, 0);
+    }
+    else
+    {
+        tangent = Vector3(0, 0, 1);
+    }
+
+    // Orthogonalize: remove the normal component from the tangent.
+    tangent -= normal * dot(normal, tangent);
+    tangent = ::normalize(tangent);
+
+    bitangent = cross(normal, tangent);
+
+    // Rotate frame around normal according to angle.
+    if (angle != 0.0f) {
+        float c = cosf(angle);
+        float s = sinf(angle);
+        Vector3 tmp = c * tangent - s * bitangent;
+        bitangent = s * tangent + c * bitangent;
+        tangent = tmp;
+    }
+}
+/// A basis is valid when none of its three vectors equals the zero vector and its determinant does not equal zero.
+bool Basis::isValid() const
+{
+    if (equal(normal, Vector3(0.0f))) return false;
+    if (equal(tangent, Vector3(0.0f))) return false;
+    if (equal(bitangent, Vector3(0.0f))) return false;
+
+    if (equal(determinant(), 0.0f)) return false;
+
+    return true;
+}
+
+
+/// Transform by this basis (from this basis to object space).
+Vector3 Basis::transform(Vector3::Arg v) const
+{
+    // Linear combination of the basis vectors weighted by v's components,
+    // summed in the order tangent, bitangent, normal.
+    const Vector3 inTangentPlane = tangent * v.x + bitangent * v.y;
+    return inTangentPlane + normal * v.z;
+}
+
+/// Transform by the transpose (from object space to this basis): each output component is v dotted with one basis vector.
+Vector3 Basis::transformT(Vector3::Arg v)
+{
+    return Vector3(dot(tangent, v), dot(bitangent, v), dot(normal, v));
+}
+
+/// Transform by the inverse. (From object space to this basis).
+/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
+Vector3 Basis::transformI(Vector3::Arg v) const
+{
+    const float det = determinant();
+    nvDebugCheck(!equal(det, 0.0f, 0.0f));
+    // The cofactor rows below are left unscaled; the single multiply by 1/det happens on the result.
+    const float idet = 1.0f / det;
+
+    // Rows of the inverse matrix.
+    Vector3 r0(
+        (bitangent.y * normal.z - bitangent.z * normal.y),
+        -(bitangent.x * normal.z - bitangent.z * normal.x),
+        (bitangent.x * normal.y - bitangent.y * normal.x));
+
+    Vector3 r1(
+        -(tangent.y * normal.z - tangent.z * normal.y),
+        (tangent.x * normal.z - tangent.z * normal.x),
+        -(tangent.x * normal.y - tangent.y * normal.x));
+
+    Vector3 r2(
+        (tangent.y * bitangent.z - tangent.z * bitangent.y),
+        -(tangent.x * bitangent.z - tangent.z * bitangent.x),
+        (tangent.x * bitangent.y - tangent.y * bitangent.x));
+
+    return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
+}
+
+
diff --git a/thirdparty/thekla_atlas/nvmath/Basis.h b/thirdparty/thekla_atlas/nvmath/Basis.h
new file mode 100644
index 0000000000..e8146afdbe
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Basis.h
@@ -0,0 +1,82 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_BASIS_H
+#define NV_MATH_BASIS_H
+
+#include "nvmath.h"
+#include "Vector.inl"
+#include "Matrix.h"
+
+namespace nv
+{
+
+    /// Basis class to compute tangent space basis, orthogonalizations and to
+    /// transform vectors from one space to another.
+    class Basis
+    {
+    public:
+
+        /// Create a null basis.
+        Basis() : tangent(0, 0, 0), bitangent(0, 0, 0), normal(0, 0, 0) {}
+
+        /// Create a basis given three vectors. Note the argument order: normal, tangent, bitangent.
+        Basis(Vector3::Arg n, Vector3::Arg t, Vector3::Arg b) : tangent(t), bitangent(b), normal(n) {}
+
+        /// Create a basis from a normal, a tangent and a handedness sign (see build()).
+        Basis(Vector3::Arg n, Vector3::Arg t, float sign)
+        {
+            build(n, t, sign);
+        }
+
+        NVMATH_API void normalize(float epsilon = NV_EPSILON);
+        NVMATH_API void orthonormalize(float epsilon = NV_EPSILON);
+        NVMATH_API void robustOrthonormalize(float epsilon = NV_EPSILON);
+        NVMATH_API void buildFrameForDirection(Vector3::Arg d, float angle = 0);
+
+        /// Handedness of the basis: +1 if the determinant is positive, -1 otherwise (including zero).
+        float handness() const
+        {
+            return determinant() > 0.0f ? 1.0f : -1.0f;
+        }
+
+        /// Build a basis from a normal, a tangent and a handedness sign: bitangent = sign * cross(t, n).
+        void build(Vector3::Arg n, Vector3::Arg t, float sign)
+        {
+            normal = n;
+            tangent = t;
+            bitangent = sign * cross(t, n);
+        }
+
+        /// Compute the determinant of this basis (scalar triple product of tangent, bitangent and normal).
+        float determinant() const
+        {
+            return 
+                tangent.x * bitangent.y * normal.z - tangent.z * bitangent.y * normal.x +
+                tangent.y * bitangent.z * normal.x - tangent.y * bitangent.x * normal.z + 
+                tangent.z * bitangent.x * normal.y - tangent.x * bitangent.z * normal.y;
+        }
+
+        bool isValid() const; // False if any axis, or the determinant, compares equal to zero.
+
+        // Get transform matrix for this basis.
+        NVMATH_API Matrix matrix() const;
+
+        // Transform by this basis. (From this basis to object space).
+        NVMATH_API Vector3 transform(Vector3::Arg v) const;
+
+        // Transform by the transpose. (From object space to this basis).
+        NVMATH_API Vector3 transformT(Vector3::Arg v);
+
+        // Transform by the inverse. (From object space to this basis).
+        NVMATH_API Vector3 transformI(Vector3::Arg v) const;
+
+
+        Vector3 tangent;
+        Vector3 bitangent;
+        Vector3 normal;
+    };
+
+} // nv namespace
+
+#endif // NV_MATH_BASIS_H
diff --git a/thirdparty/thekla_atlas/nvmath/Box.cpp b/thirdparty/thekla_atlas/nvmath/Box.cpp
new file mode 100644
index 0000000000..8f2014a077
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Box.cpp
@@ -0,0 +1,119 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Box.h"
+#include "Box.inl"
+#include "Sphere.h"
+
+using namespace nv;
+
+
+
+
+// Clip the given segment against this box. *t_near and *t_far are in/out parameters along dir.
+bool Box::clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const {
+	// Returns false, leaving *t_near and *t_far untouched, when the segment is clipped away.
+	// Work on local copies: the outputs are only written on success (and this avoids aliasing).
+	float tnear = *t_near;
+	float tfar = *t_far;
+
+	// clip ray segment to box, one axis (slab) at a time
+	for (int i = 0; i < 3; i++)
+	{
+		const float pos = origin.component[i] + tfar * dir.component[i];
+		const float dt = tfar - tnear;
+
+		if (dir.component[i] < 0) {
+			
+			// clip end point
+			if (pos < minCorner.component[i]) {
+                tfar = tnear + dt * (origin.component[i] - minCorner.component[i]) / (origin.component[i] - pos);
+			}
+			
+			// clip start point
+			if (origin.component[i] > maxCorner.component[i]) {
+				tnear = tnear + dt * (origin.component[i] - maxCorner.component[i]) / (tfar * dir.component[i]);
+			}
+		}
+		else {
+
+			// clip end point
+			if (pos > maxCorner.component[i]) {
+				tfar = tnear + dt * (maxCorner.component[i] - origin.component[i]) / (pos - origin.component[i]);
+			}
+
+			// clip start point
+			if (origin.component[i] < minCorner.component[i]) {
+				tnear = tnear + dt * (minCorner.component[i] - origin.component[i]) / (tfar * dir.component[i]);
+			}
+		}
+
+		if (tnear > tfar) {
+			// Clipped away.
+			return false;
+		}
+	}
+
+	// Return result.
+	*t_near = tnear;
+	*t_far = tfar;
+	return true;
+}
+
+
+float nv::distanceSquared(const Box &box, const Vector3 &point) {
+    // The nearest point of the box is the query point clamped, axis by axis,
+    // to the [minCorner, maxCorner] interval; a point inside yields zero.
+    const Vector3 & lo = box.minCorner;
+    const Vector3 & hi = box.maxCorner;
+
+    Vector3 nearest;
+
+    // Below the interval -> lower bound, above it -> upper bound, else unchanged.
+    nearest.x = (point.x < lo.x) ? lo.x : ((point.x > hi.x) ? hi.x : point.x);
+    nearest.y = (point.y < lo.y) ? lo.y : ((point.y > hi.y) ? hi.y : point.y);
+    nearest.z = (point.z < lo.z) ? lo.z : ((point.z > hi.z) ? hi.z : point.z);
+
+    // Squared length of the offset between the query point and its clamp.
+    const Vector3 offset = point - nearest;
+    return lengthSquared(offset);
+}
+
+bool nv::overlap(const Box &box, const Sphere &sphere) {
+    return sphere.radius * sphere.radius > distanceSquared(box, sphere.center); // Strict: a sphere that only touches the box does not overlap.
+}
+
+// Ray/box slab test. p is the ray origin, id the per-component inverse direction; on a hit *t (if not NULL) receives tmin.
+bool nv::intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t /*= NULL*/) {
+    // Precompute these in ray structure?
+    int sdx = (id.x < 0); // 1 when the ray travels towards -x, so the near plane is maxCorner.
+    int sdy = (id.y < 0);
+    int sdz = (id.z < 0);
+
+    float tmin = (box.corner(  sdx).x - p.x) * id.x; // Entry/exit distances for the x slab.
+    float tmax = (box.corner(1-sdx).x - p.x) * id.x;
+    float tymin = (box.corner(  sdy).y - p.y) * id.y;
+    float tymax = (box.corner(1-sdy).y - p.y) * id.y;
+
+    if ((tmin > tymax) || (tymin > tmax)) // The x and y intervals do not overlap.
+        return false;
+
+    if (tymin > tmin) tmin = tymin; // Intersect the intervals: latest entry, earliest exit.
+    if (tymax < tmax) tmax = tymax;
+
+    float tzmin = (box.corner(  sdz).z - p.z) * id.z;
+    float tzmax = (box.corner(1-sdz).z - p.z) * id.z;
+
+    if ((tmin > tzmax) || (tzmin > tmax)) // The z interval misses the combined x/y interval.
+        return false;
+
+    if (tzmin > tmin) tmin = tzmin;
+    if (tzmax < tmax) tmax = tzmax;
+
+    if (tmax < 0) // The whole intersection lies behind the ray origin.
+        return false;
+
+    if (t != NULL) *t = tmin; // Only tmax is tested against 0, so tmin may be negative here.
+
+    return true;
+}
+
diff --git a/thirdparty/thekla_atlas/nvmath/Box.h b/thirdparty/thekla_atlas/nvmath/Box.h
new file mode 100644
index 0000000000..19b5f2a3a5
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Box.h
@@ -0,0 +1,103 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_BOX_H
+#define NV_MATH_BOX_H
+
+#include "Vector.h"
+
+#include <float.h> // FLT_MAX
+
+namespace nv
+{
+    class Vector;
+    class Stream;
+    class Sphere;
+
+    // Axis Aligned Bounding Box, stored as its minimum and maximum corners.
+    class Box
+    {
+    public:
+
+        inline Box() {} // Leaves both corners as Vector3's default constructor leaves them.
+        inline Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) {}
+        inline Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) {}
+
+        Box & operator=(const Box & b);
+
+        operator const float * () const { return reinterpret_cast<const float *>(this); } // Raw float view of the object; presumably minCorner then maxCorner — confirm Vector3 layout.
+
+        // Clear the bounds.
+        void clearBounds();
+
+        // True when min <= max on every axis.
+        bool isValid() const;
+
+        // Build a cube centered on center and with edge = 2*dist
+        void cube(const Vector3 & center, float dist);
+
+        // Build a box, given center and extents.
+        void setCenterExtents(const Vector3 & center, const Vector3 & extents);
+
+        // Get box center.
+        Vector3 center() const;
+
+        // Return the extents (half the size) of the box along each axis.
+        Vector3 extents() const;
+
+        // Return the extent (half the size) of the box along the given axis (0 = x, 1 = y, 2 = z).
+        float extents(uint axis) const;
+
+        // Add a point to this box.
+        void addPointToBounds(const Vector3 & p);
+
+        // Add a box to this box.
+        void addBoxToBounds(const Box & b);
+
+        // Add sphere to this box.
+        void addSphereToBounds(const Vector3 & p, float r);
+
+        // Translate box.
+        void translate(const Vector3 & v);
+
+        // Scale the box.
+        void scale(float s);
+
+        // Expand the box by a fixed amount.
+        void expand(float r);
+
+        // Get the surface area of the box.
+        float area() const;
+ 
+        // Get the volume of the box.
+        float volume() const;
+
+        // Return true if the point lies strictly inside the box (points on the boundary are excluded).
+        bool contains(const Vector3 & p) const;
+
+        // Split the given box in 8 octants and assign the ith one to this box.
+        void setOctant(const Box & box, const Vector3 & center, int i);
+
+
+        // Clip the given segment against this box.
+        bool clipSegment(const Vector3 & origin, const Vector3 & dir, float * t_near, float * t_far) const;
+
+
+        friend Stream & operator<< (Stream & s, Box & box);
+
+        const Vector3 & corner(int i) const { return (&minCorner)[i]; } // 0 -> minCorner, 1 -> maxCorner; relies on the two members being adjacent.
+
+        Vector3 minCorner;
+        Vector3 maxCorner;
+    };
+
+    float distanceSquared(const Box &box, const Vector3 &point); // Zero when the point is inside the box.
+    bool overlap(const Box &box, const Sphere &sphere); // Strict test: squared distance to the center < radius^2.
+
+    // p is ray origin, id is inverse ray direction. t may be NULL; on a hit it receives the entry distance.
+    bool intersect(const Box & box, const Vector3 & p, const Vector3 & id, float * t);
+
+} // nv namespace
+
+
+#endif // NV_MATH_BOX_H
diff --git a/thirdparty/thekla_atlas/nvmath/Box.inl b/thirdparty/thekla_atlas/nvmath/Box.inl
new file mode 100644
index 0000000000..dcfa70ff96
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Box.inl
@@ -0,0 +1,154 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_BOX_INL
+#define NV_MATH_BOX_INL
+
+#include "Box.h"
+#include "Vector.inl"
+
+#include <float.h> // FLT_MAX
+
+namespace nv
+{
+    // Default ctor.
+    //inline Box::Box() { };
+
+    // Copy ctor.
+    //inline Box::Box(const Box & b) : minCorner(b.minCorner), maxCorner(b.maxCorner) { }
+
+    // Init ctor.
+    //inline Box::Box(const Vector3 & mins, const Vector3 & maxs) : minCorner(mins), maxCorner(maxs) { }
+
+    // Assignment operator: copies both corners and returns *this.
+    inline Box & Box::operator=(const Box & b) { minCorner = b.minCorner; maxCorner = b.maxCorner; return *this; }
+
+    // Reset to an inverted (empty) box: min = +FLT_MAX, max = -FLT_MAX on every axis.
+    inline void Box::clearBounds()
+    {
+        maxCorner.set(-FLT_MAX, -FLT_MAX, -FLT_MAX);
+        minCorner.set(FLT_MAX, FLT_MAX, FLT_MAX);
+    }
+
+    // True when min <= max on every axis (so a zero-size box is still valid).
+    inline bool Box::isValid() const
+    {
+        return minCorner.x <= maxCorner.x && minCorner.y <= maxCorner.y && minCorner.z <= maxCorner.z;
+    }
+
+    // Make this box a cube centered on center, extending dist on each side (edge = 2*dist).
+    inline void Box::cube(const Vector3 & center, float dist) {
+        minCorner = center - Vector3(dist);
+        maxCorner = center + Vector3(dist);
+    }
+
+    // Build a box from its center and its extents (half-sizes): corners are center -/+ extents.
+    inline void Box::setCenterExtents(const Vector3 & center, const Vector3 & extents)
+    {
+        minCorner = center - extents;
+        maxCorner = center + extents;
+    }
+
+    // Get box center: the midpoint of the two corners.
+    inline Vector3 Box::center() const
+    {
+        return 0.5f * (minCorner + maxCorner);
+    }
+
+    // Return the extents of the box: half of its size along each axis.
+    inline Vector3 Box::extents() const
+    {
+        return 0.5f * (maxCorner - minCorner);
+    }
+
+    // Return the extent (half the size) of the box along one axis: 0 = x, 1 = y, 2 = z.
+    inline float Box::extents(uint axis) const {
+        nvDebugCheck(axis < 3);
+        switch (axis) {
+            case 0: return (maxCorner.x - minCorner.x) * 0.5f;
+            case 1: return (maxCorner.y - minCorner.y) * 0.5f;
+            case 2: return (maxCorner.z - minCorner.z) * 0.5f;
+        }
+        nvUnreachable(); return 0.0f;
+    }
+
+    // Add a point to this box: each corner moves only as far as needed to enclose p.
+    inline void Box::addPointToBounds(const Vector3 & p)
+    {
+        minCorner = min(minCorner, p);
+        maxCorner = max(maxCorner, p);
+    }
+
+    // Add a box to this box: the result is the union of the two bounds.
+    inline void Box::addBoxToBounds(const Box & b)
+    {
+        minCorner = min(minCorner, b.minCorner);
+        maxCorner = max(maxCorner, b.maxCorner);
+    }
+
+    // Add sphere to this box: grow the bounds to enclose the sphere of radius r centered at p.
+    inline void Box::addSphereToBounds(const Vector3 & p, float r) {
+        minCorner = min(minCorner, p - Vector3(r));
+        maxCorner = max(maxCorner, p + Vector3(r)); // max, not min: the upper corner may only grow.
+    }
+
+    // Translate box: offsets both corners by v, leaving the size unchanged.
+    inline void Box::translate(const Vector3 & v)
+    {
+        minCorner += v;
+        maxCorner += v;
+    }
+
+    // Scale the box: multiplies both corners by s (a scale about the origin, not about the box center).
+    inline void Box::scale(float s)
+    {
+        minCorner *= s;
+        maxCorner *= s;
+    }
+
+    // Grow the box by r on every side: min moves down by r and max up by r on each axis.
+    inline void Box::expand(float r) {
+        maxCorner += Vector3(r);
+        minCorner -= Vector3(r);
+    }
+
+    // Get the surface area of the box. d holds half-sizes, so 2*(w*h + w*l + h*l) becomes 8*(...).
+    inline float Box::area() const
+    {
+        const Vector3 d = extents();
+        return 8.0f * (d.x*d.y + d.x*d.z + d.y*d.z);
+    }	
+
+    // Get the volume of the box. d holds half-sizes, hence the factor 8 = 2*2*2.
+    inline float Box::volume() const
+    {
+        Vector3 d = extents();
+        return 8.0f * (d.x * d.y * d.z);
+    }
+
+    // Return true if the point is strictly inside the box; points on a face, edge or corner are not contained.
+    inline bool Box::contains(const Vector3 & p) const
+    {
+        return 
+            minCorner.x < p.x && minCorner.y < p.y && minCorner.z < p.z &&
+            maxCorner.x > p.x && maxCorner.y > p.y && maxCorner.z > p.z;
+    }
+
+    // Split the given box in 8 octants around center and assign the ith one to this box.
+    inline void Box::setOctant(const Box & box, const Vector3 & center, int i)
+    {
+        minCorner = box.minCorner;
+        maxCorner = box.maxCorner;
+        // Bits of i select the half per axis: 4 -> x, 2 -> y, 1 -> z. A set bit keeps the upper half.
+        if (i & 4) minCorner.x = center.x;
+        else       maxCorner.x = center.x;
+        if (i & 2) minCorner.y = center.y;
+        else       maxCorner.y = center.y;
+        if (i & 1) minCorner.z = center.z;
+        else       maxCorner.z = center.z;
+    }
+
+} // nv namespace
+
+
+#endif // NV_MATH_BOX_INL
diff --git a/thirdparty/thekla_atlas/nvmath/Color.h b/thirdparty/thekla_atlas/nvmath/Color.h
new file mode 100644
index 0000000000..5cdc374bd9
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Color.h
@@ -0,0 +1,150 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_COLOR_H
+#define NV_MATH_COLOR_H
+
+#include "nvmath.h"
+
+namespace nv
+{
+
+    /// 64 bit color stored as BGRA.
+    class NVMATH_CLASS Color64 
+    {
+    public:
+        Color64() { } // Leaves the color uninitialized.
+        Color64(const Color64 & c) : u(c.u) { }
+        Color64(uint16 R, uint16 G, uint16 B, uint16 A) { setRGBA(R, G, B, A); }
+        explicit Color64(uint64 U) : u(U) { } // Takes the packed 64 bit value as is.
+
+        void setRGBA(uint16 R, uint16 G, uint16 B, uint16 A)
+        {
+            r = R;
+            g = G;
+            b = B;
+            a = A;
+        }
+
+        operator uint64 () const {
+            return u;
+        }
+
+        union {
+            struct {
+#if NV_LITTLE_ENDIAN
+                uint16 r, a, b, g; // NOTE(review): not b, g, r, a as "BGRA" and the reverse of the big-endian branch suggest — confirm.
+#else
+                uint16 a: 16;
+                uint16 r: 16;
+                uint16 g: 16;
+                uint16 b: 16;
+#endif
+            };
+            uint64 u; // All four channels viewed as one packed value.
+        };
+    };
+
+    /// 32 bit color stored as BGRA.
+    class NVMATH_CLASS Color32
+    {
+    public:
+        Color32() { } // Leaves the color uninitialized.
+        Color32(const Color32 & c) : u(c.u) { }
+        Color32(uint8 R, uint8 G, uint8 B) { setRGBA(R, G, B, 0xFF); } // Opaque: alpha is set to 0xFF.
+        Color32(uint8 R, uint8 G, uint8 B, uint8 A) { setRGBA( R, G, B, A); }
+        //Color32(uint8 c[4]) { setRGBA(c[0], c[1], c[2], c[3]); }
+        //Color32(float R, float G, float B) { setRGBA(uint(R*255), uint(G*255), uint(B*255), 0xFF); }
+        //Color32(float R, float G, float B, float A) { setRGBA(uint(R*255), uint(G*255), uint(B*255), uint(A*255)); }
+        explicit Color32(uint32 U) : u(U) { } // Takes the packed 32 bit value as is.
+
+        void setRGBA(uint8 R, uint8 G, uint8 B, uint8 A)
+        {
+            r = R;
+            g = G;
+            b = B;
+            a = A;
+        }
+
+        void setBGRA(uint8 B, uint8 G, uint8 R, uint8 A = 0xFF) // Same as setRGBA with the arguments in B, G, R order; alpha defaults to opaque.
+        {
+            r = R;
+            g = G;
+            b = B;
+            a = A;
+        }
+
+        operator uint32 () const {
+            return u;
+        }
+
+        union {
+            struct {
+#if NV_LITTLE_ENDIAN
+                uint8 b, g, r, a; // Reverse of the big-endian branch below.
+#else
+                uint8 a: 8;
+                uint8 r: 8;
+                uint8 g: 8;
+                uint8 b: 8;
+#endif
+            };
+            uint8 component[4]; // The same four bytes, indexed in memory order.
+            uint32 u; // All four channels viewed as one packed value.
+        };
+    };
+
+
+    /// 16 bit 565 BGR color: 5 bits blue, 6 bits green, 5 bits red.
+    class NVMATH_CLASS Color16
+    {
+    public:
+        Color16() { } // Leaves the color uninitialized.
+        Color16(const Color16 & c) : u(c.u) { }
+        explicit Color16(uint16 U) : u(U) { } // Takes the packed 16 bit value as is.
+
+        union {
+            struct {
+#if NV_LITTLE_ENDIAN
+                uint16 b : 5; // NOTE(review): bit-field allocation order is compiler-defined; presumably b lands in the low bits — confirm.
+                uint16 g : 6;
+                uint16 r : 5;
+#else
+                uint16 r : 5;
+                uint16 g : 6;
+                uint16 b : 5;
+#endif
+            };
+            uint16 u; // The three channels viewed as one packed value.
+        };
+    };
+
+    /// 16 bit 4444 BGRA color: 4 bits per channel.
+    class NVMATH_CLASS Color16_4444
+    {
+    public:
+        Color16_4444() { } // Leaves the color uninitialized.
+        Color16_4444(const Color16_4444 & c) : u(c.u) { }
+        explicit Color16_4444(uint16 U) : u(U) { } // Takes the packed 16 bit value as is.
+
+        union {
+            struct {
+#if NV_LITTLE_ENDIAN
+                uint16 b : 4; // NOTE(review): bit-field allocation order is compiler-defined; presumably b lands in the low bits — confirm.
+                uint16 g : 4;
+                uint16 r : 4;
+                uint16 a : 4;
+#else
+                uint16 a : 4;
+                uint16 r : 4;
+                uint16 g : 4;
+                uint16 b : 4;
+#endif
+            };
+            uint16 u; // The four channels viewed as one packed value.
+        };
+    };
+
+} // nv namespace
+
+#endif // NV_MATH_COLOR_H
diff --git a/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp b/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp
new file mode 100644
index 0000000000..a4a95dace4
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/ConvexHull.cpp
@@ -0,0 +1,120 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "ConvexHull.h"
+
+#include "Vector.inl"
+
+#include "nvcore/RadixSort.h"
+#include "nvcore/Array.inl"
+
+using namespace nv;
+// Signed area of triangle (v1, v2, v3): positive for counter-clockwise order, negative for clockwise.
+inline static float triangleArea(Vector2::Arg v1, Vector2::Arg v2, Vector2::Arg v3)
+{
+    return 0.5f * (v3.x * v1.y + v1.x * v2.y + v2.x * v3.y - v2.x * v1.y - v3.x * v2.y - v1.x * v3.y);
+}
+
+
+// Compute the convex hull using Graham Scan. output is overwritten; fewer than 2 points are copied through.
+void nv::convexHull(const Array<Vector2> & input, Array<Vector2> & output, float epsilon/*=0*/)
+{
+    const uint inputCount = input.count();
+    if (inputCount == 0) { output.clear(); return; } // Guard: ranks[0] below would be out of range.
+    if (inputCount == 1) { const Vector2 only = input[0]; output.clear(); output.append(only); return; } // Guard: top[1] / bottom[1] would not exist.
+    Array<float> coords;
+    coords.resize(inputCount);
+    for (uint i = 0; i < inputCount; i++) {
+        coords[i] = input[i].x;
+    }
+
+    RadixSort radix;
+    radix.sort(coords);
+    // ranks[] lists the input indices ordered by x coordinate.
+    const uint * ranks = radix.ranks();
+
+    Array<Vector2> top(inputCount);
+    Array<Vector2> bottom(inputCount);
+
+    Vector2 P = input[ranks[0]];
+    Vector2 Q = input[ranks[inputCount-1]];
+
+    float topy = max(P.y, Q.y);
+    float boty = min(P.y, Q.y);
+    // Candidates for the top chain, in sorted order: everything not below both end points.
+    for (uint i = 0; i < inputCount; i++) {
+        Vector2 p = input[ranks[i]];
+        if (p.y >= boty) top.append(p);
+    }
+    // Candidates for the bottom chain, in reverse sorted order: everything not above both end points.
+    for (uint i = 0; i < inputCount; i++) {
+        Vector2 p = input[ranks[inputCount-1-i]];
+        if (p.y <= topy) bottom.append(p);
+    }
+
+    // Filter top list.
+    output.clear();
+    output.append(top[0]);
+    output.append(top[1]);
+
+    for (uint i = 2; i < top.count(); ) {
+        Vector2 a = output[output.count()-2];
+        Vector2 b = output[output.count()-1];
+        Vector2 c = top[i];
+
+        float area = triangleArea(a, b, c);
+        // Unless (a, b, c) turns clockwise by more than epsilon, b is dropped from the chain.
+        if (area >= -epsilon) {
+            output.popBack();
+        }
+        // c is accepted once the turn is valid or only the first point is left.
+        if (area < -epsilon || output.count() == 1) {
+            output.append(c);
+            i++;
+        }
+    }
+    
+    uint top_count = output.count();
+    output.append(bottom[1]);
+
+    // Filter bottom list.
+    for (uint i = 2; i < bottom.count(); ) {
+        Vector2 a = output[output.count()-2];
+        Vector2 b = output[output.count()-1];
+        Vector2 c = bottom[i];
+
+        float area = triangleArea(a, b, c);
+
+        if (area >= -epsilon) {
+            output.popBack();
+        }
+        // Never pop into the top chain: c is accepted as soon as only the top chain is left.
+        if (area < -epsilon || output.count() == top_count) {
+            output.append(c);
+            i++;
+        }
+    }
+
+    // Remove duplicate element.
+    nvDebugCheck(output.front() == output.back());
+    output.popBack();
+}
+
+/*
+void testConvexHull() {
+
+    Array<Vector2> points;
+    points.append(Vector2(1.00, 1.00));
+    points.append(Vector2(0.00, 0.00));
+    points.append(Vector2(1.00, 1.00));
+    points.append(Vector2(1.00, -1.00));
+    points.append(Vector2(2.00, 5.00));
+    points.append(Vector2(-5.00, 3.00));
+    points.append(Vector2(-4.00, -3.00));
+    points.append(Vector2(7.00, -4.00));
+
+    Array<Vector2> hull;
+    convexHull(points, hull);
+
+}
+*/
+
diff --git a/thirdparty/thekla_atlas/nvmath/ConvexHull.h b/thirdparty/thekla_atlas/nvmath/ConvexHull.h
new file mode 100644
index 0000000000..6c2db5d73f
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/ConvexHull.h
@@ -0,0 +1,17 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_CONVEXHULL_H
+#define NV_MATH_CONVEXHULL_H
+
+#include "nvmath.h"
+#include "nvcore/Array.h"
+
+namespace nv {
+    class Vector2;
+    // Writes the convex hull of input to output (overwritten); epsilon is the signed-area tolerance of the turn test.
+    void convexHull(const Array<Vector2> & input, Array<Vector2> & output, float epsilon = 0);
+
+} // namespace nv
+
+#endif // NV_MATH_CONVEXHULL_H
diff --git a/thirdparty/thekla_atlas/nvmath/Fitting.cpp b/thirdparty/thekla_atlas/nvmath/Fitting.cpp
new file mode 100644
index 0000000000..6cd5cb0f32
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Fitting.cpp
@@ -0,0 +1,1205 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "Fitting.h"
+#include "Vector.inl"
+#include "Plane.inl"
+
+#include "nvcore/Array.inl"
+#include "nvcore/Utils.h" // max, swap
+
+#include <float.h> // FLT_MAX
+//#include <vector>
+#include <string.h>
+
+using namespace nv;
+
+// @@ Move to EigenSolver.h
+
+// @@ We should be able to do something cheaper...
+static Vector3 estimatePrincipalComponent(const float * __restrict matrix)
+{
+	// Rows of the symmetric 3x3 matrix, unpacked from [xx xy xz yy yz zz].
+	const Vector3 rows[3] = {
+		Vector3(matrix[0], matrix[1], matrix[2]),
+		Vector3(matrix[1], matrix[3], matrix[4]),
+		Vector3(matrix[2], matrix[4], matrix[5]),
+	};
+	const float len0 = lengthSquared(rows[0]);
+	const float len1 = lengthSquared(rows[1]);
+	const float len2 = lengthSquared(rows[2]);
+	// Longest row wins: row 0 must be strictly longest, otherwise row 1 must strictly beat row 2.
+	return (len0 > len1 && len0 > len2) ? rows[0] : (len1 > len2 ? rows[1] : rows[2]);
+}
+
+
+static inline Vector3 firstEigenVector_PowerMethod(const float *__restrict matrix)
+{
+    // An all-zero diagonal is treated as "no principal direction".
+    const bool zeroDiagonal = (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0);
+    if (zeroDiagonal) return Vector3(0.0f);
+
+    // Seed the iteration with the longest row of the matrix.
+    Vector3 estimate = estimatePrincipalComponent(matrix);
+
+    // Eight rounds of v <- M * v, each rescaled by the largest (signed) component.
+    for (int remaining = 8; remaining > 0; remaining--)
+    {
+        const float mx = estimate.x * matrix[0] + estimate.y * matrix[1] + estimate.z * matrix[2];
+        const float my = estimate.x * matrix[1] + estimate.y * matrix[3] + estimate.z * matrix[4];
+        const float mz = estimate.x * matrix[2] + estimate.y * matrix[4] + estimate.z * matrix[5];
+
+        const float largest = max(max(mx, my), mz);
+        const Vector3 product(mx, my, mz);
+        estimate = product / largest;
+    }
+
+    return estimate;
+}
+
+
+Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points)
+{
+    // Unweighted mean: sum in input order, then divide by n (n is not checked against zero).
+    Vector3 sum(0.0f);
+    int index = 0;
+    while (index < n)
+    {
+        sum += points[index++];
+    }
+    sum /= float(n);
+    return sum;
+}
+
+Vector3 nv::Fit::computeCentroid(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    // Weighted mean: sum(w_i * p_i) / sum(w_i). The metric argument is not used here.
+    Vector3 weightedSum(0.0f);
+    float weightSum = 0.0f;
+    for (int k = 0; k < n; ++k)
+    {
+        const float w = weights[k];
+        weightSum += w;
+        weightedSum += w*points[k];
+    }
+    weightedSum /= weightSum;
+    return weightedSum;
+}
+
+Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points)
+{
+    // Unweighted mean: sum in input order, then divide by n (n is not checked against zero).
+    Vector4 sum(0.0f);
+    int index = 0;
+    while (index < n)
+    {
+        sum += points[index++];
+    }
+    sum /= float(n);
+    return sum;
+}
+
+Vector4 nv::Fit::computeCentroid(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
+{
+    // Weighted mean: sum(w_i * p_i) / sum(w_i). The metric argument is not used here.
+    Vector4 weightedSum(0.0f);
+    float weightSum = 0.0f;
+    for (int k = 0; k < n; ++k)
+    {
+        const float w = weights[k];
+        weightSum += w;
+        weightedSum += w*points[k];
+    }
+    weightedSum /= weightSum;
+    return weightedSum;
+}
+
+
+
+Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, float *__restrict covariance)
+{
+    // The covariance is accumulated around the mean of the points.
+    const Vector3 mean = computeCentroid(n, points);
+
+    // Output is the upper triangle, row by row: [xx, xy, xz, yy, yz, zz].
+    // The sums are left as-is; they are not divided by n.
+    for (int k = 0; k < 6; k++) covariance[k] = 0.0f;
+
+    for (int i = 0; i < n; i++)
+    {
+        // Offset of this point from the mean.
+        const Vector3 d = points[i] - mean;
+
+        covariance[0] += d.x * d.x;
+        covariance[1] += d.x * d.y;
+        covariance[2] += d.x * d.z;
+        covariance[3] += d.y * d.y;
+        covariance[4] += d.y * d.z;
+        covariance[5] += d.z * d.z;
+    }
+
+    // The mean is handed back so callers do not have to recompute it.
+    return mean;
+}
+
+Vector3 nv::Fit::computeCovariance(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, float *__restrict covariance)
+{
+    // compute the weighted centroid (this is also the return value)
+    Vector3 centroid = computeCentroid(n, points, weights, metric);
+
+    // compute covariance matrix: 6 packed entries [xx xy xz yy yz zz], zeroed first
+    for (int i = 0; i < 6; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector3 a = (points[i] - centroid) * metric; // offset scaled by metric (presumably component-wise — confirm)
+        Vector3 b = weights[i]*a; // the weight is applied once per product a * b
+
+        covariance[0] += a.x * b.x;
+        covariance[1] += a.x * b.y;
+        covariance[2] += a.x * b.z;
+        covariance[3] += a.y * b.y;
+        covariance[4] += a.y * b.z;
+        covariance[5] += a.z * b.z;
+    }
+
+    return centroid;
+}
+
+Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, float *__restrict covariance)
+{
+    // compute the centroid (this is also the return value)
+    Vector4 centroid = computeCentroid(n, points);
+
+    // compute covariance matrix: 10 packed entries [xx xy xz xw yy yz yw zz zw ww], zeroed first
+    for (int i = 0; i < 10; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector4 v = points[i] - centroid; // offset from the centroid; the sums are not divided by n
+
+        covariance[0] += v.x * v.x;
+        covariance[1] += v.x * v.y;
+        covariance[2] += v.x * v.z;
+        covariance[3] += v.x * v.w;
+
+		covariance[4] += v.y * v.y;
+        covariance[5] += v.y * v.z;
+        covariance[6] += v.y * v.w;
+
+		covariance[7] += v.z * v.z;
+		covariance[8] += v.z * v.w;
+
+		covariance[9] += v.w * v.w;
+	}
+
+    return centroid;
+}
+
+Vector4 nv::Fit::computeCovariance(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric, float *__restrict covariance)
+{
+    // compute the weighted centroid (this is also the return value)
+    Vector4 centroid = computeCentroid(n, points, weights, metric);
+
+    // compute covariance matrix: 10 packed entries [xx xy xz xw yy yz yw zz zw ww], zeroed first
+    for (int i = 0; i < 10; i++)
+    {
+        covariance[i] = 0.0f;
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        Vector4 a = (points[i] - centroid) * metric; // offset scaled by metric (presumably component-wise — confirm)
+        Vector4 b = weights[i]*a; // the weight is applied once per product a * b
+
+        covariance[0] += a.x * b.x;
+        covariance[1] += a.x * b.y;
+        covariance[2] += a.x * b.z;
+        covariance[3] += a.x * b.w;
+
+		covariance[4] += a.y * b.y;
+        covariance[5] += a.y * b.z;
+        covariance[6] += a.y * b.w;
+
+		covariance[7] += a.z * b.z;
+		covariance[8] += a.z * b.w;
+
+		covariance[9] += a.w * b.w;
+    }
+
+    return centroid;
+}
+
+
+
+Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points)
+{
+    // Packed covariance of the points (the returned centroid is ignored), then power iteration.
+    float covariance[6];
+    computeCovariance(n, points, covariance);
+    return firstEigenVector_PowerMethod(covariance);
+}
+
+Vector3 nv::Fit::computePrincipalComponent_PowerMethod(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    // Weighted, metric-scaled packed covariance (centroid ignored), then power iteration.
+    float covariance[6];
+    computeCovariance(n, points, weights, metric, covariance);
+    return firstEigenVector_PowerMethod(covariance);
+}
+
+
+
+static inline Vector3 firstEigenVector_EigenSolver3(const float *__restrict matrix)
+{
+    // With an all-zero diagonal the solver is skipped and a null vector returned.
+    const bool zeroDiagonal = (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0);
+
+    float values[3];
+    Vector3 vectors[3];
+    const bool solved = !zeroDiagonal && nv::Fit::eigenSolveSymmetric3(matrix, values, vectors);
+
+    // The solver sorts its results, so slot 0 belongs to the largest eigenvalue.
+    if (solved)
+    {
+        return vectors[0];
+    }
+    return Vector3(0.0f);
+}
+
+Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points)
+{
+    // Packed covariance of the points (the returned centroid is ignored), then the symmetric eigen solver.
+    float covariance[6];
+    computeCovariance(n, points, covariance);
+    return firstEigenVector_EigenSolver3(covariance);
+}
+
+Vector3 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric)
+{
+    // Weighted, metric-scaled packed covariance (centroid ignored), then the symmetric eigen solver.
+    float covariance[6];
+    computeCovariance(n, points, weights, metric, covariance);
+    return firstEigenVector_EigenSolver3(covariance);
+}
+
+
+
+static inline Vector4 firstEigenVector_EigenSolver4(const float *__restrict matrix)
+{
+    // With an all-zero diagonal the solver is skipped and a null vector returned.
+    const bool zeroDiagonal = (matrix[0] == 0 && matrix[4] == 0 && matrix[7] == 0&& matrix[9] == 0);
+
+    float values[4];
+    Vector4 vectors[4];
+    const bool solved = !zeroDiagonal && nv::Fit::eigenSolveSymmetric4(matrix, values, vectors);
+
+    // Slot 0 is returned; presumably the solver orders it as the dominant eigenvector.
+    if (solved)
+    {
+        return vectors[0];
+    }
+    return Vector4(0.0f);
+}
+
+Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points)
+{
+    // Packed 4x4 covariance of the points (10 entries, centroid ignored), then the symmetric eigen solver.
+    float covariance[10];
+    computeCovariance(n, points, covariance);
+    return firstEigenVector_EigenSolver4(covariance);
+}
+
+Vector4 nv::Fit::computePrincipalComponent_EigenSolver(int n, const Vector4 *__restrict points, const float *__restrict weights, Vector4::Arg metric)
+{
+    // Weighted, metric-scaled packed 4x4 covariance (centroid ignored), then the symmetric eigen solver.
+    float covariance[10];
+    computeCovariance(n, points, weights, metric, covariance);
+    return firstEigenVector_EigenSolver4(covariance);
+}
+
+
+
+void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
+// Principal component via SVD: the points fill the rows of a zero-padded square matrix handed to ArvoSVD.
+Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
+{
+	const int dim = (n < 3) ? 3 : n; // Matrix side: n, but never fewer than the 3 columns each point needs.
+    Array<float> Q; Q.resize(dim*dim, 0.0f);
+	for (int i = 0; i < n; ++i)
+	{
+		Q[i*dim+0] = points[i].x;
+		Q[i*dim+1] = points[i].y;
+		Q[i*dim+2] = points[i].z;
+	}
+
+	// Alloc space for the SVD outputs
+    Array<float> diag; diag.resize(dim, 0.0f);
+    Array<float> R; R.resize(dim*dim, 0.0f);
+
+	ArvoSVD(dim, dim, &Q[0], &diag[0], &R[0]);
+
+	// Get the principal component: the first three entries of R
+	return Vector3(R[0], R[1], R[2]);
+}
+// Principal component via SVD: the points fill the rows of a zero-padded square matrix handed to ArvoSVD.
+Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
+{
+	const int dim = (n < 4) ? 4 : n; // Matrix side: n, but never fewer than the 4 columns each point needs.
+    Array<float> Q; Q.resize(dim*dim, 0.0f);
+	for (int i = 0; i < n; ++i)
+	{
+		Q[i*dim+0] = points[i].x;
+		Q[i*dim+1] = points[i].y;
+		Q[i*dim+2] = points[i].z;
+		Q[i*dim+3] = points[i].w;
+	}
+
+	// Alloc space for the SVD outputs
+    Array<float> diag; diag.resize(dim, 0.0f);
+    Array<float> R; R.resize(dim*dim, 0.0f);
+
+	ArvoSVD(dim, dim, &Q[0], &diag[0], &R[0]);
+
+	// Get the principal component: the first four entries of R
+	return Vector4(R[0], R[1], R[2], R[3]);
+}
+
+
+
+Plane nv::Fit::bestPlane(int n, const Vector3 *__restrict points)
+{
+    // Covariance of the points around their centroid (packed symmetric 3x3).
+    float covariance[6];
+    const Vector3 centroid = computeCovariance(n, points, covariance);
+
+    // A zero diagonal (no spread at all) skips the eigen solver entirely.
+    const bool degenerate = (covariance[0] == 0 && covariance[3] == 0 && covariance[5] == 0);
+
+    float values[3];
+    Vector3 axes[3];
+    if (!degenerate && eigenSolveSymmetric3(covariance, values, axes))
+    {
+        // Eigenvectors come back sorted by decreasing eigenvalue, so the last
+        // one (least variance) is used as the plane normal.
+        return Plane(axes[2], centroid);
+    }
+
+    // If no plane defined, then return a horizontal plane.
+    return Plane(Vector3(0, 0, 1), centroid);
+}
+
+bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON*/)
+{
+    // Planar means the smallest eigenvalue of the covariance is below epsilon.
+    float covariance[6];
+    computeCovariance(n, points, covariance);
+
+    float values[3];
+    Vector3 axes[3];
+    const bool solved = eigenSolveSymmetric3(covariance, values, axes);
+
+    // values[] is sorted in decreasing order, so values[2] is the smallest.
+    // A failed solve counts as "not planar".
+    return solved && values[2] < epsilon;
+}
+
+
+
+// Tridiagonal solver from Charles Bloom.
+// Householder transforms followed by QL decomposition.
+// Seems to be based on the code from Numerical Recipes in C.
+
+static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd);
+static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd);
+
+// Eigen decomposition of a symmetric 3x3 matrix.
+//   matrix        the six distinct entries in the order (0,0) (0,1) (0,2) (1,1) (1,2) (2,2)
+//   eigenValues   receives the eigenvalues, largest first
+//   eigenVectors  receives the matching eigenvectors, in the same order
+// Returns false, with both outputs zeroed, when the QL iteration does not converge.
+bool nv::Fit::eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3])
+{
+    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
+
+    // Expand the packed entries into a full symmetric matrix.
+    float full[3][3] = {
+        { matrix[0], matrix[1], matrix[2] },
+        { matrix[1], matrix[3], matrix[4] },
+        { matrix[2], matrix[4], matrix[5] },
+    };
+    float mainDiag[3];
+    float offDiag[3];
+
+    EigenSolver3_Tridiagonal(full, mainDiag, offDiag);
+    const bool converged = EigenSolver3_QLAlgorithm(full, mainDiag, offDiag);
+    if (!converged)
+    {
+        for (int k = 0; k < 3; k++)
+        {
+            eigenValues[k] = 0;
+            eigenVectors[k] = Vector3(0);
+        }
+        return false;
+    }
+
+    // The solver leaves eigenvector c in column c of 'full'; copy each column
+    // into its own output vector together with its eigenvalue.
+    for (int c = 0; c < 3; c++)
+    {
+        eigenValues[c] = mainDiag[c];
+        for (int r = 0; r < 3; r++)
+        {
+            eigenVectors[c].component[r] = full[r][c];
+        }
+    }
+
+    // Order the three pairs by descending eigenvalue.
+    // First bring a strictly largest last entry to the front...
+    const bool lastIsLargest = eigenValues[2] > eigenValues[0] && eigenValues[2] > eigenValues[1];
+    if (lastIsLargest)
+    {
+        swap(eigenValues[0], eigenValues[2]);
+        swap(eigenVectors[0], eigenVectors[2]);
+    }
+    // ...then make sure slot 0 beats slot 1...
+    if (eigenValues[1] > eigenValues[0])
+    {
+        swap(eigenValues[0], eigenValues[1]);
+        swap(eigenVectors[0], eigenVectors[1]);
+    }
+    // ...and finally order the remaining two.
+    if (eigenValues[2] > eigenValues[1])
+    {
+        swap(eigenValues[1], eigenValues[2]);
+        swap(eigenVectors[1], eigenVectors[2]);
+    }
+
+    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2]);
+    nvDebugCheck(eigenValues[1] >= eigenValues[2]);
+
+    return true;
+}
+
+// Householder reduction T = Q^t M Q of a symmetric 3x3 matrix.
+//   Input:
+//     mat, symmetric 3x3 matrix M (only its upper triangle is read)
+//   Output:
+//     mat, orthogonal matrix Q
+//     diag, diagonal entries of T
+//     subd, subdiagonal entries of T in subd[0] and subd[1]; subd[2] is set to 0
+static void EigenSolver3_Tridiagonal(float mat[3][3], float * diag, float * subd)
+{
+    const float epsilon = 1e-08f;
+
+    const float m00 = mat[0][0];
+    float m01 = mat[0][1];
+    float m02 = mat[0][2];
+    const float m11 = mat[1][1];
+    const float m12 = mat[1][2];
+    const float m22 = mat[2][2];
+
+    // Q starts as the identity; the lower-right 2x2 part is replaced below
+    // when a reflection is needed.
+    float q[3][3] = {
+        { 1.0f, 0.0f, 0.0f },
+        { 0.0f, 1.0f, 0.0f },
+        { 0.0f, 0.0f, 1.0f },
+    };
+
+    diag[0] = m00;
+    subd[2] = 0.f;
+
+    // M is already tridiagonal when its (0,2) entry is negligible.
+    const bool needsReflection = fabsf(m02) >= epsilon;
+    if (needsReflection)
+    {
+        // Normalize the (0,1), (0,2) pair; its length becomes the first
+        // subdiagonal entry of T.
+        const float len = sqrtf(m01*m01+m02*m02);
+        m01 /= len;
+        m02 /= len;
+        const float t = 2*m01*m12+m02*(m22-m11);
+
+        diag[1] = m11+m02*t;
+        diag[2] = m22-m02*t;
+        subd[0] = len;
+        subd[1] = m12-m01*t;
+
+        q[1][1] = m01; q[1][2] = m02;
+        q[2][1] = m02; q[2][2] = -m01;
+    }
+    else
+    {
+        diag[1] = m11;
+        diag[2] = m22;
+        subd[0] = m01;
+        subd[1] = m12;
+    }
+
+    for (int r = 0; r < 3; r++)
+    {
+        for (int c = 0; c < 3; c++)
+        {
+            mat[r][c] = q[r][c];
+        }
+    }
+}
+
+// Diagonalizes a symmetric tridiagonal 3x3 matrix in place.
+//   Input:
+//     mat,  3x3 matrix whose columns are rotated alongside the iteration
+//           (eigenSolveSymmetric3 passes the Q produced by EigenSolver3_Tridiagonal)
+//     diag, the three diagonal entries
+//     subd, the subdiagonal entries in subd[0] and subd[1]; subd[2] is only written
+//   Output:
+//     diag, the values the caller reads back as eigenvalues
+//     mat,  the rotated matrix; the caller reads its columns as eigenvectors
+//     subd, overwritten with intermediate values
+//   Returns false when some entry needs more than maxiter iterations.
+static bool EigenSolver3_QLAlgorithm(float mat[3][3], float * diag, float * subd)
+{
+    // QL iteration with implicit shifting to reduce matrix from tridiagonal
+    // to diagonal
+    const int maxiter = 32;
+
+    for (int ell = 0; ell < 3; ell++)
+    {
+        int iter;
+        for (iter = 0; iter < maxiter; iter++)
+        {
+            // Find the first m >= ell whose subdiagonal entry is negligible:
+            // adding it to the sum of the neighbouring diagonal magnitudes
+            // does not change that sum in float arithmetic.
+            int m;
+            for (m = ell; m <= 1; m++)
+            {
+                float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
+                if ( fabsf(subd[m]) + dd == dd )
+                    break;
+            }
+            // Nothing left to eliminate below diag[ell]; go on to the next ell.
+            if ( m == ell )
+                break;
+
+            // Compute the shift term from diag[ell], diag[ell+1] and subd[ell].
+            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
+            float r = sqrtf(g*g+1);
+            if ( g < 0 )
+                g = diag[m]-diag[ell]+subd[ell]/(g-r);
+            else
+                g = diag[m]-diag[ell]+subd[ell]/(g+r);
+            float s = 1, c = 1, p = 0;
+            // Sweep from row m-1 up to row ell, applying one rotation (c, s) per row.
+            for (int i = m-1; i >= ell; i--)
+            {
+                float f = s*subd[i], b = c*subd[i];
+                // Divide by the larger of |f| and |g| when forming c and s.
+                if ( fabsf(f) >= fabsf(g) )
+                {
+                    c = g/f;
+                    r = sqrtf(c*c+1);
+                    subd[i+1] = f*r;
+                    c *= (s = 1/r);
+                }
+                else
+                {
+                    s = f/g;
+                    r = sqrtf(s*s+1);
+                    subd[i+1] = g*r;
+                    s *= (c = 1/r);
+                }
+                g = diag[i+1]-p;
+                r = (diag[i]-g)*s+2*b*c;
+                p = s*r;
+                diag[i+1] = g+p;
+                g = c*r-b;
+
+                // Apply the same rotation to columns i and i+1 of mat.
+                for (int k = 0; k < 3; k++)
+                {
+                    f = mat[k][i+1];
+                    mat[k][i+1] = s*mat[k][i]+c*f;
+                    mat[k][i] = c*mat[k][i]-s*f;
+                }
+            }
+            diag[ell] -= p;
+            subd[ell] = g;
+            subd[m] = 0;
+        }
+
+        // The loop above ran out of iterations without reaching its break.
+        if ( iter == maxiter )
+            // should not get here under normal circumstances
+            return false;
+    }
+
+    return true;
+}
+
+
+
+// Tridiagonal solver for 4x4 symmetric matrices.
+
+static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd);
+static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd);
+
+// Eigen decomposition of a symmetric 4x4 matrix.
+//   matrix        the ten distinct entries, upper triangle row by row:
+//                 (0,0) (0,1) (0,2) (0,3) (1,1) (1,2) (1,3) (2,2) (2,3) (3,3)
+//   eigenValues   receives the eigenvalues, largest first
+//   eigenVectors  receives the matching eigenvectors, in the same order
+// Returns false, with both outputs zeroed, when the QL iteration does not converge.
+bool nv::Fit::eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4])
+{
+    nvDebugCheck(matrix != NULL && eigenValues != NULL && eigenVectors != NULL);
+
+    float subd[4];
+    float diag[4];
+    float work[4][4];
+
+    // Expand the packed entries into a full symmetric matrix.
+    work[0][0] = matrix[0];
+    work[0][1] = work[1][0] = matrix[1];
+    work[0][2] = work[2][0] = matrix[2];
+    work[0][3] = work[3][0] = matrix[3];
+    work[1][1] = matrix[4];
+    work[1][2] = work[2][1] = matrix[5];
+    work[1][3] = work[3][1] = matrix[6];
+    work[2][2] = matrix[7];
+    work[2][3] = work[3][2] = matrix[8];
+    work[3][3] = matrix[9];
+
+    EigenSolver4_Tridiagonal(work, diag, subd);
+    if (!EigenSolver4_QLAlgorithm(work, diag, subd))
+    {
+        for (int i = 0; i < 4; i++) {
+            eigenValues[i] = 0;
+            eigenVectors[i] = Vector4(0);
+        }
+        return false;
+    }
+
+    for (int i = 0; i < 4; i++) {
+        eigenValues[i] = diag[i];
+    }
+
+    // eigenvectors are the columns; make them the rows
+
+    for (int i = 0; i < 4; i++)
+    {
+        for (int j = 0; j < 4; j++)
+        {
+            eigenVectors[j].component[i] = work[i][j];
+        }
+    }
+
+    // sort by eigenvalue, descending, keeping each vector with its value
+
+    for (int i = 0; i < 3; ++i)
+    {
+        for (int j = i+1; j < 4; ++j)
+        {
+            if (eigenValues[j] > eigenValues[i])
+            {
+                swap(eigenValues[i], eigenValues[j]);
+                swap(eigenVectors[i], eigenVectors[j]);
+            }
+        }
+    }
+
+    nvDebugCheck(eigenValues[0] >= eigenValues[1] && eigenValues[0] >= eigenValues[2] && eigenValues[0] >= eigenValues[3]);
+    nvDebugCheck(eigenValues[1] >= eigenValues[2] && eigenValues[1] >= eigenValues[3]);
+    // The last pair must be compared with each other; comparing entry 2 with
+    // itself could never fail.
+    nvDebugCheck(eigenValues[2] >= eigenValues[3]);
+
+    return true;
+}
+
+#include "nvmath/Matrix.inl"
+
+// Sign of x as +1 or -1, never 0: anything that compares >= 0 maps to +1,
+// everything else to -1.
+inline float signNonzero(float x)
+{
+	if (x >= 0.0f)
+	{
+		return 1.0f;
+	}
+	return -1.0f;
+}
+
+static void EigenSolver4_Tridiagonal(float mat[4][4], float * diag, float * subd)
+{
+    // Householder reduction T = Q^t M Q
+    //   Input:
+    //     mat, symmetric 4x4 matrix M
+    //   Output:
+    //     mat, orthogonal matrix Q
+    //     diag, diagonal entries of T
+    //     subd, subdiagonal entries of T (T is symmetric); subd[3] is set to 0
+
+	static const int n = 4;
+
+	// Set epsilon relative to size of elements in matrix.
+	// The running maximum has to start at 0. Starting it at FLT_MAX made epsilon
+	// enormous, so every column was treated as "already tridiagonal" and the
+	// reduction below never ran.
+	static const float relEpsilon = 1e-6f;
+	float maxElement = 0.0f;
+	for (int i = 0; i < n; ++i)
+		for (int j = 0; j < n; ++j)
+			maxElement = max(maxElement, fabsf(mat[i][j]));
+	float epsilon = relEpsilon * maxElement;
+
+	// Iterative algorithm, works for any size of matrix but might be slower than
+	// a closed-form solution for symmetric 4x4 matrices.  Based on this article:
+	// http://en.wikipedia.org/wiki/Householder_transformation#Tridiagonalization
+
+	// Copy element by element through operator() so that nothing here depends
+	// on how Matrix lays out its storage.
+	Matrix A, Q(identity);
+	for (int i = 0; i < n; ++i)
+		for (int j = 0; j < n; ++j)
+			A(i,j) = mat[i][j];
+
+	// We proceed from left to right, making the off-tridiagonal entries zero in
+	// one column of the matrix at a time.
+	for (int k = 0; k < n - 2; ++k)
+	{
+		float sum = 0.0f;
+		for (int j = k+1; j < n; ++j)
+			sum += A(j,k)*A(j,k);
+		float alpha = -signNonzero(A(k+1,k)) * sqrtf(sum);
+		float r = sqrtf(0.5f * (alpha*alpha - A(k+1,k)*alpha));
+
+		// If r is (nearly) zero, skip this column - already in tridiagonal form.
+		// Written as a negated '>' so that r == 0 with epsilon == 0 (all-zero
+		// matrix) and a NaN r are skipped too instead of being divided by.
+		if (!(r > epsilon))
+			continue;
+
+		float v[n] = {};
+		v[k+1] = 0.5f * (A(k+1,k) - alpha) / r;
+		for (int j = k+2; j < n; ++j)
+			v[j] = 0.5f * A(j,k) / r;
+
+		// Householder reflection P = I - 2 v v^t
+		Matrix P(identity);
+		for (int i = 0; i < n; ++i)
+			for (int j = 0; j < n; ++j)
+				P(i,j) -= 2.0f * v[i] * v[j];
+
+		A = mul(mul(P, A), P);
+		Q = mul(Q, P);
+	}
+
+	// A skipped column may leave off-tridiagonal residue slightly above epsilon
+	// (up to sqrt(2) * epsilon), so check against twice epsilon; '<=' keeps the
+	// exact-zero case valid when epsilon is 0.
+	const float tolerance = 2.0f * epsilon;
+	nvDebugCheck(fabsf(A(2,0)) <= tolerance);
+	nvDebugCheck(fabsf(A(0,2)) <= tolerance);
+	nvDebugCheck(fabsf(A(3,0)) <= tolerance);
+	nvDebugCheck(fabsf(A(0,3)) <= tolerance);
+	nvDebugCheck(fabsf(A(3,1)) <= tolerance);
+	nvDebugCheck(fabsf(A(1,3)) <= tolerance);
+
+	for (int i = 0; i < n; ++i)
+		diag[i] = A(i,i);
+	for (int i = 0; i < n - 1; ++i)
+		subd[i] = A(i+1,i);
+	subd[n-1] = 0.0f;
+
+	// Hand Q back with mat[row][col] = Q(row, col).
+	for (int i = 0; i < n; ++i)
+		for (int j = 0; j < n; ++j)
+			mat[i][j] = Q(i,j);
+}
+
+// Diagonalizes a symmetric tridiagonal 4x4 matrix in place.
+//   Input:
+//     mat,  4x4 matrix whose columns are rotated alongside the iteration
+//           (eigenSolveSymmetric4 passes the Q produced by EigenSolver4_Tridiagonal)
+//     diag, the four diagonal entries
+//     subd, the subdiagonal entries in subd[0..2]; subd[3] is only written
+//   Output:
+//     diag, the values the caller reads back as eigenvalues
+//     mat,  the rotated matrix; the caller reads its columns as eigenvectors
+//     subd, overwritten with intermediate values
+//   Returns false when some entry needs more than maxiter iterations.
+static bool EigenSolver4_QLAlgorithm(float mat[4][4], float * diag, float * subd)
+{
+    // QL iteration with implicit shifting to reduce matrix from tridiagonal
+    // to diagonal
+    const int maxiter = 32;
+
+    for (int ell = 0; ell < 4; ell++)
+    {
+        int iter;
+        for (iter = 0; iter < maxiter; iter++)
+        {
+            // Find the first m >= ell whose subdiagonal entry is negligible:
+            // adding it to the sum of the neighbouring diagonal magnitudes
+            // does not change that sum in float arithmetic.
+            int m;
+            for (m = ell; m < 3; m++)
+            {
+                float dd = fabsf(diag[m]) + fabsf(diag[m+1]);
+                if ( fabsf(subd[m]) + dd == dd )
+                    break;
+            }
+            // Nothing left to eliminate below diag[ell]; go on to the next ell.
+            if ( m == ell )
+                break;
+
+            // Compute the shift term from diag[ell], diag[ell+1] and subd[ell].
+            float g = (diag[ell+1]-diag[ell])/(2*subd[ell]);
+            float r = sqrtf(g*g+1);
+            if ( g < 0 )
+                g = diag[m]-diag[ell]+subd[ell]/(g-r);
+            else
+                g = diag[m]-diag[ell]+subd[ell]/(g+r);
+            float s = 1, c = 1, p = 0;
+            // Sweep from row m-1 up to row ell, applying one rotation (c, s) per row.
+            for (int i = m-1; i >= ell; i--)
+            {
+                float f = s*subd[i], b = c*subd[i];
+                // Divide by the larger of |f| and |g| when forming c and s.
+                if ( fabsf(f) >= fabsf(g) )
+                {
+                    c = g/f;
+                    r = sqrtf(c*c+1);
+                    subd[i+1] = f*r;
+                    c *= (s = 1/r);
+                }
+                else
+                {
+                    s = f/g;
+                    r = sqrtf(s*s+1);
+                    subd[i+1] = g*r;
+                    s *= (c = 1/r);
+                }
+                g = diag[i+1]-p;
+                r = (diag[i]-g)*s+2*b*c;
+                p = s*r;
+                diag[i+1] = g+p;
+                g = c*r-b;
+
+                // Apply the same rotation to columns i and i+1 of mat.
+                for (int k = 0; k < 4; k++)
+                {
+                    f = mat[k][i+1];
+                    mat[k][i+1] = s*mat[k][i]+c*f;
+                    mat[k][i] = c*mat[k][i]-s*f;
+                }
+            }
+            diag[ell] -= p;
+            subd[ell] = g;
+            subd[m] = 0;
+        }
+
+        // The loop above ran out of iterations without reaching its break.
+        if ( iter == maxiter )
+            // should not get here under normal circumstances
+            return false;
+    }
+
+    return true;
+}
+
+
+
+// Groups n weighted points into up to four clusters.
+//   n        number of entries in points and weights
+//   points   the samples
+//   weights  per-sample weights used when averaging a cluster
+//   metric   per-component scale applied to differences when measuring distance
+//   cluster  receives the four cluster centers; a cluster that attracted no
+//            weight in the final pass is left at the zero vector
+// Returns the number of clusters with non-zero total weight, or 0 when n <= 0
+// (cluster is left untouched in that case).
+int nv::Fit::compute4Means(int n, const Vector3 *__restrict points, const float *__restrict weights, Vector3::Arg metric, Vector3 *__restrict cluster)
+{
+    // Nothing to cluster, and points[0] below must not be read.
+    if (n <= 0)
+    {
+        return 0;
+    }
+
+    // Compute principal component.
+    float matrix[6];
+    Vector3 centroid = computeCovariance(n, points, weights, metric, matrix);
+    Vector3 principal = firstEigenVector_PowerMethod(matrix);
+
+    // Pick initial solution: find the extent of the points along the
+    // principal axis.
+    float mindps, maxdps;
+    mindps = maxdps = dot(points[0] - centroid, principal);
+
+    for (int i = 1; i < n; ++i)
+    {
+        float dps = dot(points[i] - centroid, principal);
+
+        if (dps < mindps) {
+            mindps = dps;
+        }
+        else if (dps > maxdps) {
+            // Only a larger projection may replace the maximum; an unconditional
+            // else would overwrite it with whatever non-minimum came last.
+            maxdps = dps;
+        }
+    }
+
+    // Two seeds at the extremes, two more at the thirds between them.
+    cluster[0] = centroid + mindps * principal;
+    cluster[1] = centroid + maxdps * principal;
+    cluster[2] = (2.0f * cluster[0] + cluster[1]) / 3.0f;
+    cluster[3] = (2.0f * cluster[1] + cluster[0]) / 3.0f;
+
+    // Now we have to iteratively refine the clusters.
+    while (true)
+    {
+        Vector3 newCluster[4] = { Vector3(0.0f), Vector3(0.0f), Vector3(0.0f), Vector3(0.0f) };
+        float total[4] = {0, 0, 0, 0};
+
+        for (int i = 0; i < n; ++i)
+        {
+            // Find nearest cluster.
+            int nearest = 0;
+            float mindist = FLT_MAX;
+            for (int j = 0; j < 4; j++)
+            {
+                float dist = lengthSquared((cluster[j] - points[i]) * metric);
+                if (dist < mindist)
+                {
+                    mindist = dist;
+                    nearest = j;
+                }
+            }
+
+            newCluster[nearest] += weights[i] * points[i];
+            total[nearest] += weights[i];
+        }
+
+        // Turn the weighted sums into weighted averages.
+        for (int j = 0; j < 4; j++)
+        {
+            if (total[j] != 0)
+                newCluster[j] /= total[j];
+        }
+
+        // Converged once no center moved.
+        if (equal(cluster[0], newCluster[0]) && equal(cluster[1], newCluster[1]) &&
+            equal(cluster[2], newCluster[2]) && equal(cluster[3], newCluster[3]))
+        {
+            return (total[0] != 0) + (total[1] != 0) + (total[2] != 0) + (total[3] != 0);
+        }
+
+        cluster[0] = newCluster[0];
+        cluster[1] = newCluster[1];
+        cluster[2] = newCluster[2];
+        cluster[3] = newCluster[3];
+
+        // Sort clusters by weight.
+        for (int i = 0; i < 4; i++)
+        {
+            for (int j = i; j > 0 && total[j] > total[j - 1]; j--)
+            {
+                swap( total[j], total[j - 1] );
+                swap( cluster[j], cluster[j - 1] );
+            }
+        }
+    }
+}
+
+
+
+// Adaptation of James Arvo's SVD code, as found in ZOH.
+
+// x squared.
+inline float Sqr(float x)
+{
+	return x * x;
+}
+
+// Length of the vector (a, b). The larger magnitude is factored out of the
+// square root, so only a ratio no greater than one is ever squared.
+inline float svd_pythag( float a, float b )
+{
+	const float magA = fabsf(a);
+	const float magB = fabsf(b);
+
+	if( magA > magB )
+	{
+		return magA * sqrtf( 1.0f + Sqr( magB / magA ) );
+	}
+	if( magB > 0.0f )
+	{
+		return magB * sqrtf( 1.0f + Sqr( magA / magB ) );
+	}
+	// Both magnitudes are zero.
+	return 0.0f;
+}
+
+// Magnitude of a carrying the sign of b; b >= 0 counts as positive.
+inline float SameSign( float a, float b )
+{
+	const float magnitude = fabsf( a );
+	return ( b >= 0.0f ) ? magnitude : -magnitude;
+}
+
+// Singular value decomposition of a rows x cols matrix (see the attribution above).
+//   Input:
+//     rows, cols  dimensions of the matrix held in Q
+//     Q           the matrix, addressed as Q[row*cols + col]; overwritten
+//     diag        room for cols floats
+//     R           room for cols*cols floats, addressed as R[row*cols + col]
+//   Output:
+//     diag        the singular values, sorted into descending order
+//     Q           left factor, accumulated in place; its columns are reordered
+//                 together with diag
+//     R           right factor; its rows are reordered together with diag (the
+//                 callers in this file read row 0 as the principal direction)
+//   If a singular value does not converge within MaxIterations sweeps the
+//   routine returns early, before the final sort.
+void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
+{
+	static const int MaxIterations = 30;
+
+	int    i, j, k, l, p, q, iter;
+	float  c, f, h, s, x, y, z;
+	float  norm  = 0.0f;
+	float  g     = 0.0f;
+	float  scale = 0.0f;
+
+    Array<float> temp; temp.resize(cols, 0.0f);
+
+	// Phase 1: Householder reduction to bidiagonal form. diag receives the
+	// diagonal, temp the off-diagonal entries.
+	for( i = 0; i < cols; i++ )
+	{
+		temp[i] = scale * g;
+		scale   = 0.0f;
+		g       = 0.0f;
+		s       = 0.0f;
+		l       = i + 1;
+
+		if( i < rows )
+		{
+			for( k = i; k < rows; k++ ) scale += fabsf( Q[k*cols+i] );
+			if( scale != 0.0f )
+			{
+				for( k = i; k < rows; k++ )
+				{
+					Q[k*cols+i] /= scale;
+					s += Sqr( Q[k*cols+i] );
+				}
+				f = Q[i*cols+i];
+				g = -SameSign( sqrtf(s), f );
+				h = f * g - s;
+				Q[i*cols+i] = f - g;
+				if( i != cols - 1 )
+				{
+					for( j = l; j < cols; j++ )
+					{
+						s = 0.0f;
+						for( k = i; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
+						f = s / h;
+						for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
+					}
+				}
+				for( k = i; k < rows; k++ ) Q[k*cols+i] *= scale;
+			}
+		}
+
+		diag[i] = scale * g;
+		g       = 0.0f;
+		s       = 0.0f;
+		scale   = 0.0f;
+
+		if( i < rows && i != cols - 1 )
+		{
+			for( k = l; k < cols; k++ ) scale += fabsf( Q[i*cols+k] );
+			if( scale != 0.0f )
+			{
+				for( k = l; k < cols; k++ )
+				{
+					Q[i*cols+k] /= scale;
+					s += Sqr( Q[i*cols+k] );
+				}
+				f = Q[i*cols+l];
+				g = -SameSign( sqrtf(s), f );
+				h = f * g - s;
+				Q[i*cols+l] = f - g;
+				for( k = l; k < cols; k++ ) temp[k] = Q[i*cols+k] / h;
+				if( i != rows - 1 )
+				{
+					for( j = l; j < rows; j++ )
+					{
+						s = 0.0f;
+						for( k = l; k < cols; k++ ) s += Q[j*cols+k] * Q[i*cols+k];
+						for( k = l; k < cols; k++ ) Q[j*cols+k] += s * temp[k];
+					}
+				}
+				for( k = l; k < cols; k++ ) Q[i*cols+k] *= scale;
+			}
+		}
+		norm = max( norm, fabsf( diag[i] ) + fabsf( temp[i] ) );
+	}
+
+
+	// Phase 2: accumulate the right-hand transformations in R.
+	for( i = cols - 1; i >= 0; i-- )
+	{
+		if( i < cols - 1 )
+		{
+			if( g != 0.0f )
+			{
+				for( j = l; j < cols; j++ ) R[i*cols+j] = ( Q[i*cols+j] / Q[i*cols+l] ) / g;
+				for( j = l; j < cols; j++ )
+				{
+					s = 0.0f;
+					for( k = l; k < cols; k++ ) s += Q[i*cols+k] * R[j*cols+k];
+					for( k = l; k < cols; k++ ) R[j*cols+k] += s * R[i*cols+k];
+				}
+			}
+			for( j = l; j < cols; j++ )
+			{
+				R[i*cols+j] = 0.0f;
+				R[j*cols+i] = 0.0f;
+			}
+		}
+		R[i*cols+i] = 1.0f;
+		g = temp[i];
+		l = i;
+	}
+
+
+	// Phase 3: accumulate the left-hand transformations in Q.
+	// Start at the last diagonal entry that exists in Q: when rows < cols,
+	// Q[i*cols+i] for i >= rows lies past the end of the buffer.
+	const int lastPivot = ( rows < cols ? rows : cols ) - 1;
+	for( i = lastPivot; i >= 0; i-- )
+	{
+		l = i + 1;
+		g = diag[i];
+		if( i < cols - 1 ) for( j = l; j < cols; j++ ) Q[i*cols+j] = 0.0f;
+		if( g != 0.0f )
+		{
+			g = 1.0f / g;
+			if( i != cols - 1 )
+			{
+				for( j = l; j < cols; j++ )
+				{
+					s = 0.0f;
+					for( k = l; k < rows; k++ ) s += Q[k*cols+i] * Q[k*cols+j];
+					f = ( s / Q[i*cols+i] ) * g;
+					for( k = i; k < rows; k++ ) Q[k*cols+j] += f * Q[k*cols+i];
+				}
+			}
+			for( j = i; j < rows; j++ ) Q[j*cols+i] *= g;
+		}
+		else
+		{
+			for( j = i; j < rows; j++ ) Q[j*cols+i] = 0.0f;
+		}
+		Q[i*cols+i] += 1.0f;
+	}
+
+
+	// Phase 4: diagonalize the bidiagonal form, one singular value (k) at a
+	// time, with at most MaxIterations sweeps each.
+	for( k = cols - 1; k >= 0; k-- )
+	{
+		for( iter = 1; iter <= MaxIterations; iter++ )
+		{
+			int jump = 0;
+
+			// Look for a split. temp[0] is zero whenever this test runs, so
+			// the first test stops the scan at l == 0 before diag[q] could be
+			// read with q == -1.
+			for( l = k; l >= 0; l-- )
+			{
+				q = l - 1;
+				if( fabsf( temp[l] ) + norm == norm ) { jump = 1; break; }
+				if( fabsf( diag[q] ) + norm == norm ) { jump = 0; break; }
+			}
+
+			if( !jump )
+			{
+				// diag[q] is negligible: cancel temp[l].
+				c = 0.0f;
+				s = 1.0f;
+				for( i = l; i <= k; i++ )
+				{
+					f = s * temp[i];
+					temp[i] *= c;
+					if( fabsf( f ) + norm == norm ) break;
+					g = diag[i];
+					h = svd_pythag( f, g );
+					diag[i] = h;
+					h = 1.0f / h;
+					c = g * h;
+					s = -f * h;
+					for( j = 0; j < rows; j++ )
+					{
+						y = Q[j*cols+q];
+						z = Q[j*cols+i];
+						Q[j*cols+q] = y * c + z * s;
+						Q[j*cols+i] = z * c - y * s;
+					}
+				}
+			}
+
+			z = diag[k];
+			if( l == k )
+			{
+				// Converged; make the singular value non-negative.
+				if( z < 0.0f )
+				{
+					diag[k] = -z;
+					for( j = 0; j < cols; j++ ) R[k*cols+j] *= -1.0f;
+				}
+				break;
+			}
+			if( iter >= MaxIterations ) return;
+
+			// Shift computed from the trailing entries of diag and temp.
+			x = diag[l];
+			q = k - 1;
+			y = diag[q];
+			g = temp[q];
+			h = temp[k];
+			f = ( ( y - z ) * ( y + z ) + ( g - h ) * ( g + h ) ) / ( 2.0f * h * y );
+			g = svd_pythag( f, 1.0f );
+			f = ( ( x - z ) * ( x + z ) + h * ( ( y / ( f + SameSign( g, f ) ) ) - h ) ) / x;
+
+			// One sweep of rotations, applied to R and Q as it goes.
+			c = 1.0f;
+			s = 1.0f;
+			for( j = l; j <= q; j++ )
+			{
+				i = j + 1;
+				g = temp[i];
+				y = diag[i];
+				h = s * g;
+				g = c * g;
+				z = svd_pythag( f, h );
+				temp[j] = z;
+				c = f / z;
+				s = h / z;
+				f = x * c + g * s;
+				g = g * c - x * s;
+				h = y * s;
+				y = y * c;
+				for( p = 0; p < cols; p++ )
+				{
+					x = R[j*cols+p];
+					z = R[i*cols+p];
+					R[j*cols+p] = x * c + z * s;
+					R[i*cols+p] = z * c - x * s;
+				}
+				z = svd_pythag( f, h );
+				diag[j] = z;
+				if( z != 0.0f )
+				{
+					z = 1.0f / z;
+					c = f * z;
+					s = h * z;
+				}
+				f = c * g + s * y;
+				x = c * y - s * g;
+				for( p = 0; p < rows; p++ )
+				{
+					y = Q[p*cols+j];
+					z = Q[p*cols+i];
+					Q[p*cols+j] = y * c + z * s;
+					Q[p*cols+i] = z * c - y * s;
+				}
+			}
+			temp[l] = 0.0f;
+			temp[k] = f;
+			diag[k] = x;
+		}
+	}
+
+	// Sort the singular values into descending order.
+
+	for( i = 0; i < cols - 1; i++ )
+	{
+		float biggest = diag[i];  // Biggest singular value so far.
+		int   bindex  = i;        // The row/col it occurred in.
+		for( j = i + 1; j < cols; j++ )
+		{
+			if( diag[j] > biggest )
+			{
+				biggest = diag[j];
+				bindex  = j;
+			}
+		}
+		if( bindex != i )  // Need to swap rows and columns.
+		{
+			// Swap columns in Q (one entry per row of Q).
+			for( p = 0; p < rows; p++ )
+				swap(Q[p*cols+i], Q[p*cols+bindex]);
+
+			// Swap rows in R. R is cols x cols, so a row holds cols entries,
+			// not rows entries.
+			for( p = 0; p < cols; p++ )
+				swap(R[i*cols+p], R[bindex*cols+p]);
+
+			// Swap elements in diag.
+			swap(diag[i], diag[bindex]);
+		}
+	}
+}
diff --git a/thirdparty/thekla_atlas/nvmath/Fitting.h b/thirdparty/thekla_atlas/nvmath/Fitting.h
new file mode 100644
index 0000000000..7a88cd28fd
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Fitting.h
@@ -0,0 +1,50 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_FITTING_H
+#define NV_MATH_FITTING_H
+
+#include "Vector.h"
+#include "Plane.h"
+
+namespace nv
+{
+    // Fitting helpers for point sets: centroids, covariance, principal
+    // directions, plane fitting, symmetric eigen solvers and 4-means clustering.
+    namespace Fit
+    {
+        // Centroid of n points. The second overload of each pair also takes
+        // per-point weights and a per-component metric.
+        // NOTE(review): how weights and metric enter the result is defined in
+        // the implementation file - confirm there.
+        Vector3 computeCentroid(int n, const Vector3 * points);
+        Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
+
+        Vector4 computeCentroid(int n, const Vector4 * points);
+        Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
+
+        // Fills 'covariance' with the packed symmetric covariance matrix; callers
+        // such as bestPlane use the return value as the centroid. The Vector3
+        // callers pass a float[6]; presumably the Vector4 forms need the float[10]
+        // that eigenSolveSymmetric4 takes - TODO confirm.
+        Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
+        Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);
+
+        Vector4 computeCovariance(int n, const Vector4 * points, float * covariance);
+        Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance);
+
+        // Principal direction of the points; the suffix names the method used.
+        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
+        Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
+
+        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
+        Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
+
+		Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points);
+        Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
+
+        // SVD variants: the points are stored in an n x n matrix, decomposed,
+        // and the leading entries of the first row of the right factor are returned.
+        Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
+        Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points);
+
+        // Plane through the centroid whose normal is the eigenvector with the
+        // smallest eigenvalue; falls back to the normal (0, 0, 1) when the
+        // covariance diagonal is all zero or the eigen solver fails.
+        Plane bestPlane(int n, const Vector3 * points);
+        // True when the smallest covariance eigenvalue is below epsilon; false
+        // otherwise or when the eigen solver fails.
+        bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);
+
+        // Eigen decomposition of a symmetric matrix given as its upper triangle,
+        // row by row. Eigenvalues come back in descending order with
+        // eigenVectors[i] belonging to eigenValues[i]. Returns false, with both
+        // outputs zeroed, when the iteration does not converge.
+        bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
+        bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]);
+
+        // Returns number of clusters [1-4].
+        // 'cluster' must have room for four centers.
+        int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster);
+    }
+
+} // nv namespace
+
+#endif // NV_MATH_FITTING_H
diff --git a/thirdparty/thekla_atlas/nvmath/KahanSum.h b/thirdparty/thekla_atlas/nvmath/KahanSum.h
new file mode 100644
index 0000000000..18d475e7cb
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/KahanSum.h
@@ -0,0 +1,39 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_KAHANSUM_H
+#define NV_MATH_KAHANSUM_H
+
+#include "nvmath.h"
+
+namespace nv
+{
+
+    // Running float total that carries a correction term from one add() to
+    // the next, so low-order bits lost in an addition are fed back into the
+    // following one.
+    class KahanSum
+    {
+    public:
+        // Starts with an empty total and no pending correction.
+        KahanSum() : accum(0.0f), err(0) {}
+
+        // Adds f to the total.
+        void add(float f)
+        {
+            // Fold the correction left over from the previous call into f.
+            const float corrected = f + err;
+            const float updated = accum + corrected;
+
+            // What the addition above failed to absorb: (accum - updated) + corrected.
+            err = accum - updated;
+            err += corrected;
+
+            accum = updated;
+        }
+
+        // Current total (the pending correction is not included).
+        float sum() const
+        {
+            return accum;
+        }
+
+    private:
+        float accum;  // running total
+        float err;    // correction carried into the next add()
+    };
+
+} // nv namespace
+
+
+#endif // NV_MATH_KAHANSUM_H
diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.cpp b/thirdparty/thekla_atlas/nvmath/Matrix.cpp
new file mode 100644
index 0000000000..29bd19f5f8
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Matrix.cpp
@@ -0,0 +1,441 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Matrix.inl"
+#include "Vector.inl"
+
+#include "nvcore/Array.inl"
+
+#include <float.h>
+
+#if !NV_CC_MSVC && !NV_OS_ORBIS
+#include <alloca.h>
+#endif
+
+using namespace nv;
+
+
+// Given a matrix a[0..n-1][0..n-1], this routine replaces it by the LU decomposition of a rowwise
+// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) of
+// Numerical Recipes; indx[0..n-1] is an output vector that records the row permutation effected by
+// the partial pivoting; d is output as +1 or -1 depending on whether the number of row interchanges
+// was even or odd, respectively. Returns false if a has an all-zero row (singular matrix). This
+// routine is used in combination with lubksb to solve linear equations or invert a matrix.
+static bool ludcmp(float **a, int n, int *indx, float *d)
+{
+    const float TINY = 1.0e-20f;    // Substituted for an exactly zero pivot.
+
+    float * vv = (float*)alloca(sizeof(float) * n);    // vv stores the implicit scaling of each row.
+
+    *d = 1.0f; // No row interchanges yet.
+    for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
+        float big = 0.0f;
+        for (int j = 0; j < n; j++) {
+            big = max(big, fabsf(a[i][j]));
+        }
+        if (big == 0) {
+            return false;   // No nonzero largest element: singular matrix.
+        }
+        vv[i] = 1.0f / big; // Save the scaling.
+    }
+
+    for (int j = 0; j < n; j++) {       // This is the loop over columns of Crout's method.
+        for (int i = 0; i < j; i++) {   // This is equation (2.3.12) except for i = j.
+            float sum = a[i][j];
+            for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j];
+            a[i][j] = sum;
+        }
+
+        int imax = -1;
+        float big = 0.0f;               // Initialize for the search for largest pivot element.
+        for (int i = j; i < n; i++) {   // This is i = j of equation (2.3.12) and
+            float sum = a[i][j];        // i = j+1 ... n-1 of equation (2.3.13).
+            for (int k = 0; k < j; k++) {
+                sum -= a[i][k]*a[k][j];
+            }
+            a[i][j] = sum;
+
+            // Is the figure of merit for the pivot better than the best so far?
+            const float merit = vv[i] * fabsf(sum);
+            if (merit >= big) {
+                big = merit;
+                imax = i;
+            }
+        }
+        // No candidate compared >= big (every figure of merit was NaN): there is no usable
+        // pivot, so fail here instead of indexing a[-1] and vv[-1] below.
+        if (imax == -1) {
+            return false;
+        }
+
+        if (j != imax) {                // Do we need to interchange rows?
+            for (int k = 0; k < n; k++) {   // Yes, do so...
+                swap(a[imax][k], a[j][k]);
+            }
+            *d = -(*d);         // ...and change the parity of d.
+            vv[imax] = vv[j];   // Row j's scale moves with it; vv[j] is not read again.
+        }
+        indx[j] = imax;
+
+        // If the pivot element is zero the matrix is singular (at least to the precision of the
+        // algorithm). For some applications on singular matrices, it is desirable to substitute
+        // TINY for zero.
+        if (a[j][j] == 0.0f) a[j][j] = TINY;
+        if (j != n-1) { // Now, finally, divide by the pivot element.
+            const float inv_pivot = 1.0f / a[j][j];
+            for (int i = j+1; i < n; i++) a[i][j] *= inv_pivot;
+        }
+    } // Go back for the next column in the reduction.
+
+    return true;
+}
+
+
+// Solves the set of n linear equations Ax = b. Here a[0..n-1][0..n-1] is input, not as the matrix
+// A but rather as its LU decomposition, determined by the routine ludcmp. indx[0..n-1] is input
+// as the permutation vector returned by ludcmp. b[0..n-1] is input as the right-hand side vector
+// B, and returns with the solution vector X. a, n, and indx are not modified by this routine
+// and can be left in place for successive calls with different right-hand sides b. This routine takes
+// into account the possibility that b will begin with many zero elements, so it is efficient for use
+// in matrix inversion.
+static void lubksb(float **a, int n, int *indx, float b[])
+{
+    // Forward substitution, equation (2.3.6), unscrambling the permutation as we go.
+    // 'first' is 0 until a nonzero element of b is met, then one past that element's index.
+    int first = 0;
+    for (int row = 0; row < n; row++) {
+        const int src = indx[row];
+        float acc = b[src];
+        b[src] = b[row];
+        if (first != 0) {
+            for (int col = first-1; col < row; col++) acc -= a[row][col]*b[col];
+        } else if (acc != 0.0f) {
+            first = row+1;  // Nonzero element met: later rows must do the sum above.
+        }
+        b[row] = acc;
+    }
+    // Backsubstitution, equation (2.3.7).
+    for (int row = n-1; row >= 0; row--) {
+        float acc = b[row];
+        for (int col = row+1; col < n; col++) acc -= a[row][col]*b[col];
+        b[row] = acc/a[row][row];   // Store a component of the solution vector X.
+    }
+}
+
+
+// Solves A * x = b for a 4x4 system by LU decomposition. Returns false, leaving *x
+// untouched, when ludcmp reports the matrix as singular.
+bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
+{
+    nvDebugCheck(x != NULL);
+
+    // Row-pointer scratch copy of A in the layout ludcmp/lubksb index: lu[row][col].
+    float storage[4][4];
+    float *lu[4] = {storage[0], storage[1], storage[2], storage[3]};
+    for (int row = 0; row < 4; row++) {
+        for (int col = 0; col < 4; col++) {
+            lu[row][col] = A(row, col);
+        }
+    }
+
+    // Create LU decomposition.
+    int permutation[4];
+    float parity;   // Sign of the row permutation; not needed here.
+    if (!ludcmp(lu, 4, permutation, &parity)) {
+        // Singular matrix.
+        return false;
+    }
+
+    // lubksb solves in place, so seed the output with the right-hand side.
+    *x = b;
+    lubksb(lu, 4, permutation, x->component);
+
+    return true;
+}
+
+// Inverse of A built column by column: column i solves A * x = e_i. Returns the zero matrix
+// for singular A (solveLU then leaves the pre-zeroed columns untouched). @@ Not tested.
+Matrix nv::inverseLU(const Matrix & A)
+{
+    Vector4 Ai[4] = { Vector4(0, 0, 0, 0), Vector4(0, 0, 0, 0), Vector4(0, 0, 0, 0), Vector4(0, 0, 0, 0) };
+    solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
+    solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
+    solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
+    solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
+
+    return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
+}
+
+
+
+// Solves A * x = b for a 3x3 system by LU decomposition. Returns false, leaving *x
+// untouched, when ludcmp reports the matrix as singular.
+bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
+{
+    nvDebugCheck(x != NULL);
+
+    // Row-pointer scratch copy of A in the layout ludcmp/lubksb index: lu[row][col].
+    float storage[3][3];
+    float *lu[3] = {storage[0], storage[1], storage[2]};
+    for (int row = 0; row < 3; row++) {
+        for (int col = 0; col < 3; col++) {
+            lu[row][col] = A(row, col);
+        }
+    }
+
+    // Create LU decomposition.
+    int permutation[3];
+    float parity;   // Sign of the row permutation; not needed here.
+    if (!ludcmp(lu, 3, permutation, &parity)) {
+        // Singular matrix.
+        return false;
+    }
+
+    // lubksb solves in place, so seed the output with the right-hand side.
+    *x = b;
+    lubksb(lu, 3, permutation, x->component);
+
+    return true;
+}
+
+
+// Solves A * x = b through the Cramer inverse of A. Returns false and leaves *x untouched
+// when det(A) is exactly zero, i.e. when the inverse would divide by zero.
+bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
+{
+    nvDebugCheck(x != NULL);
+    if (A.determinant() == 0.0f) return false;
+    *x = transform(inverseCramer(A), b);
+    return true;
+}
+
+// Solves A * x = b through the Cramer inverse of a 3x3 system. Returns false and leaves
+// *x untouched when equal() reports det(A) as zero at its default tolerance.
+bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
+{
+    nvDebugCheck(x != NULL);
+
+    // @@ Use input epsilon.
+    const bool singular = equal(A.determinant(), 0.0f);
+    if (singular) {
+        return false;
+    }
+
+    *x = transform(inverseCramer(A), b);
+    return true;
+}
+
+
+
+// Inverse using Gauss-Jordan elimination with partial pivoting. From Jon's code.
+// If some column has no nonzero pivot (singular input) the partially reduced B is
+// returned as-is, so callers get no singularity indication.
+Matrix nv::inverse(const Matrix & m) {
+    Matrix A = m;           // Working copy, reduced towards the identity.
+    Matrix B(identity);     // Receives the same row operations; ends up as the inverse.
+
+    /*---------- forward elimination ----------*/
+
+    for (int i = 0; i < 4; i++) {           /* eliminate in column i, below diag */
+        int pivotRow = i;
+        float largest = -1.0f;
+        for (int k = i; k < 4; k++) {       /* find pivot for column i */
+            const float magnitude = fabs(A(k, i));
+            if (magnitude > largest) {
+                largest = magnitude;
+                pivotRow = k;
+            }
+        }
+        if (largest <= 0.0f) return B;      /* if no nonzero pivot, PUNT */
+
+        if (pivotRow != i) {                /* swap rows i and pivotRow */
+            for (int k = i; k < 4; k++) swap(A(i, k), A(pivotRow, k));
+            for (int k = 0; k < 4; k++) swap(B(i, k), B(pivotRow, k));
+        }
+
+        const float pivot = A(i, i);
+        for (int k = i+1; k < 4; k++)       /* only do elems to right of pivot */
+            A(i, k) /= pivot;
+        for (int k = 0; k < 4; k++)
+            B(i, k) /= pivot;
+        /* we know that A(i, i) will be set to 1, so don't bother to do it */
+
+        for (int j = i+1; j < 4; j++) {     /* eliminate in rows below i */
+            const float t = A(j, i);        /* we're gonna zero this guy */
+            for (int k = i+1; k < 4; k++)   /* subtract scaled row i from row j */
+                A(j, k) -= A(i, k)*t;       /* (ignore k<=i, we know they're 0) */
+            for (int k = 0; k < 4; k++)
+                B(j, k) -= B(i, k)*t;
+        }
+    }
+
+    /*---------- backward elimination ----------*/
+
+    for (int i = 4-1; i > 0; i--) {         /* eliminate in column i, above diag */
+        for (int j = 0; j < i; j++) {       /* eliminate in rows above i */
+            const float t = A(j, i);        /* we're gonna zero this guy */
+            for (int k = 0; k < 4; k++)     /* subtract scaled row i from row j */
+                B(j, k) -= B(i, k)*t;
+        }
+    }
+
+    return B;
+}
+
+
+// 3x3 inverse using Gauss-Jordan elimination with partial pivoting. On singular input
+// (no nonzero pivot in some column) the partially reduced B is returned as-is.
+Matrix3 nv::inverse(const Matrix3 & m) {
+    Matrix3 A = m;          // Working copy, reduced towards the identity.
+    Matrix3 B(identity);    // Receives the same row operations; ends up as the inverse.
+
+    /*---------- forward elimination ----------*/
+
+    for (int i = 0; i < 3; i++) {           /* eliminate in column i, below diag */
+        int pivotRow = i;
+        float largest = -1.0f;
+        for (int k = i; k < 3; k++) {       /* find pivot for column i */
+            const float magnitude = fabs(A(k, i));
+            if (magnitude > largest) {
+                largest = magnitude;
+                pivotRow = k;
+            }
+        }
+        if (largest <= 0.0f) return B;      /* if no nonzero pivot, PUNT */
+
+        if (pivotRow != i) {                /* swap rows i and pivotRow */
+            for (int k = i; k < 3; k++) swap(A(i, k), A(pivotRow, k));
+            for (int k = 0; k < 3; k++) swap(B(i, k), B(pivotRow, k));
+        }
+
+        const float pivot = A(i, i);
+        for (int k = i+1; k < 3; k++)       /* only do elems to right of pivot */
+            A(i, k) /= pivot;
+        for (int k = 0; k < 3; k++)
+            B(i, k) /= pivot;
+        /* we know that A(i, i) will be set to 1, so don't bother to do it */
+
+        for (int j = i+1; j < 3; j++) {     /* eliminate in rows below i */
+            const float t = A(j, i);        /* we're gonna zero this guy */
+            for (int k = i+1; k < 3; k++)   /* subtract scaled row i from row j */
+                A(j, k) -= A(i, k)*t;       /* (ignore k<=i, we know they're 0) */
+            for (int k = 0; k < 3; k++)
+                B(j, k) -= B(i, k)*t;
+        }
+    }
+
+    /*---------- backward elimination ----------*/
+
+    for (int i = 3-1; i > 0; i--) {         /* eliminate in column i, above diag */
+        for (int j = 0; j < i; j++) {       /* eliminate in rows above i */
+            const float t = A(j, i);        /* we're gonna zero this guy */
+            for (int k = 0; k < 3; k++)     /* subtract scaled row i from row j */
+                B(j, k) -= B(i, k)*t;
+        }
+    }
+
+    return B;
+}
+
+
+
+
+
+#if 0 
+
+// Copyright (C) 1999-2004 Michael Garland.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a
+// copy of this software and associated documentation files (the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, and/or sell copies of the Software, and to permit persons
+// to whom the Software is furnished to do so, provided that the above
+// copyright notice(s) and this permission notice appear in all copies of
+// the Software and that both the above copyright notice(s) and this
+// permission notice appear in supporting documentation.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+// HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+// INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+// FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+// WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+// 
+// Except as contained in this notice, the name of a copyright holder
+// shall not be used in advertising or otherwise to promote the sale, use
+// or other dealings in this Software without prior written authorization
+// of the copyright holder.
+
+
+// Matrix inversion code for 4x4 matrices using Gaussian elimination
+// with partial pivoting.  This is a specialized version of a
+// procedure originally due to Paul Heckbert <ph@cs.cmu.edu>.
+//
+// Returns determinant of A, and B=inverse(A)
+// If matrix A is singular, returns 0 and leaves trash in B.
+//
+#define SWAP(a, b, t)   {t = a; a = b; b = t;}
+double invert(Mat4& B, const Mat4& m)   /* disabled reference version (inside #if 0) */
+{
+    Mat4 A = m;                         /* working copy; m itself is not modified */
+    int i, j, k;
+    double max, t, det, pivot;
+
+    /*---------- forward elimination ----------*/
+
+    for (i=0; i<4; i++)                 /* put identity matrix in B */
+        for (j=0; j<4; j++)
+            B(i, j) = (double)(i==j);
+
+    det = 1.0;                          /* running product of the pivots */
+    for (i=0; i<4; i++) {               /* eliminate in column i, below diag */
+        max = -1.;
+        for (k=i; k<4; k++)             /* find pivot for column i */
+            if (fabs(A(k, i)) > max) {
+                max = fabs(A(k, i));
+                j = k;
+            }
+        if (max<=0.) return 0.;         /* if no nonzero pivot, PUNT */
+        if (j!=i) {                     /* swap rows i and j */
+            for (k=i; k<4; k++)
+                SWAP(A(i, k), A(j, k), t);
+            for (k=0; k<4; k++)
+                SWAP(B(i, k), B(j, k), t);
+            det = -det;                 /* each row swap flips the sign */
+        }
+        pivot = A(i, i);
+        det *= pivot;
+        for (k=i+1; k<4; k++)           /* only do elems to right of pivot */
+            A(i, k) /= pivot;
+        for (k=0; k<4; k++)             /* same scaling applied to all of row i of B */
+            B(i, k) /= pivot;
+        /* we know that A(i, i) will be set to 1, so don't bother to do it */
+
+        for (j=i+1; j<4; j++) {         /* eliminate in rows below i */
+            t = A(j, i);                /* we're gonna zero this guy */
+            for (k=i+1; k<4; k++)       /* subtract scaled row i from row j */
+                A(j, k) -= A(i, k)*t;   /* (ignore k<=i, we know they're 0) */
+            for (k=0; k<4; k++)
+                B(j, k) -= B(i, k)*t;
+        }
+    }
+
+    /*---------- backward elimination ----------*/
+
+    for (i=4-1; i>0; i--) {             /* eliminate in column i, above diag */
+        for (j=0; j<i; j++) {           /* eliminate in rows above i */
+            t = A(j, i);                /* we're gonna zero this guy */
+            for (k=0; k<4; k++)         /* subtract scaled row i from row j */
+                B(j, k) -= B(i, k)*t;   /* only B is updated; A is not written here */
+        }
+    }
+
+    return det;
+}
+
+#endif // 0
+
+
+
diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.h b/thirdparty/thekla_atlas/nvmath/Matrix.h
new file mode 100644
index 0000000000..506bdad1ca
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Matrix.h
@@ -0,0 +1,113 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_MATRIX_H
+#define NV_MATH_MATRIX_H
+
+#include "Vector.h"
+
+// - Matrices are stored in memory in *column major* order.
+// - Points are to be thought of as column vectors.
+// - Transformation of a point p by a matrix M is: p' = M * p
+
+namespace nv
+{
+    enum identity_t { identity };   // Tag type selecting the identity-matrix constructors.
+
+    // 3x3 matrix. Elements are stored column-major: flat index = 3 * col + row.
+    class NVMATH_CLASS Matrix3
+    {
+    public:
+        Matrix3();                          // Leaves the elements uninitialized.
+        explicit Matrix3(float f);          // Sets every element to f.
+        explicit Matrix3(identity_t);       // Identity matrix.
+        Matrix3(const Matrix3 & m);
+        Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);   // v0, v1, v2 are the columns.
+
+        float data(uint idx) const;         // Flat access, idx in [0, 9).
+        float & data(uint idx);
+        float get(uint row, uint col) const;
+        float operator()(uint row, uint col) const;
+        float & operator()(uint row, uint col);
+
+        Vector3 row(uint i) const;
+        Vector3 column(uint i) const;
+
+        void operator*=(float s);
+        void operator/=(float s);
+        void operator+=(const Matrix3 & m);
+        void operator-=(const Matrix3 & m);
+
+        void scale(float s);                // Multiplies every element by s.
+        void scale(Vector3::Arg s);         // Multiplies columns 0, 1, 2 by s.x, s.y, s.z.
+        float determinant() const;
+
+    private:
+        float m_data[9];                    // Column-major storage.
+    };
+
+    // Solve equation system using LU decomposition and back-substitution.
+    extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x);
+
+    // Solve equation system using Cramer's inverse.
+    extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
+
+
+    // 4x4 matrix. Elements are stored column-major: flat index = 4 * col + row.
+    class NVMATH_CLASS Matrix
+    {
+    public:
+        typedef Matrix const & Arg;
+
+        Matrix();                           // Leaves the elements uninitialized.
+        explicit Matrix(float f);
+        explicit Matrix(identity_t);        // Identity matrix.
+        Matrix(const Matrix3 & m);          // Upper-left 3x3 from m; fourth row and column zeroed.
+        Matrix(const Matrix & m);
+        Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);   // v0..v3 are the columns.
+        //explicit Matrix(const float m[]);	// m is assumed to contain 16 elements
+
+        float data(uint idx) const;         // Flat access, idx in [0, 16).
+        float & data(uint idx);
+        float get(uint row, uint col) const;
+        float operator()(uint row, uint col) const;
+        float & operator()(uint row, uint col);
+        const float * ptr() const;          // The sixteen floats, column-major.
+
+        Vector4 row(uint i) const;
+        Vector4 column(uint i) const;
+
+        void zero();
+        void identity();
+
+        void scale(float s);                // Multiplies all sixteen elements by s.
+        void scale(Vector3::Arg s);         // Multiplies columns 0, 1, 2 by s.x, s.y, s.z.
+        void translate(Vector3::Arg t);     // Updates column 3 only.
+        void rotate(float theta, float v0, float v1, float v2);
+        float determinant() const;
+
+        void operator+=(const Matrix & m);
+        void operator-=(const Matrix & m);
+
+        void apply(Matrix::Arg m);          // this = this * m; m must not be this.
+
+    private:
+        float m_data[16];                   // Column-major storage.
+    };
+
+    // Solve equation system using LU decomposition and back-substitution.
+    extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
+
+    // Solve equation system using Cramer's inverse.
+    extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
+
+    // Compute inverse using LU decomposition.
+    extern Matrix inverseLU(const Matrix & m);
+
+    // Compute inverse using Gaussian elimination and partial pivoting.
+    extern Matrix inverse(const Matrix & m);
+    extern Matrix3 inverse(const Matrix3 & m);
+
+} // nv namespace
+
+#endif // NV_MATH_MATRIX_H
diff --git a/thirdparty/thekla_atlas/nvmath/Matrix.inl b/thirdparty/thekla_atlas/nvmath/Matrix.inl
new file mode 100644
index 0000000000..c0d99d9fe0
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Matrix.inl
@@ -0,0 +1,1274 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_MATRIX_INL
+#define NV_MATH_MATRIX_INL
+
+#include "Matrix.h"
+
+namespace nv
+{
+    inline Matrix3::Matrix3() {}    // Leaves all nine elements uninitialized.
+    
+    // Fill constructor: every one of the nine elements is set to f.
+    inline Matrix3::Matrix3(float f) {
+        for (float * e = m_data; e != m_data + 9; ++e) {
+            *e = f;
+        }
+    }
+
+    // Identity constructor: ones on the diagonal, zeros elsewhere.
+    inline Matrix3::Matrix3(identity_t) {
+        for (int idx = 0; idx < 9; idx++) {
+            m_data[idx] = 0.0f;
+        }
+        // Flat indices 0, 4 and 8 are the diagonal (index = 3 * col + row).
+        m_data[0] = m_data[4] = m_data[8] = 1.0f;
+    }
+
+    // Copy constructor: element-wise copy of all nine values.
+    inline Matrix3::Matrix3(const Matrix3 & m) {
+        for (int idx = 8; idx >= 0; idx--) {
+            m_data[idx] = m.m_data[idx];
+        }
+    }
+    
+    // Builds the matrix from three column vectors: v0, v1, v2 become columns 0, 1, 2.
+    inline Matrix3::Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2) {
+        m_data[0] = v0.x; m_data[3] = v1.x; m_data[6] = v2.x;
+        m_data[1] = v0.y; m_data[4] = v1.y; m_data[7] = v2.y;
+        m_data[2] = v0.z; m_data[5] = v1.z; m_data[8] = v2.z;
+    }
+
+    // Read element idx of the flat storage (idx = 3 * col + row).
+    inline float Matrix3::data(uint idx) const {
+        nvDebugCheck(idx < 9);
+        return m_data[idx];
+    }
+    // Writable reference to element idx of the flat storage (idx = 3 * col + row).
+    inline float & Matrix3::data(uint idx) {
+        nvDebugCheck(idx < 9);
+        return m_data[idx];
+    }
+    // Element at (row, col), looked up in the column-major storage.
+    inline float Matrix3::get(uint row, uint col) const {
+        nvDebugCheck(row < 3 && col < 3);
+        return m_data[row + 3 * col];
+    }
+    // Read-only element access: value at (row, col).
+    inline float Matrix3::operator()(uint row, uint col) const {
+        nvDebugCheck(row < 3 && col < 3);
+        return m_data[row + 3 * col];
+    }
+    // Writable element access: reference to the value at (row, col).
+    inline float & Matrix3::operator()(uint row, uint col) {
+        nvDebugCheck(row < 3 && col < 3);
+        return m_data[row + 3 * col];
+    }
+
+    // Row i as a vector (elements i, 3 + i and 6 + i of the flat storage).
+    inline Vector3 Matrix3::row(uint i) const {
+        nvDebugCheck(i < 3);
+        return Vector3(m_data[i], m_data[3 + i], m_data[6 + i]);
+    }
+    // Column i as a vector (three consecutive elements of the flat storage).
+    inline Vector3 Matrix3::column(uint i) const {
+        nvDebugCheck(i < 3);
+        return Vector3(m_data[3 * i], m_data[3 * i + 1], m_data[3 * i + 2]);
+    }
+
+    // Multiplies every element by s.
+    inline void Matrix3::operator*=(float s) {
+        for (float * e = m_data; e != m_data + 9; ++e) {
+            *e *= s;
+        }
+    }
+
+    // Divides every element by s (multiplies by the reciprocal, computed once).
+    inline void Matrix3::operator/=(float s) {
+        const float reciprocal = 1.0f / s;
+        for (float * e = m_data; e != m_data + 9; ++e) {
+            *e *= reciprocal;
+        }
+    }
+
+    // Element-wise addition of m into this matrix.
+    inline void Matrix3::operator+=(const Matrix3 & m) {
+        for (int idx = 8; idx >= 0; idx--) {
+            m_data[idx] += m.m_data[idx];
+        }
+    }
+
+    // Element-wise subtraction of m from this matrix.
+    inline void Matrix3::operator-=(const Matrix3 & m) {
+        for (int idx = 8; idx >= 0; idx--) {
+            m_data[idx] -= m.m_data[idx];
+        }
+    }
+
+    // Element-wise sum of a and b.
+    inline Matrix3 operator+(const Matrix3 & a, const Matrix3 & b) {
+        Matrix3 sum(a);
+        sum += b;
+        return sum;
+    }
+
+    // Element-wise difference a - b.
+    inline Matrix3 operator-(const Matrix3 & a, const Matrix3 & b) {
+        Matrix3 difference(a);
+        difference -= b;
+        return difference;
+    }
+
+    // Copy of a with every element multiplied by s.
+    inline Matrix3 operator*(const Matrix3 & a, float s) {
+        Matrix3 scaled(a);
+        scaled *= s;
+        return scaled;
+    }
+
+    // Copy of a with every element multiplied by s (scalar on the left).
+    inline Matrix3 operator*(float s, const Matrix3 & a) {
+        Matrix3 scaled(a);
+        scaled *= s;
+        return scaled;
+    }
+
+    // Copy of a with every element divided by s.
+    inline Matrix3 operator/(const Matrix3 & a, float s) {
+        Matrix3 quotient(a);
+        quotient /= s;
+        return quotient;
+    }
+
+    // Matrix product a * b.
+    inline Matrix3 mul(const Matrix3 & a, const Matrix3 & b)
+    {
+        Matrix3 product;
+        for (int col = 0; col < 3; col++) {
+            const float b0 = b(0,col), b1 = b(1,col), b2 = b(2,col);
+            for (int row = 0; row < 3; row++) {
+                product(row, col) = a(row,0) * b0 + a(row,1) * b1 + a(row,2) * b2;
+            }
+        }
+
+        return product;
+    }
+
+    // Operator form of mul(a, b).
+    inline Matrix3 operator*(const Matrix3 & a, const Matrix3 & b) {
+        return mul(a, b);
+    }
+
+    // Transform the given 3d vector with the given matrix: returns m * p, p as a column vector.
+    inline Vector3 transform(const Matrix3 & m, const Vector3 & p)
+    {
+        const float x = p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2);
+        const float y = p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2);
+        const float z = p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2);
+        return Vector3(x, y, z);
+    }
+
+    // Uniform scale: multiplies every element by s.
+    inline void Matrix3::scale(float s) {
+        for (float * e = m_data; e != m_data + 9; ++e) {
+            *e *= s;
+        }
+    }
+
+    // Scales column 0 by s.x, column 1 by s.y and column 2 by s.z.
+    inline void Matrix3::scale(Vector3::Arg s) {
+        for (int row = 0; row < 3; row++) {
+            m_data[row] *= s.x; m_data[3 + row] *= s.y; m_data[6 + row] *= s.z;
+        }
+    }
+
+    // Determinant as three positive minus three negative diagonal products (rule of Sarrus).
+    inline float Matrix3::determinant() const
+    {
+        const float m00 = m_data[0], m01 = m_data[3], m02 = m_data[6];
+        const float m10 = m_data[1], m11 = m_data[4], m12 = m_data[7];
+        const float m20 = m_data[2], m21 = m_data[5], m22 = m_data[8];
+
+        return m00 * m11 * m22 + m01 * m12 * m20 + m02 * m10 * m21
+             - m02 * m11 * m20 - m01 * m10 * m22 - m00 * m12 * m21;
+    }
+
+    // Inverse using Cramer's rule: adjugate (transposed cofactor matrix) over the determinant.
+    // Returns the zero matrix when equal() reports the determinant as zero at tolerance 0.
+    inline Matrix3 inverseCramer(const Matrix3 & m)
+    {
+        const float det = m.determinant();
+        if (equal(det, 0.0f, 0.0f)) {
+            return Matrix3(0);
+        }
+
+        // Flat index is 3 * col + row, so data(3 * c + r) must receive cofactor(c, r) of m;
+        // e.g. data(1) = r(1,0) = cofactor(0,1) and data(3) = r(0,1) = cofactor(1,0).
+        Matrix3 r;
+        r.data(0) =  - m.data(5) * m.data(7) + m.data(4) * m.data(8);
+        r.data(1) =  + m.data(2) * m.data(7) - m.data(1) * m.data(8);
+        r.data(2) =  - m.data(2) * m.data(4) + m.data(1) * m.data(5);
+
+        r.data(3) =  + m.data(5) * m.data(6) - m.data(3) * m.data(8);
+        r.data(4) =  - m.data(2) * m.data(6) + m.data(0) * m.data(8);
+        r.data(5) =  + m.data(2) * m.data(3) - m.data(0) * m.data(5);
+
+        r.data(6) =  - m.data(4) * m.data(6) + m.data(3) * m.data(7);
+        r.data(7) =  + m.data(1) * m.data(6) - m.data(0) * m.data(7);
+        r.data(8) =  - m.data(1) * m.data(3) + m.data(0) * m.data(4);
+        r.scale(1.0f / det);
+        return r;
+    }
+
+
+
+    inline Matrix::Matrix()     // Leaves all sixteen elements uninitialized.
+    {
+    }
+
+    // Fill constructor: every one of the sixteen elements is set to f.
+    inline Matrix::Matrix(float f) {
+        for(int i = 0; i < 16; i++) {
+            m_data[i] = f;
+        }
+    }
+
+    // Identity constructor: ones on the diagonal, zeros elsewhere.
+    inline Matrix::Matrix(identity_t) {
+        for (int idx = 0; idx < 16; idx++) {
+            m_data[idx] = 0.0f;
+        }
+        // Flat indices 0, 5, 10 and 15 are the diagonal (index = 4 * col + row).
+        m_data[0] = m_data[5] = m_data[10] = m_data[15] = 1.0f;
+    }
+
+    // Copy constructor: element-wise copy of all sixteen values.
+    inline Matrix::Matrix(const Matrix & m) {
+        for (int idx = 15; idx >= 0; idx--) {
+            m_data[idx] = m.m_data[idx];
+        }
+    }
+
+    // Embeds the 3x3 matrix m in the upper-left corner and zeroes the whole fourth row and
+    // fourth column. NOTE(review): element (3,3) ends up 0, not 1 -- confirm this is intended.
+    inline Matrix::Matrix(const Matrix3 & m)
+    {
+        for (int col = 0; col < 3; col++) {
+            for (int row = 0; row < 3; row++) {
+                m_data[4 * col + row] = m.get(row, col);
+            }
+            m_data[4 * col + 3] = 0;    // Row 3 of this column.
+        }
+        m_data[12] = m_data[13] = m_data[14] = m_data[15] = 0;    // Column 3.
+    }
+
+    // Builds the matrix from four column vectors: v0..v3 become columns 0..3.
+    inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3) {
+        m_data[ 0] = v0.x; m_data[ 4] = v1.x; m_data[ 8] = v2.x; m_data[12] = v3.x;
+        m_data[ 1] = v0.y; m_data[ 5] = v1.y; m_data[ 9] = v2.y; m_data[13] = v3.y;
+        m_data[ 2] = v0.z; m_data[ 6] = v1.z; m_data[10] = v2.z; m_data[14] = v3.z;
+        m_data[ 3] = v0.w; m_data[ 7] = v1.w; m_data[11] = v2.w; m_data[15] = v3.w;
+    }
+
+    /*inline Matrix::Matrix(const float m[])
+    {
+        for(int i = 0; i < 16; i++) {
+            m_data[i] = m[i];
+        }
+    }*/
+
+
+    // Accessors
+    // Read element idx of the flat storage (idx = 4 * col + row).
+    inline float Matrix::data(uint idx) const {
+        nvDebugCheck(idx < 16);
+        return m_data[idx];
+    }
+    // Writable reference to element idx of the flat storage (idx = 4 * col + row).
+    inline float & Matrix::data(uint idx) {
+        nvDebugCheck(idx < 16);
+        return m_data[idx];
+    }
+    // Element at (row, col), looked up in the column-major storage.
+    inline float Matrix::get(uint row, uint col) const {
+        nvDebugCheck(row < 4 && col < 4);
+        return m_data[row + 4 * col];
+    }
+    // Read-only element access: value at (row, col).
+    inline float Matrix::operator()(uint row, uint col) const {
+        nvDebugCheck(row < 4 && col < 4);
+        return m_data[row + 4 * col];
+    }
+    // Writable element access: reference to the value at (row, col).
+    inline float & Matrix::operator()(uint row, uint col) {
+        nvDebugCheck(row < 4 && col < 4);
+        return m_data[row + 4 * col];
+    }
+
+    // Pointer to the first of the sixteen floats (column-major order).
+    inline const float * Matrix::ptr() const {
+        return &m_data[0];
+    }
+
+    // Row i as a vector (elements i, 4 + i, 8 + i and 12 + i of the flat storage).
+    inline Vector4 Matrix::row(uint i) const {
+        nvDebugCheck(i < 4);
+        return Vector4(m_data[i], m_data[4 + i], m_data[8 + i], m_data[12 + i]);
+    }
+
+    // Column i as a vector (four consecutive elements of the flat storage).
+    inline Vector4 Matrix::column(uint i) const {
+        nvDebugCheck(i < 4);
+        return Vector4(m_data[4 * i], m_data[4 * i + 1], m_data[4 * i + 2], m_data[4 * i + 3]);
+    }
+
+    // Sets every element to zero.
+    inline void Matrix::zero()
+    {
+        for (int idx = 0; idx < 16; idx++) {
+            m_data[idx] = 0;
+        }
+    }
+
+    // Resets this matrix to the identity.
+    inline void Matrix::identity()
+    {
+        for (int idx = 0; idx < 16; idx++) {
+            m_data[idx] = (idx % 5 == 0) ? 1.0f : 0.0f;   // 0, 5, 10, 15 are the diagonal.
+        }
+    }
+
+    // Apply scale: multiplies all sixteen elements by s
+    // (this includes the fourth row and the fourth column).
+    inline void Matrix::scale(float s)
+    {
+        for (int idx = 0; idx < 16; idx++) {
+            m_data[idx] *= s;
+        }
+    }
+
+    // Apply scale: columns 0, 1 and 2 are multiplied by s.x, s.y and s.z; column 3 is untouched.
+    inline void Matrix::scale(Vector3::Arg s)
+    {
+        for (int row = 0; row < 4; row++) {
+            m_data[row] *= s.x; m_data[4 + row] *= s.y; m_data[8 + row] *= s.z;
+        }
+    }
+
+    // Apply translation: column 3 becomes t.x * column 0 + t.y * column 1 + t.z * column 2
+    // + column 3. The other three columns are left unchanged.
+    inline void Matrix::translate(Vector3::Arg t)
+    {
+        for (int row = 0; row < 4; row++) {
+            m_data[12 + row] = m_data[row] * t.x + m_data[4 + row] * t.y + m_data[8 + row] * t.z + m_data[12 + row];
+        }
+    }
+
+    Matrix rotation(float theta, float v0, float v1, float v2);
+
+    // Apply rotation: passes rotation(theta, v0, v1, v2) to apply().
+    inline void Matrix::rotate(float theta, float v0, float v1, float v2)
+    {
+        const Matrix turn = rotation(theta, v0, v1, v2);
+        apply(turn);
+    }
+
+    // Apply transform: replaces this matrix with (this * m). m must not alias this.
+    inline void Matrix::apply(Matrix::Arg m)
+    {
+        nvDebugCheck(this != &m);
+
+        for (int row = 0; row < 4; row++) {
+            // Snapshot the row first: its four entries are overwritten below.
+            const float r0 = m_data[row], r1 = m_data[4 + row], r2 = m_data[8 + row], r3 = m_data[12 + row];
+            for (int col = 0; col < 4; col++) {
+                m_data[4 * col + row] = r0 * m(0,col) + r1 * m(1,col) + r2 * m(2,col) + r3 * m(3,col);
+            }
+        }
+    }
+
+    // Get scale matrix: diagonal (s.x, s.y, s.z, 1), zeros elsewhere.
+    inline Matrix scale(Vector3::Arg s)
+    {
+        return Matrix(
+            Vector4(s.x, 0, 0, 0),
+            Vector4(0, s.y, 0, 0),
+            Vector4(0, 0, s.z, 0),
+            Vector4(0, 0, 0, 1));
+    }
+
+    // Get scale matrix: diagonal (s, s, s, 1), zeros elsewhere.
+    inline Matrix scale(float s)
+    {
+        Matrix result(identity);
+        for (int axis = 0; axis < 3; axis++) result(axis, axis) = s;
+        return result;
+    }
+
+    // Get translation matrix: identity with t in the first three rows of column 3.
+    inline Matrix translation(Vector3::Arg t)
+    {
+        return Matrix(
+            Vector4(1, 0, 0, 0),
+            Vector4(0, 1, 0, 0),
+            Vector4(0, 0, 1, 0),
+            Vector4(t.x, t.y, t.z, 1));
+    }
+
+    // Get rotation matrix for an angle of theta radians about the axis (v0, v1, v2).
+    // An axis that is not exactly a coordinate axis is normalized before use.
+    // NOTE(review): all branches share one sign convention (z axis: m(1,0) = -sin(theta));
+    // confirm it is the rotation direction callers expect.
+    inline Matrix rotation(float theta, float v0, float v1, float v2)
+    {
+        float cost = cosf(theta);
+        float sint = sinf(theta);
+
+        Matrix m(identity);
+
+        if( 1 == v0 && 0 == v1 && 0 == v2 ) {
+            m(1,1) = cost; m(2,1) = -sint;
+            m(1,2) = sint; m(2,2) = cost;
+        }
+        else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
+            m(0,0) = cost; m(2,0) = sint;
+            m(0,2) = -sint; m(2,2) = cost;
+        }
+        else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
+            m(0,0) = cost; m(1,0) = -sint;
+            m(0,1) = sint; m(1,1) = cost;
+        }
+        else {
+            // Normalize the axis before squaring its components, so that the diagonal terms
+            // use the same unit axis as the off-diagonal ones.
+            float iscale = 1.0f / sqrtf(v0 * v0 + v1 * v1 + v2 * v2);
+            v0 *= iscale;
+            v1 *= iscale;
+            v2 *= iscale;
+
+            float a2 = v0 * v0;
+            float b2 = v1 * v1;
+            float c2 = v2 * v2;
+
+            float mcos = 1.0f - cost;
+            float abm = v0 * v1 * mcos;
+            float acm = v0 * v2 * mcos;
+            float bcm = v1 * v2 * mcos;
+            float a_sin = v0 * sint;
+            float b_sin = v1 * sint;
+            float c_sin = v2 * sint;
+
+            // Upper-left 3x3 block, one column per line; each branch above is this formula
+            // evaluated for the corresponding unit axis.
+            m(0,0) = a2 * mcos + cost; m(1,0) = abm - c_sin;      m(2,0) = acm + b_sin;
+            m(0,1) = abm + c_sin;      m(1,1) = b2 * mcos + cost; m(2,1) = bcm - a_sin;
+            m(0,2) = acm - b_sin;      m(1,2) = bcm + a_sin;      m(2,2) = c2 * mcos + cost;
+        }
+
+        return m;
+    }
+
+    //Matrix rotation(float yaw, float pitch, float roll);
+    //Matrix skew(float angle, Vector3::Arg v1, Vector3::Arg v2);
+
+    // Build a perspective frustum matrix from the clip-plane extents
+    // [xmin, xmax] x [ymin, ymax] at the near plane and the near/far
+    // distances. Starts from an all-zero matrix and fills only the non-zero
+    // entries. No check is made for xmax == xmin, ymax == ymin or
+    // zFar == zNear, each of which divides by zero.
+    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
+    {
+        const float twoNear   = 2.0f * zNear;
+        const float invWidth  = 1.0f / (xmax - xmin);
+        const float invHeight = 1.0f / (ymax - ymin);
+        const float invDepth  = 1.0f / (zFar - zNear);
+
+        Matrix proj(0.0f);
+
+        // Column 0 and 1: x/y scale.
+        proj(0,0) = twoNear * invWidth;
+        proj(1,1) = twoNear * invHeight;
+
+        // Column 2: off-centre shift, depth scale and the -1 that copies -z into w.
+        proj(0,2) = (xmax + xmin) * invWidth;
+        proj(1,2) = (ymax + ymin) * invHeight;
+        proj(2,2) = -(zFar + zNear) * invDepth;
+        proj(3,2) = -1.0f;
+
+        // Column 3: depth offset.
+        proj(2,3) = -(zFar * twoNear) * invDepth;
+
+        return proj;
+    }
+
+    // Build the inverse of the matrix produced by the six-argument frustum()
+    // for the same parameters. Starts from an all-zero matrix. zNear == 0 or
+    // zFar == 0 divides by zero; no check is made.
+    inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
+    {
+        const float invTwoNear    = 1.0f / (2.0f * zNear);
+        const float invTwoNearFar = 1.0f / (2.0f * zNear * zFar);
+
+        Matrix inv(0.0f);
+
+        // x row.
+        inv(0,0) = (xmax - xmin) * invTwoNear;
+        inv(0,3) = (xmax + xmin) * invTwoNear;
+
+        // y row.
+        inv(1,1) = (ymax - ymin) * invTwoNear;
+        inv(1,3) = (ymax + ymin) * invTwoNear;
+
+        // z row.
+        inv(2,3) = -1.0f;
+
+        // w row.
+        inv(3,2) = -(zFar - zNear) * invTwoNearFar;
+        inv(3,3) = (zFar + zNear) * invTwoNearFar;
+
+        return inv;
+    }
+
+    // Build a frustum matrix that takes no far distance (the "infinite"
+    // variant). Same x/y terms as the six-argument frustum(); the depth terms
+    // are -1 and -2*zNear, each multiplied by a nudge factor that is
+    // currently 1.0 (0.999 is the alternative noted in the original code).
+    inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
+    {
+        const float twoNear   = 2.0f * zNear;
+        const float invWidth  = 1.0f / (xmax - xmin);
+        const float invHeight = 1.0f / (ymax - ymin);
+        const float nudge     = 1.0; // 0.999;
+
+        Matrix proj(0.0f);
+
+        proj(0,0) = twoNear * invWidth;
+        proj(1,1) = twoNear * invHeight;
+
+        proj(0,2) = (xmax + xmin) * invWidth;
+        proj(1,2) = (ymax + ymin) * invHeight;
+        proj(2,2) = -1.0f * nudge;
+        proj(3,2) = -1.0f;
+
+        proj(2,3) = -twoNear * nudge;
+
+        return proj;
+    }
+
+    // Build a symmetric perspective matrix by forwarding to the six-argument
+    // frustum(). The angle is passed to tan(), so it is in radians.
+    // Note: tan(fovy / 2) sets the half-extent along x at the near plane; the
+    // y half-extent is that value divided by aspect.
+    inline Matrix perspective(float fovy, float aspect, float zNear, float zFar)
+    {
+        const float halfWidth  = zNear * tan(fovy / 2);
+        const float halfHeight = halfWidth / aspect;
+        return frustum(-halfWidth, halfWidth, -halfHeight, halfHeight, zNear, zFar);
+    }
+
+    // Build the inverse of perspective(fovy, aspect, zNear, zFar) by deriving
+    // the same near-plane extents and forwarding them to frustumInverse().
+    // tan(fovy / 2) sets the x half-extent; y is that value divided by aspect.
+    inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
+    {
+        const float halfWidth  = zNear * tan(fovy / 2);
+        const float halfHeight = halfWidth / aspect;
+        return frustumInverse(-halfWidth, halfWidth, -halfHeight, halfHeight, zNear, zFar);
+    }
+
+    // Build a symmetric perspective matrix with no far distance by forwarding
+    // to the five-argument frustum(). tan(fovy / 2) sets the x half-extent at
+    // the near plane; the y half-extent is that value divided by aspect.
+    inline Matrix perspective(float fovy, float aspect, float zNear)
+    {
+        float xmax = zNear * tan(fovy / 2);
+        float xmin = -xmax;
+
+        float ymax = xmax / aspect;
+        float ymin = -ymax;
+
+        return frustum(xmin, xmax, ymin, ymax, zNear);
+    }
+
+    // Get matrix determinant.
+    //
+    // Fully expanded determinant of the 4x4 matrix, written directly against
+    // the flat 16-element m_data array: 24 signed products, each taking one
+    // factor from each of the index groups 0-3, 4-7, 8-11 and 12-15.
+    // The six source lines group the terms by their last factor
+    // (m_data[12], [12]/[13], [13], [14], [14]/[15], [15]).
+    inline float Matrix::determinant() const
+    {
+        return 
+            m_data[3] * m_data[6] * m_data[ 9] * m_data[12] - m_data[2] * m_data[7] * m_data[ 9] * m_data[12] - m_data[3] * m_data[5] * m_data[10] * m_data[12] + m_data[1] * m_data[7] * m_data[10] * m_data[12] +
+            m_data[2] * m_data[5] * m_data[11] * m_data[12] - m_data[1] * m_data[6] * m_data[11] * m_data[12] - m_data[3] * m_data[6] * m_data[ 8] * m_data[13] + m_data[2] * m_data[7] * m_data[ 8] * m_data[13] +
+            m_data[3] * m_data[4] * m_data[10] * m_data[13] - m_data[0] * m_data[7] * m_data[10] * m_data[13] - m_data[2] * m_data[4] * m_data[11] * m_data[13] + m_data[0] * m_data[6] * m_data[11] * m_data[13] +
+            m_data[3] * m_data[5] * m_data[ 8] * m_data[14] - m_data[1] * m_data[7] * m_data[ 8] * m_data[14] - m_data[3] * m_data[4] * m_data[ 9] * m_data[14] + m_data[0] * m_data[7] * m_data[ 9] * m_data[14] +
+            m_data[1] * m_data[4] * m_data[11] * m_data[14] - m_data[0] * m_data[5] * m_data[11] * m_data[14] - m_data[2] * m_data[5] * m_data[ 8] * m_data[15] + m_data[1] * m_data[6] * m_data[ 8] * m_data[15] +
+            m_data[2] * m_data[4] * m_data[ 9] * m_data[15] - m_data[0] * m_data[6] * m_data[ 9] * m_data[15] - m_data[1] * m_data[4] * m_data[10] * m_data[15] + m_data[0] * m_data[5] * m_data[10] * m_data[15];
+    }
+
+    // Return the transpose of m: element (row, col) of the result is element
+    // (col, row) of the input. All 16 entries are written.
+    inline Matrix transpose(Matrix::Arg m)
+    {
+        Matrix flipped;
+        for (int col = 0; col < 4; ++col) {
+            for (int row = 0; row < 4; ++row) {
+                flipped(row, col) = m(col, row);
+            }
+        }
+        return flipped;
+    }
+
+    // Inverse using Cramer's rule.
+    //
+    // Each of the 16 entries of r is a hand-expanded signed sum of six triple
+    // products of m's flat data() elements (presumably the adjugate entries;
+    // the individual expansions were not re-derived in review). The result is
+    // then multiplied by 1 / determinant.
+    //
+    // NOTE: there is no check for a singular matrix; if m.determinant() is 0
+    // the final scale divides by zero.
+    inline Matrix inverseCramer(Matrix::Arg m)
+    {
+        Matrix r;
+        r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
+        r.data( 1) = m.data(3)*m.data(10)*m.data(13) - m.data(2)*m.data(11)*m.data(13) - m.data(3)*m.data(9)*m.data(14) + m.data(1)*m.data(11)*m.data(14) + m.data(2)*m.data(9)*m.data(15) - m.data(1)*m.data(10)*m.data(15);
+        r.data( 2) = m.data(2)*m.data( 7)*m.data(13) - m.data(3)*m.data( 6)*m.data(13) + m.data(3)*m.data(5)*m.data(14) - m.data(1)*m.data( 7)*m.data(14) - m.data(2)*m.data(5)*m.data(15) + m.data(1)*m.data( 6)*m.data(15);
+        r.data( 3) = m.data(3)*m.data( 6)*m.data( 9) - m.data(2)*m.data( 7)*m.data( 9) - m.data(3)*m.data(5)*m.data(10) + m.data(1)*m.data( 7)*m.data(10) + m.data(2)*m.data(5)*m.data(11) - m.data(1)*m.data( 6)*m.data(11);
+        r.data( 4) = m.data(7)*m.data(10)*m.data(12) - m.data(6)*m.data(11)*m.data(12) - m.data(7)*m.data(8)*m.data(14) + m.data(4)*m.data(11)*m.data(14) + m.data(6)*m.data(8)*m.data(15) - m.data(4)*m.data(10)*m.data(15);
+        r.data( 5) = m.data(2)*m.data(11)*m.data(12) - m.data(3)*m.data(10)*m.data(12) + m.data(3)*m.data(8)*m.data(14) - m.data(0)*m.data(11)*m.data(14) - m.data(2)*m.data(8)*m.data(15) + m.data(0)*m.data(10)*m.data(15);
+        r.data( 6) = m.data(3)*m.data( 6)*m.data(12) - m.data(2)*m.data( 7)*m.data(12) - m.data(3)*m.data(4)*m.data(14) + m.data(0)*m.data( 7)*m.data(14) + m.data(2)*m.data(4)*m.data(15) - m.data(0)*m.data( 6)*m.data(15);
+        r.data( 7) = m.data(2)*m.data( 7)*m.data( 8) - m.data(3)*m.data( 6)*m.data( 8) + m.data(3)*m.data(4)*m.data(10) - m.data(0)*m.data( 7)*m.data(10) - m.data(2)*m.data(4)*m.data(11) + m.data(0)*m.data( 6)*m.data(11);
+        r.data( 8) = m.data(5)*m.data(11)*m.data(12) - m.data(7)*m.data( 9)*m.data(12) + m.data(7)*m.data(8)*m.data(13) - m.data(4)*m.data(11)*m.data(13) - m.data(5)*m.data(8)*m.data(15) + m.data(4)*m.data( 9)*m.data(15);
+        r.data( 9) = m.data(3)*m.data( 9)*m.data(12) - m.data(1)*m.data(11)*m.data(12) - m.data(3)*m.data(8)*m.data(13) + m.data(0)*m.data(11)*m.data(13) + m.data(1)*m.data(8)*m.data(15) - m.data(0)*m.data( 9)*m.data(15);
+        r.data(10) = m.data(1)*m.data( 7)*m.data(12) - m.data(3)*m.data( 5)*m.data(12) + m.data(3)*m.data(4)*m.data(13) - m.data(0)*m.data( 7)*m.data(13) - m.data(1)*m.data(4)*m.data(15) + m.data(0)*m.data( 5)*m.data(15);
+        r.data(11) = m.data(3)*m.data( 5)*m.data( 8) - m.data(1)*m.data( 7)*m.data( 8) - m.data(3)*m.data(4)*m.data( 9) + m.data(0)*m.data( 7)*m.data( 9) + m.data(1)*m.data(4)*m.data(11) - m.data(0)*m.data( 5)*m.data(11);
+        r.data(12) = m.data(6)*m.data( 9)*m.data(12) - m.data(5)*m.data(10)*m.data(12) - m.data(6)*m.data(8)*m.data(13) + m.data(4)*m.data(10)*m.data(13) + m.data(5)*m.data(8)*m.data(14) - m.data(4)*m.data( 9)*m.data(14);
+        r.data(13) = m.data(1)*m.data(10)*m.data(12) - m.data(2)*m.data( 9)*m.data(12) + m.data(2)*m.data(8)*m.data(13) - m.data(0)*m.data(10)*m.data(13) - m.data(1)*m.data(8)*m.data(14) + m.data(0)*m.data( 9)*m.data(14);
+        r.data(14) = m.data(2)*m.data( 5)*m.data(12) - m.data(1)*m.data( 6)*m.data(12) - m.data(2)*m.data(4)*m.data(13) + m.data(0)*m.data( 6)*m.data(13) + m.data(1)*m.data(4)*m.data(14) - m.data(0)*m.data( 5)*m.data(14);
+        r.data(15) = m.data(1)*m.data( 6)*m.data( 8) - m.data(2)*m.data( 5)*m.data( 8) + m.data(2)*m.data(4)*m.data( 9) - m.data(0)*m.data( 6)*m.data( 9) - m.data(1)*m.data(4)*m.data(10) + m.data(0)*m.data( 5)*m.data(10);
+        // Divide every entry by the determinant of the input.
+        r.scale(1.0f / m.determinant());
+        return r;
+    }
+
+    // Invert a matrix by transposing its upper-left 3x3 block and then
+    // applying translate() with the negated offset stored in data(12..14).
+    // This only equals the true inverse when that 3x3 block is orthonormal
+    // (its transpose is its inverse); the function does not verify this.
+    inline Matrix isometryInverse(Matrix::Arg m)
+    {
+        Matrix inv(identity);
+
+        // Upper-left 3x3 block: inv(row, col) = m(col, row).
+        for (int row = 0; row < 3; ++row) {
+            for (int col = 0; col < 3; ++col) {
+                inv(row, col) = m(col, row);
+            }
+        }
+
+        // Undo the offset of the input.
+        const Vector3 offset(m.data(12), m.data(13), m.data(14));
+        inv.translate(-offset);
+
+        return inv;
+    }
+
+    // Transform a 3d point by m: rows 0..2 of m are applied to (p.x, p.y,
+    // p.z) and the column-3 entry of each row is added (i.e. w is taken as
+    // 1). Row 3 of m is ignored, so no perspective divide takes place.
+    inline Vector3 transformPoint(Matrix::Arg m, Vector3::Arg p)
+    {
+        const float x = p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + m(0,3);
+        const float y = p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + m(1,3);
+        const float z = p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + m(2,3);
+        return Vector3(x, y, z);
+    }
+
+    // Transform a 3d direction by m: only the upper-left 3x3 block is
+    // applied, so column 3 of m (the offset used by transformPoint) has no
+    // effect.
+    inline Vector3 transformVector(Matrix::Arg m, Vector3::Arg p)
+    {
+        const float x = p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2);
+        const float y = p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2);
+        const float z = p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2);
+        return Vector3(x, y, z);
+    }
+
+    // Transform a 4d vector by m: each output component is the dot product
+    // of the matching row of m with (p.x, p.y, p.z, p.w).
+    inline Vector4 transform(Matrix::Arg m, Vector4::Arg p)
+    {
+        const float x = p.x * m(0,0) + p.y * m(0,1) + p.z * m(0,2) + p.w * m(0,3);
+        const float y = p.x * m(1,0) + p.y * m(1,1) + p.z * m(1,2) + p.w * m(1,3);
+        const float z = p.x * m(2,0) + p.y * m(2,1) + p.z * m(2,2) + p.w * m(2,3);
+        const float w = p.x * m(3,0) + p.y * m(3,1) + p.z * m(3,2) + p.w * m(3,3);
+        return Vector4(x, y, z, w);
+    }
+
+    // Return a copy of a with b applied to it via Matrix::apply(); neither
+    // argument is modified.
+    inline Matrix mul(Matrix::Arg a, Matrix::Arg b)
+    {
+        // @@ Is this the right order? mul(a, b) = b * a
+        Matrix product(a);
+        product.apply(b);
+        return product;
+    }
+
+    // Component-wise addition: adds each of m's 16 elements to the matching
+    // element of this matrix.
+    inline void Matrix::operator+=(const Matrix & m)
+    {
+        for (int k = 0; k != 16; ++k) {
+            m_data[k] = m_data[k] + m.m_data[k];
+        }
+    }
+
+    // Component-wise subtraction: subtracts each of m's 16 elements from the
+    // matching element of this matrix.
+    inline void Matrix::operator-=(const Matrix & m)
+    {
+        for (int k = 0; k != 16; ++k) {
+            m_data[k] = m_data[k] - m.m_data[k];
+        }
+    }
+
+    // Component-wise sum of two matrices, returned as a new matrix.
+    inline Matrix operator+(const Matrix & a, const Matrix & b)
+    {
+        Matrix sum(a);
+        sum += b;
+        return sum;
+    }
+
+    // Component-wise difference a - b, returned as a new matrix.
+    inline Matrix operator-(const Matrix & a, const Matrix & b)
+    {
+        Matrix difference(a);
+        difference -= b;
+        return difference;
+    }
+
+
+} // nv namespace
+
+
+#if 0 // old code.
+/** @name Special matrices. */
+//@{
+/** Generate a translation matrix. */
+void TranslationMatrix(const Vec3 & v) {
+    data[0] = 1; data[1] = 0; data[2] = 0; data[3] = 0;
+    data[4] = 0; data[5] = 1; data[6] = 0; data[7] = 0;
+    data[8] = 0; data[9] = 0; data[10] = 1; data[11] = 0;
+    data[12] = v.x; data[13] = v.y; data[14] = v.z; data[15] = 1;
+}
+
+/** Build a rotation of theta radians (theta is passed directly to cos/sin) around the axis (v0, v1, v2). */
+void RotationMatrix( float theta, float v0, float v1, float v2 ) {
+    float cost = cos(theta);
+    float sint = sin(theta);
+
+    if( 1 == v0 && 0 == v1 && 0 == v2 ) {
+        data[0] = 1.0f;	data[1] = 0.0f;	data[2] = 0.0f;	data[3] = 0.0f;
+        data[4] = 0.0f;	data[5] = cost;	data[6] = -sint;data[7] = 0.0f;
+        data[8] = 0.0f;	data[9] = sint;	data[10] = cost;data[11] = 0.0f;
+        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
+    }
+    else if( 0 == v0  && 1 == v1 && 0 == v2 ) {
+        data[0] = cost;	data[1] = 0.0f;	data[2] = sint;	data[3] = 0.0f;
+        data[4] = 0.0f;	data[5] = 1.0f;	data[6] = 0.0f;	data[7] = 0.0f;
+        data[8] = -sint;data[9] = 0.0f;data[10] = cost;	data[11] = 0.0f;
+        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
+    }
+    else if( 0 == v0 && 0 == v1 && 1 == v2 ) {
+        data[0] = cost;	data[1] = -sint;data[2] = 0.0f;	data[3] = 0.0f;
+        data[4] = sint; data[5] = cost;	data[6] = 0.0f;	data[7] = 0.0f;
+        data[8] = 0.0f;	data[9] = 0.0f;	data[10] = 1.0f;data[11] = 0.0f;
+        data[12] = 0.0f;data[13] = 0.0f;data[14] = 0.0f;data[15] = 1.0f;
+    } 
+    else {
+        //we need scale a,b,c to unit length.
+        float a2, b2, c2;
+        a2 = v0 * v0;
+        b2 = v1 * v1;
+        c2 = v2 * v2;
+
+        float iscale = 1.0f / sqrtf(a2 + b2 + c2);
+        v0 *= iscale;
+        v1 *= iscale;
+        v2 *= iscale;
+
+        float abm, acm, bcm;
+        float mcos, asin, bsin, csin;
+        mcos = 1.0f - cost;
+        abm = v0 * v1 * mcos;
+        acm = v0 * v2 * mcos;
+        bcm = v1 * v2 * mcos;
+        asin = v0 * sint;
+        bsin = v1 * sint;
+        csin = v2 * sint;
+        data[0] = a2 * mcos + cost;
+        data[1] = abm - csin;
+        data[2] = acm + bsin;
+        data[3] = abm + csin;
+        data[4] = 0.0f;
+        data[5] = b2 * mcos + cost;
+        data[6] = bcm - asin;
+        data[7] = acm - bsin;
+        data[8] = 0.0f;
+        data[9] = bcm + asin;
+        data[10] = c2 * mcos + cost;
+        data[11] = 0.0f;
+        data[12] = 0.0f;
+        data[13] = 0.0f;
+        data[14] = 0.0f;
+        data[15] = 1.0f;
+    }
+}
+
+/*
+void SkewMatrix(float angle, const Vec3 & v1, const Vec3 & v2) {
+v1.Normalize();
+v2.Normalize();
+
+Vec3 v3;
+v3.Cross(v1, v2);
+v3.Normalize();
+
+// Get skew factor.
+float costheta = Vec3DotProduct(v1, v2);
+float sintheta = Real.Sqrt(1 - costheta * costheta);
+float skew = tan(Trig.DegreesToRadians(angle) + acos(sintheta)) * sintheta - costheta;
+
+// Build orthonormal matrix.
+v1 = FXVector3.Cross(v3, v2);
+v1.Normalize();
+
+Matrix R = Matrix::Identity;
+R[0, 0] = v3.X;	// Not sure this is in the correct order...
+R[1, 0] = v3.Y;
+R[2, 0] = v3.Z;
+R[0, 1] = v1.X;
+R[1, 1] = v1.Y;
+R[2, 1] = v1.Z;
+R[0, 2] = v2.X;
+R[1, 2] = v2.Y;
+R[2, 2] = v2.Z;
+
+// Build skew matrix.
+Matrix S = Matrix::Identity;
+S[2, 1] = -skew;
+
+// Return skew transform.
+return R * S * R.Transpose;	// Not sure this is in the correct order...
+}
+*/
+
+/**
+* Generate rotation matrix for the euler angles. This is the same as computing
+* 3 rotation matrices and multiplying them together in our custom order.
+*
+* @todo Have to recompute this code for our new convention.
+**/
+void RotationMatrix( float yaw, float pitch, float roll ) {
+    float sy = sin(yaw+ToRadian(90));
+    float cy = cos(yaw+ToRadian(90));
+    float sp = sin(pitch-ToRadian(90));
+    float cp = cos(pitch-ToRadian(90));
+    float sr = sin(roll);
+    float cr = cos(roll);
+
+    data[0] = cr*cy + sr*sp*sy;
+    data[1] = cp*sy;
+    data[2] = -sr*cy + cr*sp*sy;
+    data[3] = 0;
+
+    data[4] = -cr*sy + sr*sp*cy;
+    data[5] = cp*cy;
+    data[6] = sr*sy + cr*sp*cy;
+    data[7] = 0;
+
+    data[8] = sr*cp;
+    data[9] = -sp;
+    data[10] = cr*cp;
+    data[11] = 0;
+
+    data[12] = 0;
+    data[13] = 0;
+    data[14] = 0;
+    data[15] = 1;
+}
+
+/** Create a frustum matrix with the near plane at zNear and the far plane at zFar. */
+void Frustum( float xmin, float xmax, float ymin, float ymax, float zNear, float zFar ) {
+    float one_deltax, one_deltay, one_deltaz, doubleznear;
+
+    doubleznear = 2.0f * zNear;
+    one_deltax = 1.0f / (xmax - xmin);
+    one_deltay = 1.0f / (ymax - ymin);
+    one_deltaz = 1.0f / (zFar - zNear);
+
+    data[0] = (float)(doubleznear * one_deltax);
+    data[1] = 0.0f;
+    data[2] = 0.0f;
+    data[3] = 0.0f;
+    data[4] = 0.0f;
+    data[5] = (float)(doubleznear * one_deltay);
+    data[6] = 0.f;
+    data[7] = 0.f;
+    data[8] = (float)((xmax + xmin) * one_deltax);
+    data[9] = (float)((ymax + ymin) * one_deltay);
+    data[10] = (float)(-(zFar + zNear) * one_deltaz);
+    data[11] = -1.f;
+    data[12] = 0.f;
+    data[13] = 0.f;
+    data[14] = (float)(-(zFar * doubleznear) * one_deltaz);
+    data[15] = 0.f;
+}
+
+/** Create a frustum matrix with the far plane at the infinity. */
+void FrustumInf( float xmin, float xmax, float ymin, float ymax, float zNear ) {
+    float one_deltax, one_deltay, doubleznear, nudge;
+
+    doubleznear = 2.0f * zNear;
+    one_deltax = 1.0f / (xmax - xmin);
+    one_deltay = 1.0f / (ymax - ymin);
+    nudge = 1.0; // 0.999;
+
+    data[0] = doubleznear * one_deltax;
+    data[1] = 0.0f;
+    data[2] = 0.0f;
+    data[3] = 0.0f;
+
+    data[4] = 0.0f;
+    data[5] = doubleznear * one_deltay;
+    data[6] = 0.f;
+    data[7] = 0.f;
+
+    data[8] = (xmax + xmin) * one_deltax;
+    data[9] = (ymax + ymin) * one_deltay;
+    data[10] = -1.0f * nudge;
+    data[11] = -1.0f;
+
+    data[12] = 0.f;
+    data[13] = 0.f;
+    data[14] = -doubleznear * nudge;
+    data[15] = 0.f;
+}
+
+/** Create an inverse frustum matrix with the far plane at the infinity. */
+void FrustumInfInv( float left, float right, float bottom, float top, float zNear ) {
+    // This matrix is probably wrong (not really tested); I think it should be transposed.
+    data[0] = (right - left) / (2 * zNear);
+    data[1] = 0;
+    data[2] = 0;
+    data[3] = (right + left) / (2 * zNear);
+    data[4] = 0;
+    data[5] = (top - bottom) / (2 * zNear);
+    data[6] = 0;
+    data[7] = (top + bottom) / (2 * zNear);
+    data[8] = 0;
+    data[9] = 0;
+    data[10] = 0;
+    data[11] = -1;
+    data[12] = 0;
+    data[13] = 0;
+    data[14] = -1 / (2 * zNear);
+    data[15] = 1 / (2 * zNear);
+}
+
+/** Create an homogeneous projection matrix. */
+void Perspective( float fov, float aspect, float zNear, float zFar ) {
+    float xmin, xmax, ymin, ymax;
+
+    xmax = zNear * tan( fov/2 );
+    xmin = -xmax;
+
+    ymax = xmax / aspect;
+    ymin = -ymax;
+
+    Frustum(xmin, xmax, ymin, ymax, zNear, zFar);
+}
+
+/** Create a projection matrix with the far plane at the infinity. */
+void PerspectiveInf( float fov, float aspect, float zNear ) {
+    float x = zNear * tan( fov/2 );
+    float y = x / aspect;
+    FrustumInf( -x, x, -y, y, zNear );
+}
+
+/** Create an inverse projection matrix with far plane at the infinity. */
+void PerspectiveInfInv( float fov, float aspect, float zNear ) {
+    float x = zNear * tan( fov/2 );
+    float y = x / aspect;
+    FrustumInfInv( -x, x, -y, y, zNear );
+}
+
+/** Build bone matrix from quaternion and offset. */
+void BoneMatrix(const Quat & q, const Vec3 & offset) {
+    float x2, y2, z2, xx, xy, xz, yy, yz, zz, wx, wy, wz;
+
+    // calculate coefficients
+    x2 = q.x + q.x;
+    y2 = q.y + q.y;
+    z2 = q.z + q.z;
+
+    xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
+    yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
+    wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
+
+    data[0] = 1.0f - (yy + zz); 	
+    data[1] = xy - wz;
+    data[2] = xz + wy;		
+    data[3] = 0.0f;
+
+    data[4] = xy + wz;		
+    data[5] = 1.0f - (xx + zz);
+    data[6] = yz - wx;		
+    data[7] = 0.0f;
+
+    data[8] = xz - wy;		
+    data[9] = yz + wx;
+    data[10] = 1.0f - (xx + yy);		
+    data[11] = 0.0f;
+
+    data[12] = offset.x;
+    data[13] = offset.y;
+    data[14] = offset.z;			
+    data[15] = 1.0f;
+}
+
+//@}
+
+
+/** @name Transformations: */
+//@{
+
+/** Apply a general scale. */
+void Scale( float x, float y, float z ) {
+    data[0] *= x;	data[4] *= y;	data[8]  *= z;
+    data[1] *= x;	data[5] *= y;	data[9]  *= z;
+    data[2] *= x;	data[6] *= y;	data[10] *= z;
+    data[3] *= x;	data[7] *= y;	data[11] *= z;
+}
+
+/** Apply a rotation of theta radians around the axis v. */
+void Rotate( float theta, const Vec3 & v ) {
+    Matrix b;
+    b.RotationMatrix( theta, v[0], v[1], v[2] );
+    Multiply4x3( b );
+}
+
+/** Apply a rotation of theta radians around the axis (v0, v1, v2). */
+void Rotate( float theta, float v0, float v1, float v2 ) {
+    Matrix b;
+    b.RotationMatrix( theta, v0, v1, v2 );
+    Multiply4x3( b );
+}
+
+/**
+* Translate the matrix by t. This is the same as multiplying by a
+* translation matrix with the given offset.
+* this = T * this
+*/
+void Translate( const Vec3 &t ) {
+    data[12] = data[0] * t.x + data[4] * t.y + data[8]  * t.z + data[12];
+    data[13] = data[1] * t.x + data[5] * t.y + data[9]  * t.z + data[13];
+    data[14] = data[2] * t.x + data[6] * t.y + data[10] * t.z + data[14];
+    data[15] = data[3] * t.x + data[7] * t.y + data[11] * t.z + data[15];
+}
+
+/** 
+* Translate the matrix by x, y, z. This is the same as multiplying by a 
+* translation matrix with the given offsets.
+*/
+void Translate( float x, float y, float z ) {
+    data[12] = data[0] * x + data[4] * y + data[8]  * z + data[12];
+    data[13] = data[1] * x + data[5] * y + data[9]  * z + data[13];
+    data[14] = data[2] * x + data[6] * y + data[10] * z + data[14];
+    data[15] = data[3] * x + data[7] * y + data[11] * z + data[15];
+}
+
+/** Compute the transposed matrix. */
+void Transpose() {
+    piSwap(data[1], data[4]);
+    piSwap(data[2], data[8]);
+    piSwap(data[6], data[9]);
+    piSwap(data[3], data[12]);
+    piSwap(data[7], data[13]);
+    piSwap(data[11], data[14]);
+}
+
+/** Compute the inverse of a rigid-body/isometry/orthonormal matrix. */
+void IsometryInverse() {
+    // transposed 3x3 upper left matrix
+    piSwap(data[1], data[4]);
+    piSwap(data[2], data[8]);
+    piSwap(data[6], data[9]);
+
+    // translate by the negative offsets
+    Vec3 v(-data[12], -data[13], -data[14]);
+    data[12] = data[13] = data[14] = 0;
+    Translate(v);
+}
+
+/** Compute the inverse of the affine portion of this matrix. */
+void AffineInverse() {
+    data[12] = data[13] = data[14] = 0;
+    Transpose();
+}
+//@}
+
+/** @name Matrix operations: */
+//@{
+
+/** Return the determinant of this matrix. */
+float Determinant() const {
+    return	data[0] * data[5] * data[10] * data[15] + 
+        data[1] * data[6] * data[11] * data[12] +
+        data[2] * data[7] * data[ 8] * data[13] +
+        data[3] * data[4] * data[ 9] * data[14] -
+        data[3] * data[6] * data[ 9] * data[12] -
+        data[2] * data[5] * data[ 8] * data[15] -
+        data[1] * data[4] * data[11] * data[14] -
+        data[0] * data[7] * data[10] * data[12];
+}
+
+
+/** Standard matrix product: this *= B. */
+void Multiply4x4( const Matrix & restrict B ) {
+    Multiply4x4(*this, B);
+}
+
+/** Standard matrix product: this = A * B. Requires this != &B (this == &A is allowed). */
+void Multiply4x4( const Matrix & A, const Matrix & restrict B ) {
+    piDebugCheck(this != &B);
+
+    for(int i = 0; i < 4; i++) {
+        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
+        GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
+        GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
+        GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
+        GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
+    }
+
+    /* Unrolled but does not allow this == A
+    data[0] = A.data[0] * B.data[0] + A.data[4] * B.data[1] + A.data[8] * B.data[2] + A.data[12] * B.data[3];
+    data[1] = A.data[1] * B.data[0] + A.data[5] * B.data[1] + A.data[9] * B.data[2] + A.data[13] * B.data[3];
+    data[2] = A.data[2] * B.data[0] + A.data[6] * B.data[1] + A.data[10] * B.data[2] + A.data[14] * B.data[3];
+    data[3] = A.data[3] * B.data[0] + A.data[7] * B.data[1] + A.data[11] * B.data[2] + A.data[15] * B.data[3];
+    data[4] = A.data[0] * B.data[4] + A.data[4] * B.data[5] + A.data[8] * B.data[6] + A.data[12] * B.data[7];
+    data[5] = A.data[1] * B.data[4] + A.data[5] * B.data[5] + A.data[9] * B.data[6] + A.data[13] * B.data[7];
+    data[6] = A.data[2] * B.data[4] + A.data[6] * B.data[5] + A.data[10] * B.data[6] + A.data[14] * B.data[7];
+    data[7] = A.data[3] * B.data[4] + A.data[7] * B.data[5] + A.data[11] * B.data[6] + A.data[15] * B.data[7];
+    data[8] = A.data[0] * B.data[8] + A.data[4] * B.data[9] + A.data[8] * B.data[10] + A.data[12] * B.data[11];
+    data[9] = A.data[1] * B.data[8] + A.data[5] * B.data[9] + A.data[9] * B.data[10] + A.data[13] * B.data[11];
+    data[10]= A.data[2] * B.data[8] + A.data[6] * B.data[9] + A.data[10] * B.data[10] + A.data[14] * B.data[11];
+    data[11]= A.data[3] * B.data[8] + A.data[7] * B.data[9] + A.data[11] * B.data[10] + A.data[15] * B.data[11];
+    data[12]= A.data[0] * B.data[12] + A.data[4] * B.data[13] + A.data[8] * B.data[14] + A.data[12] * B.data[15];
+    data[13]= A.data[1] * B.data[12] + A.data[5] * B.data[13] + A.data[9] * B.data[14] + A.data[13] * B.data[15];
+    data[14]= A.data[2] * B.data[12] + A.data[6] * B.data[13] + A.data[10] * B.data[14] + A.data[14] * B.data[15];
+    data[15]= A.data[3] * B.data[12] + A.data[7] * B.data[13] + A.data[11] * B.data[14] + A.data[15] * B.data[15];
+    */
+}
+
+/** Standard matrix product: this *= B. */
+void Multiply4x3( const Matrix & restrict B ) {
+    Multiply4x3(*this, B);
+}
+
+/** Standard product of matrices, where the last row is [0 0 0 1]. */
+void Multiply4x3( const Matrix & A, const Matrix & restrict B ) {
+    piDebugCheck(this != &B);
+
+    for(int i = 0; i < 3; i++) {
+        const float ai0 = A(i,0), ai1 = A(i,1), ai2 = A(i,2), ai3 = A(i,3);
+        GetElem(i,0) = ai0 * B(0,0) + ai1 * B(1,0) + ai2 * B(2,0) + ai3 * B(3,0);
+        GetElem(i,1) = ai0 * B(0,1) + ai1 * B(1,1) + ai2 * B(2,1) + ai3 * B(3,1);
+        GetElem(i,2) = ai0 * B(0,2) + ai1 * B(1,2) + ai2 * B(2,2) + ai3 * B(3,2);
+        GetElem(i,3) = ai0 * B(0,3) + ai1 * B(1,3) + ai2 * B(2,3) + ai3 * B(3,3);
+    }
+    data[3] = 0.0f; data[7] = 0.0f; data[11] = 0.0f; data[15] = 1.0f;
+
+    /* Unrolled but does not allow this == A
+    data[0] = a.data[0] * b.data[0] + a.data[4] * b.data[1] + a.data[8] * b.data[2] + a.data[12] * b.data[3];
+    data[1] = a.data[1] * b.data[0] + a.data[5] * b.data[1] + a.data[9] * b.data[2] + a.data[13] * b.data[3];
+    data[2] = a.data[2] * b.data[0] + a.data[6] * b.data[1] + a.data[10] * b.data[2] + a.data[14] * b.data[3];
+    data[3] = 0.0f;
+    data[4] = a.data[0] * b.data[4] + a.data[4] * b.data[5] + a.data[8] * b.data[6] + a.data[12] * b.data[7];
+    data[5] = a.data[1] * b.data[4] + a.data[5] * b.data[5] + a.data[9] * b.data[6] + a.data[13] * b.data[7];
+    data[6] = a.data[2] * b.data[4] + a.data[6] * b.data[5] + a.data[10] * b.data[6] + a.data[14] * b.data[7];
+    data[7] = 0.0f;
+    data[8] = a.data[0] * b.data[8] + a.data[4] * b.data[9] + a.data[8] * b.data[10] + a.data[12] * b.data[11];
+    data[9] = a.data[1] * b.data[8] + a.data[5] * b.data[9] + a.data[9] * b.data[10] + a.data[13] * b.data[11];
+    data[10]= a.data[2] * b.data[8] + a.data[6] * b.data[9] + a.data[10] * b.data[10] + a.data[14] * b.data[11];
+    data[11]= 0.0f;
+    data[12]= a.data[0] * b.data[12] + a.data[4] * b.data[13] + a.data[8] * b.data[14] + a.data[12] * b.data[15];
+    data[13]= a.data[1] * b.data[12] + a.data[5] * b.data[13] + a.data[9] * b.data[14] + a.data[13] * b.data[15];
+    data[14]= a.data[2] * b.data[12] + a.data[6] * b.data[13] + a.data[10] * b.data[14] + a.data[14] * b.data[15];
+    data[15]= 1.0f;
+    */
+}
+//@}
+
+
+/** @name Vector operations: */
+//@{
+
+/** Transform 3d vector (w=0). */
+void TransformVec3(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+    piDebugCheck(&orig != dest);
+    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8];
+    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9];
+    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10];
+}
+/** Transform 3d vector by the transpose (w=0). */
+void TransformVec3T(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+    piDebugCheck(&orig != dest);
+    dest->x = orig.x * data[0] + orig.y * data[1] + orig.z * data[2];
+    dest->y = orig.x * data[4] + orig.y * data[5] + orig.z * data[6];
+    dest->z = orig.x * data[8] + orig.y * data[9] + orig.z * data[10];
+}
+
+/** Transform a 3d homogeneous vector, where the fourth coordinate is assumed to be 1. */
+void TransformPoint(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+    piDebugCheck(&orig != dest);
+    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
+    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
+    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
+}
+
+/** Transform a point, normalize it, and return w. */
+float TransformPointAndNormalize(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+    piDebugCheck(&orig != dest);
+    float w;
+    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
+    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
+    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
+    w = 1 / (orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15]);
+    *dest *= w;
+    return w;
+}
+
+/** Transform a point and return w. */
+float TransformPointReturnW(const Vec3 & restrict orig, Vec3 * restrict dest) const {
+    piDebugCheck(&orig != dest);
+    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
+    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
+    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
+    return orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
+}
+
+/** Transform a normalized 3d point by a 4d matrix and return the resulting 4d vector. */
+void TransformVec4(const Vec3 & orig, Vec4 * dest) const {
+    dest->x = orig.x * data[0] + orig.y * data[4] + orig.z * data[8] + data[12];
+    dest->y = orig.x * data[1] + orig.y * data[5] + orig.z * data[9] + data[13];
+    dest->z = orig.x * data[2] + orig.y * data[6] + orig.z * data[10] + data[14];
+    dest->w = orig.x * data[3] + orig.y * data[7] + orig.z * data[11] + data[15];
+}
+//@}
+
+/** @name Matrix analysis. */
+//@{
+
+/** Get the ZYZ euler angles from the matrix. Assumes the matrix is orthonormal. */
+void GetEulerAnglesZYZ(float * s, float * t, float * r) const {
+    if( GetElem(2,2) < 1.0f ) {
+        if( GetElem(2,2) > -1.0f ) {
+            // 	cs*ct*cr-ss*sr 		-ss*ct*cr-cs*sr		st*cr
+            //	cs*ct*sr+ss*cr		-ss*ct*sr+cs*cr		st*sr
+            //	-cs*st				ss*st				ct
+            *s = atan2(GetElem(1,2), -GetElem(0,2));
+            *t = acos(GetElem(2,2));
+            *r = atan2(GetElem(2,1), GetElem(2,0));		
+        }
+        else {
+            // 	-c(s-r)	 	s(s-r)		0
+            //	s(s-r)		c(s-r)		0
+            //	0			0			-1
+            *s = atan2(GetElem(0, 1), -GetElem(0, 0)); // = s-r
+            *t = PI;
+            *r = 0;
+        }
+    }
+    else {
+        // 	c(s+r)		-s(s+r)		0
+        //	s(s+r)		c(s+r)		0
+        //	0			0			1
+        *s = atan2(GetElem(0, 1), GetElem(0, 0)); // = s+r
+        *t = 0;
+        *r = 0;
+    }
+}
+
+//@}
+
+MATHLIB_API friend PiStream & operator<< ( PiStream & s, Matrix & m );
+
+/** Print to debug output. */
+void Print() const {
+    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[0], data[4], data[8], data[12] );
+    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[1], data[5], data[9], data[13] );
+    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[2], data[6], data[10], data[14] );
+    piDebug( "[ %5.2f %5.2f %5.2f %5.2f ]\n", data[3], data[7], data[11], data[15] );
+}
+
+
+public:
+
+    float data[16];
+
+};
+#endif
+
+
+#endif // NV_MATH_MATRIX_INL
diff --git a/thirdparty/thekla_atlas/nvmath/Morton.h b/thirdparty/thekla_atlas/nvmath/Morton.h
new file mode 100644
index 0000000000..10e0d8152a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Morton.h
@@ -0,0 +1,83 @@
+
+// Code from ryg:
+// http://fgiesen.wordpress.com/2009/12/13/decoding-morton-codes/
+
+
+// Spreads the 16 low bits of x onto the even bit positions ("inserts" a 0 bit after each one); the upper 16 bits of x are discarded.
+inline uint32 part1By1(uint32 x)
+{
+	x &= 0x0000ffff;                  // x = ---- ---- ---- ---- fedc ba98 7654 3210
+	x = (x ^ (x <<  8)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
+	x = (x ^ (x <<  4)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
+	x = (x ^ (x <<  2)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
+	x = (x ^ (x <<  1)) & 0x55555555; // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
+	return x;
+}
+
+// Spreads the 10 low bits of x onto every third bit position ("inserts" two 0 bits after each one); the upper 22 bits of x are discarded.
+inline uint32 part1By2(uint32 x)
+{
+	x &= 0x000003ff;                  // x = ---- ---- ---- ---- ---- --98 7654 3210
+	x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
+	x = (x ^ (x <<  8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
+	x = (x ^ (x <<  4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
+	x = (x ^ (x <<  2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+	return x;
+}
+
+inline uint32 encodeMorton2(uint32 x, uint32 y) { // interleaves the 16 low bits of each: x on the even bit slots, y on the odd ones
+	const uint32 oddBits = part1By1(y) << 1;
+	return oddBits + part1By1(x);
+}
+
+inline uint32 encodeMorton3(uint32 x, uint32 y, uint32 z) { // interleaves the 10 low bits of each as ...zyxzyx
+	const uint32 upperLanes = (part1By2(z) << 2) + (part1By2(y) << 1);
+	return upperLanes + part1By2(x);
+}
+
+// Inverse of part1By1 - "deletes" all odd-indexed bits and packs the 16 even-indexed ones into the low half of the result.
+inline uint32 compact1By1(uint32 x)
+{
+	x &= 0x55555555;                  // x = -f-e -d-c -b-a -9-8 -7-6 -5-4 -3-2 -1-0
+	x = (x ^ (x >>  1)) & 0x33333333; // x = --fe --dc --ba --98 --76 --54 --32 --10
+	x = (x ^ (x >>  2)) & 0x0f0f0f0f; // x = ---- fedc ---- ba98 ---- 7654 ---- 3210
+	x = (x ^ (x >>  4)) & 0x00ff00ff; // x = ---- ---- fedc ba98 ---- ---- 7654 3210
+	x = (x ^ (x >>  8)) & 0x0000ffff; // x = ---- ---- ---- ---- fedc ba98 7654 3210
+	return x;
+}
+
+// Inverse of part1By2 - "deletes" all bits not at positions divisible by 3 and packs the 10 kept bits into the low bits of the result.
+inline uint32 compact1By2(uint32 x)
+{
+	x &= 0x09249249;                  // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0
+	x = (x ^ (x >>  2)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10
+	x = (x ^ (x >>  4)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210
+	x = (x ^ (x >>  8)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210
+	x = (x ^ (x >> 16)) & 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210
+	return x;
+}
+
+inline uint32 decodeMorton2X(uint32 code)
+{
+	return compact1By1(code); // x already sits on the even bit slots, so no shift is needed
+}
+
+inline uint32 decodeMorton2Y(uint32 code) {
+	const uint32 yOnEvenSlots = code >> 1; // bring y's odd-position bits down onto the even slots
+	return compact1By1(yOnEvenSlots);
+}
+
+inline uint32 decodeMorton3X(uint32 code)
+{
+	return compact1By2(code); // x already sits on the bit positions divisible by 3
+}
+
+inline uint32 decodeMorton3Y(uint32 code) {
+	const uint32 yAligned = code >> 1; // y is stored one bit above x in every 3-bit group
+	return compact1By2(yAligned);
+}
+
+inline uint32 decodeMorton3Z(uint32 code) {
+	const uint32 zAligned = code >> 2; // z is stored two bits above x in every 3-bit group
+	return compact1By2(zAligned);
+}
+\ No newline at end of file
diff --git a/thirdparty/thekla_atlas/nvmath/Plane.cpp b/thirdparty/thekla_atlas/nvmath/Plane.cpp
new file mode 100644
index 0000000000..8b54f829ad
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Plane.cpp
@@ -0,0 +1,27 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Plane.h"
+#include "Plane.inl"
+#include "Matrix.inl"
+
+namespace nv
+{
+    // Transforms plane p by m. Assumes p is normalized and m keeps normals valid (rotation + translation) -- TODO confirm with callers.
+    Plane transformPlane(const Matrix & m, const Plane & p) {
+        Vector3 newVec = transformVector(m, p.vector());
+
+        // Planes satisfy dot(vector, x) + offset == 0, so the point closest to the origin is -offset * vector.
+        Vector3 ptInPlane = -p.offset() * p.vector();
+        ptInPlane = transformPoint(m, ptInPlane);
+        return Plane(newVec, ptInPlane);
+    }
+
+    // Point common to three planes (each satisfying dot(vector, x) + offset == 0). The result is not finite when the plane vectors are linearly dependent.
+    Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c) {
+        const Vector3 bxc = cross(b.vector(), c.vector());
+        const float det = dot(a.vector(), bxc); // zero when there is no unique intersection point
+        const Vector3 sum = a.offset() * bxc + b.offset() * cross(c.vector(), a.vector()) + c.offset() * cross(a.vector(), b.vector());
+        return (-1.0f / det) * sum; // Cramer's rule: divide by the determinant, negated because offsets are stored with a + sign
+    }
+
+} // nv namespace
diff --git a/thirdparty/thekla_atlas/nvmath/Plane.h b/thirdparty/thekla_atlas/nvmath/Plane.h
new file mode 100644
index 0000000000..dc468b28e2
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Plane.h
@@ -0,0 +1,42 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_PLANE_H
+#define NV_MATH_PLANE_H
+
+#include "nvmath.h"
+#include "Vector.h"
+
+namespace nv
+{
+    class Matrix;
+
+    // A plane stored as one Vector4: xyz is the plane vector and w the offset (see vector() and offset()).
+    class NVMATH_CLASS Plane
+    {
+    public:
+        Plane();
+        Plane(float x, float y, float z, float w);
+        Plane(const Vector4 & v);
+        Plane(const Vector3 & v, float d);
+        Plane(const Vector3 & normal, const Vector3 & point);                // plane with vector 'normal' passing through 'point'
+        Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2);  // plane through three points; the vector is left unnormalized
+
+        const Plane & operator=(const Plane & v);
+
+        Vector3 vector() const;   // xyz of v, exactly as stored
+        float offset() const;     // w of v
+        Vector3 normal() const;   // vector() passed through normalize()
+
+        void operator*=(float s); // multiplies the stored Vector4 by s
+        Vector4 v;
+    };
+
+    Plane transformPlane(const Matrix &, const Plane &);
+
+    Vector3 planeIntersection(const Plane & a, const Plane & b, const Plane & c);
+
+
+} // nv namespace
+
+#endif // NV_MATH_PLANE_H
diff --git a/thirdparty/thekla_atlas/nvmath/Plane.inl b/thirdparty/thekla_atlas/nvmath/Plane.inl
new file mode 100644
index 0000000000..2277e38cd5
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Plane.inl
@@ -0,0 +1,50 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_PLANE_INL
+#define NV_MATH_PLANE_INL
+
+#include "Plane.h"
+#include "Vector.inl"
+
+namespace nv
+{
+    inline Plane::Plane() {} // v is default-constructed
+    inline Plane::Plane(float x, float y, float z, float w) : v(x, y, z, w) {}
+    inline Plane::Plane(const Vector4 & v) : v(v) {}
+    inline Plane::Plane(const Vector3 & v, float d) : v(v, d) {} // d is stored in w unchanged (no sign flip)
+    inline Plane::Plane(const Vector3 & normal, const Vector3 & point) : v(normal, -dot(normal, point)) {} // offset chosen so that dot(normal, point) + offset == 0
+    inline Plane::Plane(const Vector3 & v0, const Vector3 & v1, const Vector3 & v2) {
+        // Unnormalized plane vector from the two edges leaving v0; the offset puts v0 on the plane.
+        const Vector3 edgeCross = cross(v1-v0, v2-v0);
+        v = Vector4(edgeCross, -dot(edgeCross, v0));
+    }
+
+    inline const Plane & Plane::operator=(const Plane & p) { v = p.v; return *this; }
+    // Accessors: vector() and offset() return the stored components; normal() returns normalize(vector(), 0.0f).
+    inline Vector3 Plane::vector() const { return v.xyz(); }
+    inline float Plane::offset() const { return v.w; }
+    inline Vector3 Plane::normal() const { return normalize(vector(), 0.0f); }
+
+    // Normalize plane: scales all four components by 1 / length(vector). A plane whose vector length is zero within 'epsilon' is scaled by 0 instead.
+    inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)
+    {
+        const float magnitude = length(plane.vector());
+        if (isZero(magnitude, epsilon)) return Plane(plane.v * 0.0f);
+        return Plane(plane.v * (1.0f / magnitude));
+    }
+
+    // Signed distance from 'point' to 'plane'; this is a true distance only when the plane vector has unit length.
+    inline float distance(const Plane & plane, const Vector3 & point) {
+        const float alongVector = dot(plane.vector(), point);
+        return alongVector + plane.offset();
+    }
+
+    inline void Plane::operator*=(float s)
+    {
+        v *= s; // scales the stored Vector4, i.e. both vector() and offset()
+    }
+
+} // nv namespace
+
+#endif // NV_MATH_PLANE_INL
diff --git a/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp b/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp
new file mode 100644
index 0000000000..3553e48f64
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/ProximityGrid.cpp
@@ -0,0 +1,158 @@
+#include "ProximityGrid.h"
+
+#include "Box.inl"
+#include "Morton.h"
+
+
+using namespace nv;
+
+ProximityGrid::ProximityGrid() {
+}
+
+void ProximityGrid::reset() {
+    cellArray.clear(); // only the cells are dropped; sx/sy/sz, corner and invCellSize are left untouched here
+}
+
+void ProximityGrid::init(const Array<Vector3> & pointArray) {
+
+    const uint pointCount = pointArray.count();
+
+    // Size the grid from the bounding box of all the input points.
+    Box bounds;
+    bounds.clearBounds();
+    for (uint p = 0; p < pointCount; p++) {
+        bounds.addPointToBounds(pointArray[p]);
+    }
+
+    init(bounds, pointCount);
+
+    // Register every point, using its position in pointArray as the key.
+    for (uint p = 0; p < pointCount; p++) {
+        add(pointArray[p], p);
+    }
+
+}
+
+
+// Sizes the grid for 'count' points inside 'box', aiming at roughly one point per cell. count == 0 or a flat/collapsed box yields a single cell along the affected axes.
+void ProximityGrid::init(const Box & box, uint count) {
+    reset();
+    Vector3 diagonal = box.extents() * 2.f;
+    float volume = box.volume();
+
+    // Determine grid size.
+    float cellWidth = 1.0f;
+    if (count == 0) {
+        // Nothing to distribute (and nothing to divide by): collapse to a single cell.
+        diagonal.x = diagonal.y = diagonal.z = 0.0f;
+    }
+    else if (equal(volume, 0)) {
+        // Degenerate box, treat like a quad.
+        Vector2 quad;
+        if (diagonal.x < diagonal.y && diagonal.x < diagonal.z) {
+            quad.x = diagonal.y;
+            quad.y = diagonal.z;
+        }
+        else if (diagonal.y < diagonal.x && diagonal.y < diagonal.z) {
+            quad.x = diagonal.x;
+            quad.y = diagonal.z;
+        }
+        else {
+            quad.x = diagonal.x;
+            quad.y = diagonal.y;
+        }
+        cellWidth = sqrtf(quad.x * quad.y / count);
+    }
+    else {
+        // Ideally we want one cell per point.
+        cellWidth = pow(volume / count, 1.0f / 3.0f);
+    }
+    nvDebugCheck(cellWidth != 0);
+
+    // A zero or NaN width must not reach ftoi_ceil as inf/NaN; fall back to one cell per axis.
+    const bool usableWidth = cellWidth > 0;
+    sx = usableWidth ? max(1, ftoi_ceil(diagonal.x / cellWidth)) : 1;
+    sy = usableWidth ? max(1, ftoi_ceil(diagonal.y / cellWidth)) : 1;
+    sz = usableWidth ? max(1, ftoi_ceil(diagonal.z / cellWidth)) : 1;
+
+    // Flat axes get a zero scale, so index_x/y/z evaluate to cell 0 instead of computing 0 * inf.
+    invCellSize.x = (diagonal.x > 0) ? float(sx) / diagonal.x : 0.0f;
+    invCellSize.y = (diagonal.y > 0) ? float(sy) / diagonal.y : 0.0f;
+    invCellSize.z = (diagonal.z > 0) ? float(sz) / diagonal.z : 0.0f;
+    cellArray.resize(sx * sy * sz);
+    corner = box.minCorner; // @@ Align grid better?
+}
+
+// Appends to 'indexArray' the keys stored in every cell touched by the cube of half-size 'radius'
+// centered at 'position'. Radius is assumed to be small, so cells are not culled against the sphere
+// and points are not distance-tested: the result can contain keys of points farther than 'radius'.
+void ProximityGrid::gather(const Vector3 & position, float radius, Array<uint> & indexArray) {
+    // Inclusive cell range along each axis (index_x/y/z clamp to the grid).
+    const int xFirst = index_x(position.x - radius);
+    const int xLast  = index_x(position.x + radius);
+    const int yFirst = index_y(position.y - radius);
+    const int yLast  = index_y(position.y + radius);
+    const int zFirst = index_z(position.z - radius);
+    const int zLast  = index_z(position.z + radius);
+
+    for (int cz = zFirst; cz <= zLast; cz++) {
+        for (int cy = yFirst; cy <= yLast; cy++) {
+            for (int cx = xFirst; cx <= xLast; cx++) {
+                Cell & cell = cellArray[index(cx, cy, cz)];
+                indexArray.append(cell.indexArray);
+            }
+        }
+    }
+}
+
+
+uint32 ProximityGrid::mortonCount() const { // number of codes to walk when traversing the grid with mortonIndex()
+    uint64 s = U64(max3(sx, sy, sz));
+    s = nextPowerOfTwo(s);
+    // Above 1024 the traversal is layered: s*s two-dimensional codes for each layer along the smallest dimension.
+    if (s > 1024) {
+        return U32(s * s * min3(sx, sy, sz));
+    }
+    // Otherwise one three-dimensional code per cell of the enclosing s*s*s cube.
+    return U32(s * s * s);
+}
+
+// Decodes a morton code into a cell index; returns -1 when the decoded cell lies outside the sx*sy*sz grid.
+int ProximityGrid::mortonIndex(uint32 code) const {
+    uint32 x, y, z;
+    uint s = U32(max3(sx, sy, sz));
+    if (s > 1024) {
+        // Use layered two-dimensional morton order.
+        s = nextPowerOfTwo(s);
+        uint layer = code / (s * s);
+        code = code % (s * s);
+        // The layer axis is the smallest dimension; on ties the first match in x, y, z order wins.
+        uint layer_count = U32(min3(sx, sy, sz));
+        if (sx == layer_count) {
+            x = layer;
+            y = decodeMorton2X(code);
+            z = decodeMorton2Y(code);
+        }
+        else if (sy == layer_count) {
+            x = decodeMorton2Y(code); 
+            y = layer; 
+            z = decodeMorton2X(code);
+        }
+        else /*if (sz == layer_count)*/ {
+            x = decodeMorton2X(code);
+            y = decodeMorton2Y(code);
+            z = layer;
+        }
+    }
+    else {
+        x = decodeMorton3X(code);
+        y = decodeMorton3Y(code);
+        z = decodeMorton3Z(code);
+    }
+    // Codes cover a power-of-two cube (or square per layer), so some decode to cells the grid does not have.
+    if (x >= U32(sx) || y >= U32(sy) || z >= U32(sz)) {
+        return -1;
+    }
+
+    return index(x, y, z);
+}
diff --git a/thirdparty/thekla_atlas/nvmath/ProximityGrid.h b/thirdparty/thekla_atlas/nvmath/ProximityGrid.h
new file mode 100644
index 0000000000..a21bb3bd68
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/ProximityGrid.h
@@ -0,0 +1,99 @@
+#pragma once
+#ifndef NV_MATH_PROXIMITYGRID_H
+#define NV_MATH_PROXIMITYGRID_H
+
+#include "Vector.h"
+#include "ftoi.h"
+
+#include "nvcore/Array.inl"
+
+
+// A simple, dynamic proximity grid based on Jon's code.
+// Instead of storing pointers here I store indices.
+
+namespace nv {
+
+    class Box;
+
+    struct Cell {
+        Array<uint> indexArray; // keys given to ProximityGrid::add() for the positions that fell in this cell
+    };
+
+    struct ProximityGrid {
+        ProximityGrid();
+        // Setup: reset() clears the cells; init() sizes the grid (and, given a point array, inserts every point).
+        void reset();
+        void init(const Array<Vector3> & pointArray);
+        void init(const Box & box, uint count);
+        // Cell lookup: per-axis indices are clamped to [0, size-1]; index() flattens them into cellArray.
+        int index_x(float x) const;
+        int index_y(float y) const;
+        int index_z(float z) const;
+        int index(int x, int y, int z) const;
+        int index(const Vector3 & pos) const;
+        // Morton traversal, see the usage note after this struct.
+        uint32 mortonCount() const;
+        int mortonIndex(uint32 code) const;
+        // Insert / remove a key in the cell containing pos.
+        void add(const Vector3 & pos, uint key);
+        bool remove(const Vector3 & pos, uint key);
+        // Collect the keys of all cells overlapping the cube around pos.
+        void gather(const Vector3 & pos, float radius, Array<uint> & indices);
+
+        Array<Cell> cellArray;
+
+        Vector3 corner;       // origin of the grid (set from the box's minCorner by init)
+        Vector3 invCellSize;  // cells per unit length along each axis
+        int sx, sy, sz;       // number of cells along each axis
+    };
+
+    // For morton traversal, do:
+    // for (int code = 0; code < mortonCount(); code++) {
+    //   int idx = mortonIndex(code);
+    //   if (idx < 0) continue;
+    // }
+
+
+
+    inline int ProximityGrid::index_x(float x) const { // cell column containing x; coordinates outside the grid clamp to the border cells
+        return clamp(ftoi_floor((x - corner.x) * invCellSize.x),  0, sx-1);
+    }
+
+    inline int ProximityGrid::index_y(float y) const { // cell row containing y; coordinates outside the grid clamp to the border cells
+        return clamp(ftoi_floor((y - corner.y) * invCellSize.y),  0, sy-1);
+    }
+
+    inline int ProximityGrid::index_z(float z) const { // cell slice containing z; coordinates outside the grid clamp to the border cells
+        return clamp(ftoi_floor((z - corner.z) * invCellSize.z),  0, sz-1);
+    }
+
+    inline int ProximityGrid::index(int x, int y, int z) const { // flat position in cellArray: x varies fastest, then y, then z
+        nvDebugCheck(x >= 0 && x < sx);
+        nvDebugCheck(y >= 0 && y < sy);
+        nvDebugCheck(z >= 0 && z < sz);
+        const int idx = x + sx * (y + sy * z);
+        nvDebugCheck(idx >= 0 && uint(idx) < cellArray.count());
+        return idx;
+    }
+
+    // Flat index of the cell containing 'pos'. Each coordinate is clamped by index_x/y/z,
+    // so positions outside the grid map to a border cell.
+    inline int ProximityGrid::index(const Vector3 & pos) const {
+        const int cx = index_x(pos.x), cy = index_y(pos.y), cz = index_z(pos.z);
+        return index(cx, cy, cz);
+    }
+
+
+    inline void ProximityGrid::add(const Vector3 & pos, uint key) {
+        Cell & cell = cellArray[uint(index(pos))]; // cell containing pos
+        cell.indexArray.append(key);
+    }
+
+    inline bool ProximityGrid::remove(const Vector3 & pos, uint key) {
+        Cell & cell = cellArray[uint(index(pos))]; // only the cell containing pos is searched for the key
+        return cell.indexArray.remove(key);
+    }
+
+} // nv namespace
+
+#endif // NV_MATH_PROXIMITYGRID_H
diff --git a/thirdparty/thekla_atlas/nvmath/Quaternion.h b/thirdparty/thekla_atlas/nvmath/Quaternion.h
new file mode 100644
index 0000000000..dc5219e5e4
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Quaternion.h
@@ -0,0 +1,213 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_MATH_QUATERNION_H
+#define NV_MATH_QUATERNION_H
+
+#include "nvmath/nvmath.h"
+#include "nvmath/Vector.inl" // @@ Do not include inl files from header files.
+#include "nvmath/Matrix.h"
+
+namespace nv
+{
+
+    // Quaternion with imaginary part (x, y, z) and real part w (see imag() and real()).
+    class NVMATH_CLASS Quaternion
+    {
+    public:
+        typedef Quaternion const & Arg;
+        Quaternion();                                       // leaves the components uninitialized
+        explicit Quaternion(float f);                       // all four components set to f
+        Quaternion(float x, float y, float z, float w);
+        Quaternion(Vector4::Arg v);                         // implicit on purpose: scale() relies on Vector4 -> Quaternion conversion
+
+        const Quaternion & operator=(Quaternion::Arg v);
+
+        Vector4 asVector() const;
+        // The four floats are reachable by name or as component[0..3] (x first, w last).
+        union {
+            struct {
+                float x, y, z, w;
+            };
+            float component[4];
+        };
+    };
+
+    inline Quaternion::Quaternion() {}
+    inline Quaternion::Quaternion(float f) : x(f), y(f), z(f), w(f) {}
+    inline Quaternion::Quaternion(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
+    inline Quaternion::Quaternion(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
+
+    // @@ Move all these to Quaternion.inl!
+
+    inline const Quaternion & Quaternion::operator=(Quaternion::Arg v) { 
+        x = v.x;
+        y = v.y;
+        z = v.z;
+        w = v.w;
+        return *this;
+    }
+
+    inline Vector4 Quaternion::asVector() const { return Vector4(x, y, z, w); }
+
+    inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
+    { // Quaternion (Hamilton) product a*b with w as the real part; the order of the operands matters.
+        return Quaternion(
+            + a.x*b.w + a.y*b.z - a.z*b.y + a.w*b.x,
+            - a.x*b.z + a.y*b.w + a.z*b.x + a.w*b.y,
+            + a.x*b.y - a.y*b.x + a.z*b.w + a.w*b.z,
+            - a.x*b.x - a.y*b.y - a.z*b.z + a.w*b.w);
+    }
+
+    inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
+    { // Same product as mul(Quaternion, Quaternion) with b taken as (b.x, b.y, b.z, 0); the b.w terms are dropped.
+        return Quaternion(
+            + a.y*b.z - a.z*b.y + a.w*b.x,
+            - a.x*b.z           + a.z*b.x + a.w*b.y,
+            + a.x*b.y - a.y*b.x           + a.w*b.z,
+            - a.x*b.x - a.y*b.y - a.z*b.z );
+    }
+
+    inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
+    { // Same product as mul(Quaternion, Quaternion) with a taken as (a.x, a.y, a.z, 0); the a.w terms are dropped.
+        return Quaternion(
+            + a.x*b.w + a.y*b.z - a.z*b.y,
+            - a.x*b.z + a.y*b.w + a.z*b.x,
+            + a.x*b.y - a.y*b.x + a.z*b.w,
+            - a.x*b.x - a.y*b.y - a.z*b.z);
+    }
+
+    inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
+    {
+        return mul(a, b);
+    }
+
+    inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
+    {
+        return mul(a, b);
+    }
+
+    inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
+    {
+        return mul(a, b);
+    }
+
+
+    inline Quaternion scale(Quaternion::Arg q, float s)
+    {
+        return scale(q.asVector(), s); // forwards to the Vector4 overload; the Vector4 result converts back through Quaternion(Vector4::Arg)
+    }
+    inline Quaternion operator *(Quaternion::Arg q, float s)
+    {
+        return scale(q, s);
+    }
+    inline Quaternion operator *(float s, Quaternion::Arg q)
+    {
+        return scale(q, s);
+    }
+
+    inline Quaternion scale(Quaternion::Arg q, Vector4::Arg s)
+    {
+        return scale(q.asVector(), s);
+    }
+    /*inline Quaternion operator *(Quaternion::Arg q, Vector4::Arg s)
+    {
+    return scale(q, s);
+    }
+    inline Quaternion operator *(Vector4::Arg s, Quaternion::Arg q)
+    {
+    return scale(q, s);
+    }*/
+
+    inline Quaternion conjugate(Quaternion::Arg q)
+    {
+        return scale(q, Vector4(-1, -1, -1, 1)); // scale x, y, z by -1 and w by 1: negates the imaginary part, keeps the real part
+    }
+
+    inline float length(Quaternion::Arg q)
+    {
+        return length(q.asVector());
+    }
+
+    inline bool isNormalized(Quaternion::Arg q, float epsilon = NV_NORMAL_EPSILON)
+    {
+        return equal(length(q), 1, epsilon);
+    }
+
+    inline Quaternion normalize(Quaternion::Arg q, float epsilon = NV_EPSILON)
+    { // Scales q by 1 / length(q). A (near) zero length is only caught by the debug check; it is not handled otherwise.
+        float l = length(q);
+        nvDebugCheck(!isZero(l, epsilon));
+        Quaternion n = scale(q, 1.0f / l);
+        nvDebugCheck(isNormalized(n));
+        return n;
+    }
+
+    inline Quaternion inverse(Quaternion::Arg q)
+    {
+        return conjugate(normalize(q)); // always unit length; equals the multiplicative inverse only when q itself is unit length
+    }
+
+    /// Create a quaternion for a rotation of angle @a alpha about axis @a v. The result is unit length only if @a v is.
+    inline Quaternion axisAngle(Vector3::Arg v, float alpha)
+    {
+        float s = sinf(alpha * 0.5f);
+        float c = cosf(alpha * 0.5f);
+        return Quaternion(Vector4(v * s, c)); // imaginary part = axis * sin(alpha/2), real part = cos(alpha/2)
+    }
+
+    inline Vector3 imag(Quaternion::Arg q)
+    {
+        return q.asVector().xyz();
+    }
+
+    inline float real(Quaternion::Arg q)
+    {
+        return q.w;
+    }
+
+
+    /// Transform (rotate) vector @a v by quaternion @a q.
+    inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
+    {
+        //Quaternion t = q * v * conjugate(q);
+        //return imag(t);
+        // NOTE(review): the shortcut below matches the product above only for unit-length q -- confirm callers normalize.
+        // Faster method by Fabian Giesen and others:
+        // http://molecularmusings.wordpress.com/2013/05/24/a-faster-quaternion-vector-multiplication/
+        // http://mollyrocket.com/forums/viewtopic.php?t=833&sid=3a84e00a70ccb046cfc87ac39881a3d0
+        
+        Vector3 t = 2 * cross(imag(q), v);
+        return v + q.w * t + cross(imag(q), t);
+    }
+
+    // @@ Not tested. Returns a unit quaternion for a pure rotation matrix. From Insomniac's Mike Day:
+    // http://www.insomniacgames.com/converting-a-rotation-matrix-to-a-quaternion/
+    // NOTE(review): element order follows the article; if Matrix uses the opposite vector convention the result is the conjugate -- confirm.
+    inline Quaternion fromMatrix(const Matrix & m) {
+        float t; Quaternion q; // t ends up as four times the square of the pivot component picked by the branches
+        if (m(2, 2) < 0) {
+            if (m(0, 0) > m(1, 1)) { // pivot on x
+                t = 1 + m(0, 0) - m(1, 1) - m(2, 2);
+                q = Quaternion(t, m(0,1) + m(1,0), m(2,0) + m(0,2), m(1,2) - m(2,1));
+            } else { // pivot on y
+                t = 1 - m(0, 0) + m(1, 1) - m(2, 2);
+                q = Quaternion(m(0,1) + m(1,0), t, m(1,2) + m(2,1), m(2,0) - m(0,2));
+            }
+        }
+        else {
+            if (m(0, 0) < -m(1, 1)) { // pivot on z
+                t = 1 - m(0, 0) - m(1, 1) + m(2, 2);
+                q = Quaternion(m(2,0) + m(0,2), m(1,2) + m(2,1), t, m(0,1) - m(1,0));
+            } else { // pivot on w
+                t = 1 + m(0, 0) + m(1, 1) + m(2, 2);
+                q = Quaternion(m(1,2) - m(2,1), m(2,0) - m(0,2), m(0,1) - m(1,0), t);
+            }
+        }
+        return scale(q, 0.5f / sqrtf(t)); // every component above carries a factor 2 * sqrt(t); remove it
+    }
+
+
+} // nv namespace
+
+#endif // NV_MATH_QUATERNION_H
diff --git a/thirdparty/thekla_atlas/nvmath/Random.cpp b/thirdparty/thekla_atlas/nvmath/Random.cpp
new file mode 100644
index 0000000000..1a60e7f5e7
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Random.cpp
@@ -0,0 +1,54 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include <nvmath/Random.h>
+#include <time.h>
+
+using namespace nv;
+
+// Statics
+const uint16 Rand48::a0 = 0xE66D; 
+const uint16 Rand48::a1 = 0xDEEC; 
+const uint16 Rand48::a2 = 0x0005;
+const uint16 Rand48::c0 = 0x000B;
+
+
+/// Get a random seed based on the current time, as reported by time(); calls made close together can return the same seed.
+uint Rand::randomSeed() {
+    const time_t now = time(NULL);
+    return (uint)now;
+}
+
+
+void MTRand::initialize( uint32 seed )
+{
+    // Fill the whole state array from a single seed: word 0 is the seed itself and
+    // every later word is derived from its predecessor.
+    // See Knuth TAOCP Vol 2, 3rd Ed, p.106 for multiplier.
+    // In previous versions, most significant bits (MSBs) of the seed affect
+    // only MSBs of the state array.  Modified 9 Jan 2002 by Makoto Matsumoto.
+
+    state[0] = seed & 0xffffffffUL;
+
+    for( int k = 1; k < N; ++k )
+    {
+        const uint32 prev = state[k - 1];
+        state[k] = ( 1812433253UL * ( prev ^ (prev >> 30) ) + k ) & 0xffffffffUL;
+    }
+}
+
+
+void MTRand::reload()
+{
+    // Generate N new values in state
+    // Made clearer and faster by Matthew Bellew (matthew.bellew@home.com)
+    uint32 *p = state;
+    int i;
+    for( i = N - M; i--; ++p ) // first N-M words: the partner word p[M] lies ahead inside the array
+        *p = twist( p[M], p[0], p[1] );
+    for( i = M; --i; ++p ) // next M-1 words: the partner wraps around, M-N is a negative offset
+        *p = twist( p[M-N], p[0], p[1] );
+    *p = twist( p[M-N], p[0], state[0] ); // last word: its successor is the (already regenerated) first word
+    // Restart consumption from the beginning of the refreshed state.
+    left = N, next = state;
+}
+
diff --git a/thirdparty/thekla_atlas/nvmath/Random.h b/thirdparty/thekla_atlas/nvmath/Random.h
new file mode 100644
index 0000000000..223292706a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Random.h
@@ -0,0 +1,376 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_RANDOM_H
+#define NV_MATH_RANDOM_H
+
+#include "nvmath.h"
+#include "nvcore/Utils.h" // nextPowerOfTwo
+
+
+namespace nv
+{
+
+    /// Interface of the random number generators.
+    class Rand
+    {
+    public:
+
+        virtual ~Rand() {}
+        /// Tag type: generators take Rand::Time in a constructor to seed themselves from randomSeed().
+        enum time_e { Time };
+
+        /// Provide a new seed. The base implementation ignores it.
+        virtual void seed( uint s ) { /* empty */ };
+
+        /// Get an integer random number.
+        virtual uint get() = 0;
+
+        /// Get a random number on [0, max] interval.
+        uint getRange( uint max )
+        {
+            if (max == 0) return 0;
+            if (max == NV_UINT32_MAX) return get();
+            // Rejection sampling: mask get() down to the power-of-two range covering max and retry while above max.
+            const uint np2 = nextPowerOfTwo( max+1 ); // max+1 cannot wrap: max == NV_UINT32_MAX returned above
+            const uint mask = np2 - 1;
+            uint n;
+            do { n = get() & mask; } while( n > max );
+            return n;
+        }
+
+        /// Random number on [0.0, 1.0) interval; 1.0 itself is never returned.
+        float getFloat()
+        {
+            union
+            {
+                uint32 i;
+                float f;
+            } pun;
+            // 0x3f800000 is the bit pattern of 1.0f; 23 random mantissa bits give a float in [1, 2), shifted down by 1.
+            pun.i = 0x3f800000UL | (get() & 0x007fffffUL);
+            return pun.f - 1.0f;
+        }
+        /// getFloat() scaled by (max - min) and offset by min.
+        float getFloatRange(float min, float max) {
+            return getFloat() * (max - min) + min;
+        }
+
+        /*
+        /// Random number on [0.0, 1.0] interval.
+        double getReal()
+        {
+        return double(get()) * (1.0/4294967295.0); // 2^32-1
+        }
+
+        /// Random number on [0.0, 1.0) interval.
+        double getRealExclusive()
+        {
+        return double(get()) * (1.0/4294967296.0); // 2^32
+        }
+        */
+
+        /// Get the max value of the random number.
+        uint max() const { return NV_UINT32_MAX; }
+
+        /// Get a random seed (defined in Random.cpp from the current time).
+        static uint randomSeed();
+
+    };
+
+
+    /// Very simple random number generator with low storage requirements (a single uint of state).
+    class SimpleRand : public Rand
+    {
+    public:
+
+        /// Constructor that uses the current time as the seed.
+        SimpleRand( time_e )
+        {
+            seed(randomSeed());
+        }
+
+        /// Constructor that uses the given seed.
+        SimpleRand( uint s = 0 )
+        {
+            seed(s);
+        }
+
+        /// Set the given seed; it becomes the current state as-is.
+        virtual void seed( uint s )
+        {
+            current = s;
+        }
+
+        /// Get a random number: advances the state by one linear congruential step and returns the new state.
+        virtual uint get()
+        {
+            return current = current * 1103515245 + 12345;
+        }
+
+    private:
+
+        uint current; // last value returned (or the seed, before the first get())
+
+    };
+
+
+    /// Mersenne twister random number generator.
+    class MTRand : public Rand
+    {
+    public:
+
+        enum { N = 624 };       // length of state vector
+        enum { M = 397 };       // offset of the partner word combined with each state word in reload()
+
+        /// Constructor that uses the current time as the seed.
+        MTRand( time_e )
+        {
+            seed(randomSeed());
+        }
+
+        /// Constructor that uses the given seed.
+        MTRand( uint s = 0 )
+        {
+            seed(s);
+        }
+
+        /// Constructor that uses the given seeds.
+        /// NOTE(review): only declared here; Random.cpp does not define it -- confirm before using.
+        NVMATH_API MTRand( const uint * seed_array, uint length );
+
+        /// Provide a new seed.
+        virtual void seed( uint s )
+        {
+            initialize(s);
+            reload();
+        }	
+
+        /// Get a random 32-bit number.
+        virtual uint get()
+        {
+            // Pull a 32-bit integer from the generator state
+            // Every other access function simply transforms the numbers extracted here
+            if( left == 0 ) { 
+                reload(); 
+            }
+            left--;
+            // Scramble the state word before returning it; the state itself is not modified.
+            uint s1;
+            s1 = *next++;
+            s1 ^= (s1 >> 11);
+            s1 ^= (s1 <<  7) & 0x9d2c5680U;
+            s1 ^= (s1 << 15) & 0xefc60000U;
+            return ( s1 ^ (s1 >> 18) );		
+        };
+
+
+    private:
+
+        NVMATH_API void initialize( uint32 seed );
+        NVMATH_API void reload();
+        // Bit helpers used by reload().
+        uint hiBit( uint u ) const { return u & 0x80000000U; }
+        uint loBit( uint u ) const { return u & 0x00000001U; }
+        uint loBits( uint u ) const { return u & 0x7fffffffU; }
+        uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); }
+        uint twist( uint m, uint s0, uint s1 ) const { return m ^ (mixBits(s0,s1)>>1) ^ ((~loBit(s1)+1) & 0x9908b0dfU); }
+
+    private:
+
+        uint state[N];	// internal state
+        uint * next;	// next value to get from state
+        int left;		// number of values left before reload needed		
+
+    };
+
+
+
+    /** George Marsaglia's random number generator. 
+    * Code based on Thatcher Ulrich public domain source code:
+    * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto
+    *
+    * PRNG code adapted from the complimentary-multiply-with-carry
+    * code in the article: George Marsaglia, "Seeds for Random Number
+    * Generators", Communications of the ACM, May 2003, Vol 46 No 5,
+    * pp90-93.
+    * 
+    * The article says:
+    * 
+    * "Any one of the choices for seed table size and multiplier will
+    * provide a RNG that has passed extensive tests of randomness,
+    * particularly those in [3], yet is simple and fast --
+    * approximately 30 million random 32-bit integers per second on a
+    * 850MHz PC.  The period is a*b^n, where a is the multiplier, n
+    * the size of the seed table and b=2^32-1.  (a is chosen so that
+    * b is a primitive root of the prime a*b^n + 1.)"
+    * 
+    * [3] Marsaglia, G., Zaman, A., and Tsang, W.  Toward a universal
+    * random number generator.  _Statistics and Probability Letters
+    * 8_ (1990), 35-39.
+    */
+    class GMRand : public Rand
+    {
+    public:
+
+        enum { SEED_COUNT = 8 }; // size of the seed table Q; must stay a power of two (get() wraps the index with a mask)
+
+        //	const uint64 a = 123471786;		// for SEED_COUNT=1024
+        //	const uint64 a = 123554632;		// for SEED_COUNT=512
+        //	const uint64 a = 8001634;		// for SEED_COUNT=255
+        //	const uint64 a = 8007626;		// for SEED_COUNT=128
+        //	const uint64 a = 647535442;		// for SEED_COUNT=64
+        //	const uint64 a = 547416522;		// for SEED_COUNT=32
+        //	const uint64 a = 487198574;		// for SEED_COUNT=16
+        //	const uint64 a = 716514398U;	// for SEED_COUNT=8
+        enum { a = 716514398U }; // multiplier; an enumerator is only int-sized, so get() widens it before multiplying
+
+        /// Constructor that uses the current time as the seed.
+        GMRand( time_e )
+        {
+            seed(randomSeed());
+        }
+        /// Constructor that uses the given seed.
+        GMRand(uint s = 987654321)
+        {
+            seed(s);
+        }
+
+
+        /// Provide a new seed: resets the carry and fills the seed table with successive xorshift values of s.
+        virtual void seed( uint s )
+        {
+            c = 362436;
+            i = SEED_COUNT - 1;
+
+            for(int k = 0; k < SEED_COUNT; k++) {
+                s = s ^ (s << 13);
+                s = s ^ (s >> 17);
+                s = s ^ (s << 5);
+                Q[k] = s;
+            }
+        }
+
+        /// Get a random 32-bit number.
+        virtual uint get()
+        {
+            const uint32 r = 0xFFFFFFFE;
+
+            uint64 t;
+            uint32 x;
+
+            i = (i + 1) & (SEED_COUNT - 1);
+            t = uint64(a) * Q[i] + c; // must be a 64-bit product: the carry is its upper half
+            c = uint32(t >> 32);
+            x = uint32(t + c);
+
+            if( x < c ) {
+                x++;
+                c++;
+            }
+
+            uint32  val = r - x;
+            Q[i] = val;
+            return val;
+        }
+
+
+    private:
+
+        uint32 c;               // carry of the last multiply step
+        uint32 i;               // index of the table entry used by the last get()
+        uint32 Q[SEED_COUNT];   // seed table
+
+    };
+
+
+    /** Random number implementation from the GNU Sci. Lib. (GSL).
+    * Adapted from Nicholas Chapman version:
+    * 
+    * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
+    * This is the Unix rand48() generator. The generator returns the
+    * upper 32 bits from each term of the sequence,
+    * 
+    * x_{n+1} = (a x_n + c) mod m 
+    * 
+    * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB
+    * and m = 2^48. The seed specifies the upper 32 bits of the initial
+    * value, x_1, with the lower 16 bits set to 0x330E.
+    * 
+    * The theoretical value of x_{10001} is 244131582646046.
+    * 
+    * The period of this generator is ? FIXME (probably around 2^48). 
+    */
+    class Rand48 : public Rand
+    {
+    public:
+        /// Constructor that uses the current time as the seed.
+        Rand48( time_e )
+        {
+            seed(randomSeed());
+        }
+        /// Constructor that uses the given seed.
+        Rand48( uint s = 0x1234ABCD )
+        {
+            seed(s);
+        }	
+
+
+        /** Set the given seed: s becomes the upper 32 bits of the 48-bit state, the low 16 bits are 0x330E. */
+        virtual void seed( uint s ) {
+            vstate.x0 = 0x330E;
+            vstate.x1 = uint16(s & 0xFFFF);
+            vstate.x2 = uint16((s >> 16) & 0xFFFF);
+        }
+
+        /** Get a random number: advances the state and returns its upper 32 bits (x2:x1). */
+        virtual uint get() {
+
+            advance();
+
+            uint x1 = vstate.x1;
+            uint x2 = vstate.x2;
+            return (x2 << 16) + x1;
+        }
+
+
+    private:
+        // One step of x = a*x + c on the 48-bit state, done as a schoolbook multiply on 16-bit limbs (x0 lowest).
+        void advance()
+        {
+            /* work with 32-bit unsigned copies throughout to get correct integer
+            promotions of the 16-bit state words */
+            const uint32 x0 = vstate.x0;
+            const uint32 x1 = vstate.x1;
+            const uint32 x2 = vstate.x2;
+
+            uint32 a;
+            a = a0 * x0 + c0;
+
+            vstate.x0 = uint16(a & 0xFFFF);
+            a >>= 16;
+
+            /* although the next line may overflow we only need the top 16 bits
+            in the following stage, so it does not matter */
+
+            a += a0 * x1 + a1 * x0; 
+            vstate.x1 = uint16(a & 0xFFFF);
+
+            a >>= 16;
+            a += a0 * x2 + a1 * x1 + a2 * x0;
+            vstate.x2 = uint16(a & 0xFFFF);
+        }
+
+
+    private:	
+        NVMATH_API static const uint16 a0, a1, a2, c0; // 16-bit limbs of the multiplier (a0 lowest) and the increment; defined in Random.cpp
+
+        struct rand48_state_t { 
+            uint16 x0, x1, x2; 
+        } vstate;
+
+    };
+
+} // nv namespace
+
+#endif // NV_MATH_RANDOM_H
diff --git a/thirdparty/thekla_atlas/nvmath/Solver.cpp b/thirdparty/thekla_atlas/nvmath/Solver.cpp
new file mode 100644
index 0000000000..191793ee29
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Solver.cpp
@@ -0,0 +1,744 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Solver.h"
+#include "Sparse.h"
+
+#include "nvcore/Array.inl"
+
+using namespace nv;
+
+namespace
+{
+    // Abstract interface for the preconditioning step used by the
+    // preconditioned conjugate gradient solver in this file.
+    class Preconditioner
+    {
+    public:
+        // Virtual dtor.
+        virtual ~Preconditioner() { }
+
+        // Apply preconditioning step: reads x and writes the preconditioned vector into y.
+        virtual void apply(const FullVector & x, FullVector & y) const = 0;
+    };
+
+
+    // Jacobi (diagonal) preconditioner.
+    // Stores one scale factor per diagonal element of M and applies it component-wise.
+    class JacobiPreconditioner : public Preconditioner
+    {
+    public:
+
+        // M must be square. With 'symmetric' set, each factor is 1/sqrt(|M(i,i)|),
+        // otherwise 1/M(i,i). Zero diagonal elements get a factor of 1.
+        JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
+        {
+            nvCheck(M.isSquare());
+
+            const uint size = M.width();
+            for (uint d = 0; d < size; d++)
+            {
+                const float diagonal = M.getCoefficient(d, d);
+
+                // The diagonal can be zero in the presence of zero area triangles;
+                // such components are passed through unscaled.
+                float factor = 1.0f;
+                if (diagonal != 0)
+                {
+                    factor = symmetric ? 1.0f / sqrtf(fabsf(diagonal)) : 1.0f / diagonal;
+                }
+
+                m_inverseDiagonal[d] = factor;
+            }
+        }
+
+        // y[i] = factor[i] * x[i]
+        void apply(const FullVector & x, FullVector & y) const
+        {
+            nvDebugCheck(x.dimension() == m_inverseDiagonal.dimension());
+            nvDebugCheck(y.dimension() == m_inverseDiagonal.dimension());
+
+            // @@ Wrap vector component-wise product into a separate function.
+            for (uint k = 0, n = x.dimension(); k != n; ++k)
+            {
+                y[k] = m_inverseDiagonal[k] * x[k];
+            }
+        }
+
+    private:
+
+        FullVector m_inverseDiagonal;
+
+    };
+
+} // namespace
+
+
+static bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
+static bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
+
+
+// Solve the symmetric system: At*A*x = At*b
+// Builds the transpose explicitly, forms At*A and At*b, and hands the
+// resulting square system to SymmetricSolver. Returns its result.
+bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
+{
+    const uint rows = A.height();
+    const uint columns = A.width();
+
+    nvDebugCheck(columns == x.dimension());
+    nvDebugCheck(rows == b.dimension());
+    nvDebugCheck(rows >= columns); // @@ If height == width we could solve it directly...
+
+    // At has as many columns as A has rows, and as many rows as A has columns.
+    SparseMatrix At(rows, columns);
+    transpose(A, At);
+
+    // Left-hand side: At*A.
+    SparseMatrix AtA(columns);
+    mult(At, A, AtA);
+
+    // Right-hand side: At*b.
+    FullVector Atb(columns);
+    mult(At, b, Atb);
+
+    return SymmetricSolver(AtA, Atb, x, epsilon);
+}
+
+
+// True when 'index' appears in lockedParameters[0 .. lockedCount).
+static bool isLockedParameter(const uint * lockedParameters, uint lockedCount, uint index)
+{
+    for (uint i = 0; i < lockedCount; i++)
+    {
+        if (lockedParameters[i] == index) return true;
+    }
+    return false;
+}
+
+
+// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
+//
+// Least squares solve in which the components of x listed in lockedParameters
+// keep their incoming values. The locked columns of A are folded into the
+// right-hand side, the reduced system is solved with the plain
+// LeastSquaresSolver, and the free components are written back into x.
+bool nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
+{
+    nvDebugCheck(A.width() == x.dimension());
+    nvDebugCheck(A.height() == b.dimension());
+    nvDebugCheck(A.height() >= A.width() - lockedCount);
+
+    // @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
+
+    const uint rows = A.height();
+    const uint columns = A.width();
+
+    const uint D = columns - lockedCount;
+    nvDebugCheck(D > 0);
+
+    FullVector b_Alxl(b);       // Becomes b - Al * xl.
+    SparseMatrix Af(D, rows);   // A with the locked columns removed.
+
+    for (uint y = 0; y < rows; y++)
+    {
+        const uint count = A.getRow(y).count();
+        for (uint e = 0; e < count; e++)
+        {
+            const uint column = A.getRow(y)[e].x;
+            const float value = A.getRow(y)[e].v;
+
+            // Classify the column and count the locked columns to its left.
+            bool locked = false;
+            uint lockedBefore = 0;
+            for (uint i = 0; i < lockedCount; i++)
+            {
+                if (lockedParameters[i] == column) locked = true;
+                else if (lockedParameters[i] < column) lockedBefore++;
+            }
+
+            if (locked)
+            {
+                // Move the known term to the right-hand side.
+                b_Alxl[y] -= x[column] * value;
+            }
+            else
+            {
+                // Free column: shift it left past the removed columns.
+                Af.setCoefficient(column - lockedBefore, y, value);
+            }
+        }
+    }
+
+    // Gather the free components of x as the initial guess.
+    FullVector xf(D);
+
+    for (uint i = 0, j = 0; i < columns; i++)
+    {
+        if (!isLockedParameter(lockedParameters, lockedCount, i))
+        {
+            xf[j++] = x[i];
+        }
+    }
+
+    // Solve reduced system.
+    const bool result = LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
+
+    // Scatter the solution back; locked components of x are left untouched.
+    for (uint i = 0, j = 0; i < columns; i++)
+    {
+        if (!isLockedParameter(lockedParameters, lockedCount, i))
+        {
+            x[i] = xf[j++];
+        }
+    }
+
+    return result;
+}
+
+
+// Solve the square system A*x = b with the Jacobi-preconditioned conjugate
+// gradient solver. Returns that solver's result.
+bool nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
+{
+    nvDebugCheck(A.height() == A.width());
+    nvDebugCheck(A.height() == b.dimension());
+    nvDebugCheck(b.dimension() == x.dimension());
+
+    // The unpreconditioned alternative would be: ConjugateGradientSolver(A, b, x, epsilon).
+    const JacobiPreconditioner preconditioner(A, true);
+    return ConjugateGradientSolver(preconditioner, A, b, x, epsilon);
+}
+
+
+/**
+* Compute the solution of the sparse linear system A*x = b using the Conjugate
+* Gradient method.
+*
+* Solving sparse linear systems:
+* (1)		A*x = b
+* 
+* The conjugate gradient algorithm solves (1) only in the case that A is 
+* symmetric and positive definite. It is based on the idea of minimizing the 
+* function
+* 
+* (2)		f(x) = 1/2*x*A*x - b*x
+* 
+* This function is minimized when its gradient
+* 
+* (3)		df = A*x - b
+* 
+* is zero, which is equivalent to (1). The minimization is carried out by 
+* generating a succession of search directions p.k and improved minimizers x.k. 
+* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k*p.k), 
+* and x.k+1 is set equal to the new point x.k + alfa.k*p.k. The p.k and x.k are 
+* built up in such a way that x.k+1 is also the minimizer of f over the whole
+* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N 
+* iterations you arrive at the minimizer over the entire vector space, i.e., the 
+* solution to (1).
+*
+* For a really good explanation of the method see:
+*
+* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
+* Jonathan Richard Shewchuk.
+*
+* On entry x holds the initial guess; it is updated in place.
+* Returns true when dot(r, r) has dropped to epsilon^2 times its initial value,
+* false when the limit of 4*D iterations was reached first.
+*
+**/
+/*static*/ bool ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
+{
+    nvDebugCheck( A.isSquare() );
+    nvDebugCheck( A.width() == b.dimension() );
+    nvDebugCheck( A.width() == x.dimension() );
+
+    int i = 0;
+    const int D = A.width();
+    const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
+
+    FullVector r(D);   // residual
+    FullVector p(D);   // search direction
+    FullVector q(D);   // q = A*p
+    float delta_0;
+    float delta_old;
+    float delta_new;
+    float alpha;
+    float beta;
+
+    // r = b - A*x;
+    copy(b, r);
+    sgemv(-1, A, x, 1, r);
+
+    // p = r;
+    copy(r, p);
+
+    delta_new = dot( r, r );
+    delta_0 = delta_new;
+
+    // Iterate until the squared residual is small relative to the initial one.
+    while (i < i_max && delta_new > epsilon*epsilon*delta_0)
+    {
+        i++;
+
+        // q = A*p
+        mult(A, p, q);
+
+        // alpha = delta_new / p*q
+        // NOTE(review): there is no guard against dot(p, q) being zero here.
+        alpha = delta_new / dot( p, q );
+
+        // x = alfa*p + x
+        saxpy(alpha, p, x);
+
+        if ((i & 31) == 0) // recompute r after 32 steps
+        {
+            // r = b - A*x
+            copy(b, r);
+            sgemv(-1, A, x, 1, r);
+        }
+        else
+        {
+            // r = r - alpha*q
+            saxpy(-alpha, q, r);
+        }
+
+        delta_old = delta_new;
+        delta_new = dot( r, r );
+
+        beta = delta_new / delta_old;
+
+        // p = beta*p + r
+        scal(beta, p);
+        saxpy(1, r, p);
+    }
+
+    return delta_new <= epsilon*epsilon*delta_0;
+}
+
+
+// Conjugate gradient with preconditioner.
+// Same iteration as the plain solver above, except that the search direction
+// is built from the preconditioned residual s = M^-1 * r and convergence is
+// measured with dot(r, s) instead of dot(r, r).
+// On entry x holds the initial guess; it is updated in place.
+// Returns true when dot(r, s) has dropped to epsilon^2 times its initial value,
+// false when the limit of 4*D iterations was reached first.
+/*static*/ bool ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
+{
+    nvDebugCheck( A.isSquare() );
+    nvDebugCheck( A.width() == b.dimension() );
+    nvDebugCheck( A.width() == x.dimension() );
+
+    int i = 0;
+    const int D = A.width();
+    const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
+
+    FullVector r(D);    // residual
+    FullVector p(D);    // search direction
+    FullVector q(D);    // q = A*p
+    FullVector s(D);    // preconditioned
+    float delta_0;
+    float delta_old;
+    float delta_new;
+    float alpha;
+    float beta;
+
+    // r = b - A*x
+    copy(b, r);
+    sgemv(-1, A, x, 1, r);
+
+
+    // p = M^-1 * r
+    preconditioner.apply(r, p);
+    //copy(r, p);
+
+
+    delta_new = dot(r, p);
+    delta_0 = delta_new;
+
+    while (i < i_max && delta_new > epsilon*epsilon*delta_0)
+    {
+        i++;
+
+        // q = A*p
+        mult(A, p, q);
+
+        // alpha = delta_new / p*q
+        // NOTE(review): there is no guard against dot(p, q) being zero here.
+        alpha = delta_new / dot(p, q);
+
+        // x = alfa*p + x
+        saxpy(alpha, p, x);
+
+        if ((i & 31) == 0)  // recompute r after 32 steps
+        {			
+            // r = b - A*x
+            copy(b, r);
+            sgemv(-1, A, x, 1, r);
+        }
+        else
+        {
+            // r = r - alfa*q
+            saxpy(-alpha, q, r);
+        }
+
+        // s = M^-1 * r
+        preconditioner.apply(r, s);
+        //copy(r, s);
+
+        delta_old = delta_new;
+        delta_new = dot( r, s );
+
+        beta = delta_new / delta_old;
+
+        // p = s + beta*p
+        scal(beta, p);
+        saxpy(1, s, p);
+    }
+
+    return delta_new <= epsilon*epsilon*delta_0;
+}
+
+
+#if 0 // Nonsymmetric solvers
+
+/** Bi-conjugate gradient method.  */
+MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
+    piDebugCheck( A.IsSquare() );
+    piDebugCheck( A.Width() == b.Dim() );
+    piDebugCheck( A.Width() == x.Dim() );
+
+    int i = 0;
+    const int D = A.Width();
+    const int i_max = 4 * D;
+
+    float resid;
+    float rho_1 = 0;
+    float rho_2 = 0;
+    float alpha;
+    float beta;
+
+    DenseVector r(D);
+    DenseVector rtilde(D);
+    DenseVector p(D);
+    DenseVector ptilde(D);
+    DenseVector q(D);
+    DenseVector qtilde(D);
+    DenseVector tmp(D);	// temporal vector.
+
+    // r = b - A*x;
+    A.Product( x, tmp );
+    r.Sub( b, tmp );
+
+    // rtilde = r
+    rtilde.Set( r );
+
+    // p = r;
+    p.Set( r );
+
+    // ptilde = rtilde
+    ptilde.Set( rtilde );
+
+
+
+    float normb = b.Norm();
+    if( normb == 0.0 ) normb = 1;
+
+    // test convergence
+    resid = r.Norm() / normb;
+    if( resid < epsilon ) {
+        // method converges?
+        return 0;
+    }
+
+
+    while( i < i_max ) {
+
+        i++;
+
+        rho_1 = DenseVectorDotProduct( r, rtilde );
+
+        if( rho_1 == 0 ) {
+            // method fails.
+            return -i;
+        }
+
+        if (i == 1) {
+            p.Set( r );
+            ptilde.Set( rtilde );
+        } 
+        else {
+            beta = rho_1 / rho_2;
+
+            // p = r + beta * p;
+            p.Mad( r, p, beta );
+
+            // ptilde = rtilde + beta * ptilde;
+            ptilde.Mad( rtilde, ptilde, beta );
+        }
+
+        // q = A * p;
+        A.Product( p, q );
+
+        // qtilde = A^t * ptilde;
+        A.TransProduct( ptilde, qtilde );
+
+        alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
+
+        // x += alpha * p;
+        x.Mad( x, p, alpha );
+
+        // r -= alpha * q;
+        r.Mad( r, q, -alpha );
+
+        // rtilde -= alpha * qtilde;
+        rtilde.Mad( rtilde, qtilde, -alpha );
+
+        rho_2 = rho_1;
+
+        // test convergence
+        resid = r.Norm() / normb;
+        if( resid < epsilon ) {
+            // method converges
+            return i;
+        }
+    }
+
+    return i;
+}
+
+
+/** Bi-conjugate gradient stabilized method. */
+int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
+    piDebugCheck( A.IsSquare() );
+    piDebugCheck( A.Width() == b.Dim() );
+    piDebugCheck( A.Width() == x.Dim() );
+
+    int i = 0;
+    const int D = A.Width();
+    const int i_max = 2 * D;
+
+
+    float resid;
+    float rho_1 = 0;
+    float rho_2 = 0;
+    float alpha = 0;
+    float beta = 0;
+    float omega = 0;
+
+    DenseVector p(D);
+    DenseVector phat(D);
+    DenseVector s(D);
+    DenseVector shat(D);
+    DenseVector t(D);
+    DenseVector v(D);
+
+    DenseVector r(D);
+    DenseVector rtilde(D);
+
+    DenseVector tmp(D);
+
+    // r = b - A*x;
+    A.Product( x, tmp );
+    r.Sub( b, tmp );
+
+    // rtilde = r
+    rtilde.Set( r );
+
+
+    float normb = b.Norm();
+    if( normb == 0.0 ) normb = 1;
+
+    // test convergence
+    resid = r.Norm() / normb;
+    if( resid < epsilon ) {
+        // method converges?
+        return 0;
+    }
+
+
+    while( i<i_max ) {
+
+        i++;
+
+        rho_1 = DenseVectorDotProduct( rtilde, r );
+        if( rho_1 == 0 ) {
+            // method fails
+            return -i;
+        }
+
+
+        if( i == 1 ) {
+            p.Set( r );
+        }
+        else {
+            beta = (rho_1 / rho_2) * (alpha / omega);
+
+            // p = r + beta * (p - omega * v);
+            p.Mad( p, v, -omega );
+            p.Mad( r, p, beta );
+        }
+
+        //phat = M.solve(p);
+        phat.Set( p );
+        //Precond( &phat, p );
+
+        //v = A * phat;
+        A.Product( phat, v );
+
+        alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
+
+        // s = r - alpha * v;
+        s.Mad( r, v, -alpha );
+
+
+        resid = s.Norm() / normb;
+        if( resid < epsilon ) {
+            // x += alpha * phat;
+            x.Mad( x, phat, alpha );
+            return i;
+        }
+
+        //shat = M.solve(s);
+        shat.Set( s );
+        //Precond( &shat, s );
+
+        //t = A * shat;
+        A.Product( shat, t );
+
+        omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
+
+        // x += alpha * phat + omega * shat;
+        x.Mad( x, shat, omega );
+        x.Mad( x, phat, alpha );
+
+        //r = s - omega * t;
+        r.Mad( s, t, -omega );
+
+        rho_2 = rho_1;
+
+        resid = r.Norm() / normb;
+        if( resid < epsilon ) {
+            return i;
+        }
+
+        if( omega == 0 ) {
+            return -i;	// ???
+        }
+    }
+
+    return i;
+}
+
+
+/** Bi-conjugate gradient stabilized method. */
+int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
+    piDebugCheck( A.IsSquare() );
+    piDebugCheck( A.Width() == b.Dim() );
+    piDebugCheck( A.Width() == x.Dim() );
+
+    int i = 0;
+    const int D = A.Width();
+    const int i_max = D;
+    //	const int i_max = 1000;
+
+
+    float resid;
+    float rho_1 = 0;
+    float rho_2 = 0;
+    float alpha = 0;
+    float beta = 0;
+    float omega = 0;
+
+    DenseVector p(D);
+    DenseVector phat(D);
+    DenseVector s(D);
+    DenseVector shat(D);
+    DenseVector t(D);
+    DenseVector v(D);
+
+    DenseVector r(D);
+    DenseVector rtilde(D);
+
+    DenseVector tmp(D);
+
+    // r = b - A*x;
+    A.Product( x, tmp );
+    r.Sub( b, tmp );
+
+    // rtilde = r
+    rtilde.Set( r );
+
+
+    float normb = b.Norm();
+    if( normb == 0.0 ) normb = 1;
+
+    // test convergence
+    resid = r.Norm() / normb;
+    if( resid < epsilon ) {
+        // method converges?
+        return 0;
+    }
+
+
+    while( i<i_max ) {
+
+        i++;
+
+        rho_1 = DenseVectorDotProduct( rtilde, r );
+        if( rho_1 == 0 ) {
+            // method fails
+            return -i;
+        }
+
+
+        if( i == 1 ) {
+            p.Set( r );
+        }
+        else {
+            beta = (rho_1 / rho_2) * (alpha / omega);
+
+            // p = r + beta * (p - omega * v);
+            p.Mad( p, v, -omega );
+            p.Mad( r, p, beta );
+        }
+
+        //phat = M.solve(p);
+        //phat.Set( p );
+        M.Precond( &phat, p );
+
+        //v = A * phat;
+        A.Product( phat, v );
+
+        alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
+
+        // s = r - alpha * v;
+        s.Mad( r, v, -alpha );
+
+
+        resid = s.Norm() / normb;
+
+        //printf( "--- Iteration %d: residual = %f\n", i, resid );
+
+        if( resid < epsilon ) {
+            // x += alpha * phat;
+            x.Mad( x, phat, alpha );
+            return i;
+        }
+
+        //shat = M.solve(s);
+        //shat.Set( s );
+        M.Precond( &shat, s );
+
+        //t = A * shat;
+        A.Product( shat, t );
+
+        omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
+
+        // x += alpha * phat + omega * shat;
+        x.Mad( x, shat, omega );
+        x.Mad( x, phat, alpha );
+
+        //r = s - omega * t;
+        r.Mad( s, t, -omega );
+
+        rho_2 = rho_1;
+
+        resid = r.Norm() / normb;
+        if( resid < epsilon ) {
+            return i;
+        }
+
+        if( omega == 0 ) {
+            return -i;	// ???
+        }
+    }
+
+    return i;
+}
+
+#endif
diff --git a/thirdparty/thekla_atlas/nvmath/Solver.h b/thirdparty/thekla_atlas/nvmath/Solver.h
new file mode 100644
index 0000000000..2bbf92736a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Solver.h
@@ -0,0 +1,24 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_SOLVER_H
+#define NV_MATH_SOLVER_H
+
+#include "nvmath.h"
+
+namespace nv
+{
+    class SparseMatrix;
+    class FullVector;
+
+
+    // Linear solvers.
+    // All of them take the initial guess in x, overwrite it with the result, and
+    // return true when the iteration reached the requested tolerance.
+
+    // Least squares solution of A*x = b, computed by solving At*A*x = At*b with SymmetricSolver.
+    NVMATH_API bool LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
+
+    // Same, but the components of x whose indices are listed in
+    // lockedParameters[0 .. lockedCount) keep their incoming values.
+    NVMATH_API bool LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
+
+    // Solve the square system A*x = b with a Jacobi-preconditioned conjugate gradient iteration.
+    NVMATH_API bool SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
+    //NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
+
+} // nv namespace
+
+
+#endif // NV_MATH_SOLVER_H
diff --git a/thirdparty/thekla_atlas/nvmath/Sparse.cpp b/thirdparty/thekla_atlas/nvmath/Sparse.cpp
new file mode 100644
index 0000000000..421e7ee022
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Sparse.cpp
@@ -0,0 +1,889 @@
+// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
+
+#include "Sparse.h"
+#include "KahanSum.h"
+
+#include "nvcore/Array.inl"
+
+#define USE_KAHAN_SUM 0
+
+
+using namespace nv;
+
+
+// Create a vector with 'dim' components by resizing the backing array.
+// NOTE(review): no fill value is passed to resize() here (FullMatrix passes 0.0f),
+// so the initial component values depend on Array::resize -- confirm before relying on zeros.
+FullVector::FullVector(uint dim)
+{ 
+    m_array.resize(dim); 
+}
+
+// Copy constructor: copies the component array of v.
+FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
+{
+}
+
+// Copy the components of v into this vector.
+// Both vectors must already have the same dimension (checked with nvCheck).
+const FullVector & FullVector::operator=(const FullVector & v)
+{
+    nvCheck(dimension() == v.dimension());
+
+    m_array = v.m_array;
+
+    return *this;
+}
+
+
+// Set every component to f.
+void FullVector::fill(float f)
+{
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] = f;
+    }
+}
+
+// Component-wise addition: this[k] += v[k].
+void FullVector::operator+= (const FullVector & v)
+{
+    nvDebugCheck(dimension() == v.dimension());
+
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] += v.m_array[k];
+    }
+}
+
+// Component-wise subtraction: this[k] -= v[k].
+void FullVector::operator-= (const FullVector & v)
+{
+    nvDebugCheck(dimension() == v.dimension());
+
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] -= v.m_array[k];
+    }
+}
+
+// Component-wise product: this[k] *= v[k].
+void FullVector::operator*= (const FullVector & v)
+{
+    nvDebugCheck(dimension() == v.dimension());
+
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] *= v.m_array[k];
+    }
+}
+
+// Add the scalar f to every component.
+void FullVector::operator+= (float f)
+{
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] += f;
+    }
+}
+
+// Subtract the scalar f from every component.
+void FullVector::operator-= (float f)
+{
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] -= f;
+    }
+}
+
+// Multiply every component by the scalar f.
+void FullVector::operator*= (float f)
+{
+    for (uint k = 0, n = dimension(); k != n; ++k)
+    {
+        m_array[k] *= f;
+    }
+}
+
+
+// y = a*x + y
+void nv::saxpy(float a, const FullVector & x, FullVector & y)
+{
+    nvDebugCheck(x.dimension() == y.dimension());
+
+    for (uint k = 0, n = x.dimension(); k != n; ++k)
+    {
+        y[k] += a * x[k];
+    }
+}
+
+// y = x, component by component. Both vectors must have the same dimension.
+void nv::copy(const FullVector & x, FullVector & y)
+{
+    nvDebugCheck(x.dimension() == y.dimension());
+
+    for (uint k = 0, n = x.dimension(); k != n; ++k)
+    {
+        y[k] = x[k];
+    }
+}
+
+// x = a*x
+void nv::scal(float a, FullVector & x)
+{
+    for (uint k = 0, n = x.dimension(); k != n; ++k)
+    {
+        x[k] *= a;
+    }
+}
+
+// Dot product of x and y, accumulated from the first component to the last.
+// With USE_KAHAN_SUM enabled the products are added through KahanSum instead
+// of a plain float accumulator.
+float nv::dot(const FullVector & x, const FullVector & y)
+{
+    nvDebugCheck(x.dimension() == y.dimension());
+
+    const uint n = x.dimension();
+
+#if USE_KAHAN_SUM
+    KahanSum accumulator;
+    for (uint k = 0; k != n; ++k)
+    {
+        accumulator.add(x[k] * y[k]);
+    }
+    return accumulator.sum();
+#else
+    float total = 0;
+    for (uint k = 0; k != n; ++k)
+    {
+        total += x[k] * y[k];
+    }
+    return total;
+#endif
+}
+
+
+// Create a square d x d matrix with all elements set to zero.
+FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
+{
+    m_array.resize(d*d, 0.0f);
+}
+
+// Create a matrix with w columns and h rows, all elements set to zero.
+FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
+{
+    m_array.resize(w*h, 0.0f);
+}
+
+// Copy constructor: copies the dimensions and the element array of m.
+FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
+{
+    m_array = m.m_array;
+}
+
+// Copy the elements of m into this matrix.
+// The dimensions are not reassigned: both matrices must already have the
+// same width and height (checked with nvCheck).
+const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
+{
+    nvCheck(width() == m.width());
+    nvCheck(height() == m.height());
+
+    m_array = m.m_array;
+
+    return *this;
+}
+
+
+// Read the element at column x, row y. Elements are stored row after row.
+float FullMatrix::getCoefficient(uint x, uint y) const
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    const uint offset = y * width() + x;
+    return m_array[offset];
+}
+
+// Overwrite the element at column x, row y with f.
+void FullMatrix::setCoefficient(uint x, uint y, float f)
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    const uint offset = y * width() + x;
+    m_array[offset] = f;
+}
+
+// Add f to the element at column x, row y.
+void FullMatrix::addCoefficient(uint x, uint y, float f)
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    const uint offset = y * width() + x;
+    m_array[offset] += f;
+}
+
+// Multiply the element at column x, row y by f.
+void FullMatrix::mulCoefficient(uint x, uint y, float f)
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    const uint offset = y * width() + x;
+    m_array[offset] *= f;
+}
+
+// Dot product of row y with the vector v.
+float FullMatrix::dotRow(uint y, const FullVector & v) const
+{
+    nvDebugCheck( v.dimension() == width() );
+    nvDebugCheck( y < height() );
+
+    const uint count = v.dimension();
+    const uint rowStart = y * count;   // v has one component per column
+
+    float total = 0;
+    for (uint k = 0; k != count; ++k)
+    {
+        total += m_array[rowStart + k] * v[k];
+    }
+
+    return total;
+}
+
+// Multiply-add a row into a vector: v += alpha * (row y of this matrix).
+// v must have one component per matrix column.
+void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
+{
+    nvDebugCheck( v.dimension() == width() );
+    nvDebugCheck( y < height() );
+
+    const uint count = v.dimension();
+    for (uint i = 0; i < count; i++)
+    {
+        // The row must be scaled by alpha, as SparseMatrix::madRow does.
+        // The transposed mult/sgemv paths pass x[i] (or alpha*x[i]) here and
+        // depend on it.
+        v[i] += alpha * m_array[y * count + i];
+    }
+}
+
+
+// y = M * x
+// Convenience overload: forwards to the Transpose-taking version with NoTransposed.
+void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
+{
+    mult(NoTransposed, M, x, y);
+}
+
+// y = M * x, or y = Mt * x when TM is Transposed.
+void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
+{
+    const uint columns = M.width();
+    const uint rows = M.height();
+
+    if (TM != Transposed)
+    {
+        nvDebugCheck( columns == x.dimension() );
+        nvDebugCheck( rows == y.dimension() );
+
+        // One dot product per output component.
+        for (uint row = 0; row < rows; row++)
+        {
+            y[row] = M.dotRow(row, x);
+        }
+        return;
+    }
+
+    nvDebugCheck( rows == x.dimension() );
+    nvDebugCheck( columns == y.dimension() );
+
+    // Transposed product: start from zero and accumulate each row scaled by x[row].
+    y.fill(0.0f);
+
+    for (uint row = 0; row < rows; row++)
+    {
+        M.madRow(row, x[row], y);
+    }
+}
+
+// y = alpha*A*x + beta*y
+// Convenience overload: forwards to the Transpose-taking version with NoTransposed.
+void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+    sgemv(alpha, NoTransposed, A, x, beta, y);
+}
+
+// y = alpha*op(A)*x + beta*y, where op(A) is A or, when TA is Transposed, At.
+void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+    const uint w = A.width();
+    const uint h = A.height();
+
+    if (TA == Transposed)
+    {
+        nvDebugCheck( h == x.dimension() );
+        nvDebugCheck( w == y.dimension() );
+
+        // madRow only accumulates into y, so the beta*y term has to be
+        // applied up front. Without it beta was silently treated as 1.
+        scal(beta, y);
+
+        for (uint i = 0; i < h; i++)
+        {
+            A.madRow(i, alpha * x[i], y);
+        }
+    }
+    else
+    {
+        nvDebugCheck( w == x.dimension() );
+        nvDebugCheck( h == y.dimension() );
+
+        for (uint i = 0; i < h; i++)
+        {
+            y[i] = alpha * A.dotRow(i, x) + beta * y[i];
+        }
+    }
+}
+
+
+// Multiply a row of A by a column of B.
+// j is the row of op(A), i is the column of op(B); op() is the optional
+// transposition selected by TA / TB. Returns sum over k of op(A)(j,k) * op(B)(k,i).
+static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
+{
+    const uint w = (TA == NoTransposed) ? A.width() : A.height();
+    // The inner dimension must match the row count of op(B). The transposed
+    // case has to look at B, not A.
+    nvDebugCheck(w == ((TB == NoTransposed) ? B.height() : B.width()));
+
+    float sum = 0.0f;
+
+    for (uint k = 0; k < w; k++)
+    {
+        // getCoefficient takes (column, row); swapping the arguments reads the transpose.
+        const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
+        const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : B.getCoefficient(k, i);
+        sum += a * b;
+    }
+
+    return sum;
+}
+
+
+// C = A * B
+// Convenience overload: forwards to the Transpose-taking version with both operands untransposed.
+void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
+{
+    mult(NoTransposed, A, NoTransposed, B, C);
+}
+
+// C = op(A) * op(B), implemented as sgemm with alpha = 1 and beta = 0.
+void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
+{
+    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
+}
+
+// C = alpha*A*B + beta*C
+// Convenience overload: forwards to the Transpose-taking version with both operands untransposed.
+void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
+{
+    sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
+}
+
+// C = alpha*op(A)*op(B) + beta*C, where op() is the optional transposition
+// selected by TA / TB. C must already have the dimensions of the product.
+void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
+{
+    const uint w = C.width();
+    const uint h = C.height();
+
+    uint aw = (TA == NoTransposed) ? A.width() : A.height();
+    uint ah = (TA == NoTransposed) ? A.height() : A.width();
+    uint bw = (TB == NoTransposed) ? B.width() : B.height();
+    uint bh = (TB == NoTransposed) ? B.height() : B.width();
+
+    // op(A) is ah x aw and op(B) is bh x bw, so the product is ah x bw.
+    // Only the inner dimensions have to agree; the old 'bw == ah' check
+    // wrongly rejected every non-square result.
+    nvDebugCheck(aw == bh);
+    nvDebugCheck(w == bw);
+    nvDebugCheck(h == ah);
+
+    for (uint y = 0; y < h; y++)
+    {
+        for (uint x = 0; x < w; x++)
+        {
+            // Element (column x, row y) of the product is row y of op(A) times
+            // column x of op(B). ::dot takes the row of A first, then the column of B.
+            float c = alpha * ::dot(y, TA, A, x, TB, B) + beta * C.getCoefficient(x, y);
+            C.setCoefficient(x, y, c);
+        }
+    }
+}
+
+
+
+
+
+/// Ctor. Init the size of the sparse matrix.
+/// Creates a square matrix: d columns and d (initially empty) rows.
+SparseMatrix::SparseMatrix(uint d) : m_width(d)
+{
+    m_array.resize(d);
+}
+
+/// Ctor. Init the size of the sparse matrix.
+/// w is the number of columns; h is the number of rows (one coefficient array per row).
+SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
+{
+    m_array.resize(h);
+}
+
+/// Copy ctor. Copies the width and the row arrays of m.
+SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
+{
+    m_array = m.m_array;
+}
+
+// Copy the rows of m into this matrix.
+// The width is not reassigned: both matrices must already have the same
+// width and height (checked with nvCheck).
+const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
+{
+    nvCheck(width() == m.width());
+    nvCheck(height() == m.height());
+
+    m_array = m.m_array;
+
+    return *this;
+}
+
+
+// x is column, y is row
+// Returns the stored value, or zero when row y has no entry for column x.
+float SparseMatrix::getCoefficient(uint x, uint y) const
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    // Linear search for the first entry of the row that belongs to column x.
+    const uint count = m_array[y].count();
+    uint e = 0;
+    while (e < count && m_array[y][e].x != x)
+    {
+        e++;
+    }
+
+    return (e < count) ? m_array[y][e].v : 0.0f;
+}
+
+// Set the element at column x, row y to f.
+// An existing entry is overwritten (even with zero); a new entry is only
+// created for a non-zero f.
+void SparseMatrix::setCoefficient(uint x, uint y, float f)
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    const uint count = m_array[y].count();
+    for (uint e = 0; e != count; ++e)
+    {
+        if (m_array[y][e].x != x) continue;
+
+        m_array[y][e].v = f;
+        return;
+    }
+
+    // No entry yet; zeros are not stored.
+    if (f == 0.0f) return;
+
+    Coefficient entry = { x, f };
+    m_array[y].append( entry );
+}
+
+// Add f to the element at column x, row y, creating the entry if needed.
+// Adding zero is a no-op.
+void SparseMatrix::addCoefficient(uint x, uint y, float f)
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    if (f == 0.0f) return;
+
+    const uint count = m_array[y].count();
+    for (uint e = 0; e != count; ++e)
+    {
+        if (m_array[y][e].x != x) continue;
+
+        m_array[y][e].v += f;
+        return;
+    }
+
+    // The column had no entry in this row: start one with value f.
+    Coefficient entry = { x, f };
+    m_array[y].append( entry );
+}
+
+// Multiply the element at column x, row y by f.
+// An element with no stored entry is an implicit zero and stays zero.
+void SparseMatrix::mulCoefficient(uint x, uint y, float f)
+{
+    nvDebugCheck( x < width() );
+    nvDebugCheck( y < height() );
+
+    const uint count = m_array[y].count();
+    for (uint i = 0; i < count; i++)
+    {
+        if (m_array[y][i].x == x) 
+        {
+            m_array[y][i].v *= f;
+            return;
+        }
+    }
+
+    // Nothing stored for this column: 0 * f is still 0, so no entry is
+    // created. Appending { x, f } here would turn the multiplication
+    // into an assignment.
+}
+
+
+// Sum of the stored values of row y, accumulated in storage order.
+float SparseMatrix::sumRow(uint y) const
+{
+    nvDebugCheck( y < height() );
+
+    const uint entries = m_array[y].count();
+
+#if USE_KAHAN_SUM
+    KahanSum accumulator;
+    for (uint e = 0; e != entries; ++e)
+    {
+        accumulator.add(m_array[y][e].v);
+    }
+    return accumulator.sum();
+#else
+    float total = 0;
+    for (uint e = 0; e != entries; ++e)
+    {
+        total += m_array[y][e].v;
+    }
+    return total;
+#endif
+}
+
+// Dot product of row y with the vector v; only stored entries contribute.
+float SparseMatrix::dotRow(uint y, const FullVector & v) const
+{
+    nvDebugCheck( y < height() );
+
+    const uint entries = m_array[y].count();
+
+#if USE_KAHAN_SUM
+    KahanSum accumulator;
+    for (uint e = 0; e != entries; ++e)
+    {
+        accumulator.add(m_array[y][e].v * v[m_array[y][e].x]);
+    }
+    return accumulator.sum();
+#else
+    float total = 0;
+    for (uint e = 0; e != entries; ++e)
+    {
+        total += m_array[y][e].v * v[m_array[y][e].x];
+    }
+    return total;
+#endif
+}
+
+// v += alpha * (row y). Each stored entry is scattered to the component of v
+// that matches its column.
+void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
+{
+    nvDebugCheck(y < height());
+
+    for (uint e = 0, entries = m_array[y].count(); e != entries; ++e)
+    {
+        const uint column = m_array[y][e].x;
+        v[column] += alpha * m_array[y][e].v;
+    }
+}
+
+
+// Remove every stored entry of row y.
+void SparseMatrix::clearRow(uint y)
+{
+    nvDebugCheck( y < height() );
+
+    m_array[y].clear();
+}
+
+// Multiply every stored entry of row y by f.
+void SparseMatrix::scaleRow(uint y, float f)
+{
+    nvDebugCheck( y < height() );
+
+    for (uint e = 0, entries = m_array[y].count(); e != entries; ++e)
+    {
+        m_array[y][e].v *= f;
+    }
+}
+
+// Scale row y so that its stored entries have unit Euclidean length.
+// A row whose squared length is zero (empty, or holding only explicit zeros)
+// is left unchanged.
+void SparseMatrix::normalizeRow(uint y)
+{
+    nvDebugCheck( y < height() );
+
+    float norm = 0.0f;
+
+    const uint count = m_array[y].count();
+    for (uint i = 0; i < count; i++)
+    {
+        float f = m_array[y][i].v;
+        norm += f * f;
+    }
+
+    // Dividing by a zero length would scale by infinity and turn explicit
+    // zero entries into NaN.
+    if (norm > 0.0f)
+    {
+        scaleRow(y, 1.0f / sqrtf(norm));
+    }
+}
+
+
+// Zero column x: in every row, the first entry stored for that column is set
+// to 0 (the entry itself is kept).
+void SparseMatrix::clearColumn(uint x)
+{
+    nvDebugCheck(x < width());
+
+    for (uint row = 0; row < height(); row++)
+    {
+        const uint entries = m_array[row].count();
+
+        uint e = 0;
+        while (e < entries && m_array[row][e].x != x)
+        {
+            e++;
+        }
+
+        if (e < entries)
+        {
+            m_array[row][e].v = 0.0f;
+        }
+    }
+}
+
+// Multiplies column x by f. In each row only the first stored entry whose
+// column index equals x is scaled.
+void SparseMatrix::scaleColumn(uint x, float f)
+{
+    nvDebugCheck(x < width());
+
+    const uint rowCount = height();
+    for (uint r = 0; r < rowCount; r++)
+    {
+        Array<Coefficient> & row = m_array[r];
+        const uint entryCount = row.count();
+
+        for (uint e = 0; e < entryCount; e++)
+        {
+            Coefficient & entry = row[e];
+            if (entry.x != x)
+            {
+                continue;
+            }
+
+            entry.v *= f;
+            break;
+        }
+    }
+}
+
+// Returns read-only access to the list of (column, value) entries stored for row y.
+const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
+{
+    // Same bounds check as every other per-row accessor of this class.
+    nvDebugCheck( y < height() );
+
+    return m_array[y];
+}
+
+
+// Returns true when the matrix equals its transpose.
+// Every stored off-diagonal entry is compared (with equal()) against the
+// coefficient at the mirrored position. Both triangles are visited: an entry
+// stored only below the diagonal has no counterpart that a scan of the upper
+// triangle alone would ever look at.
+bool SparseMatrix::isSymmetric() const
+{
+    // A non-square matrix cannot be symmetric, and its mirrored indices
+    // could fall outside the matrix.
+    if (!isSquare())
+    {
+        return false;
+    }
+
+    for (uint y = 0; y < height(); y++)
+    {
+        const uint count = m_array[y].count();
+        for (uint e = 0; e < count; e++)
+        {
+            const uint x = m_array[y][e].x;
+            if (x != y) {
+                float v = m_array[y][e].v;
+
+                // getCoefficient takes (column, row): this reads row x, column y.
+                if (!equal(getCoefficient(y, x), v)) {  // @@ epsilon
+                    return false;
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+
+// y = M * x
+// Convenience overload: forwards to the Transpose-taking mult() with NoTransposed.
+void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
+{
+    mult(NoTransposed, M, x, y);
+}
+
+// y = M * x, or y = M^T * x when TM == Transposed.
+// The previous contents of y are overwritten in both cases.
+void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
+{
+    const uint columnCount = M.width();
+    const uint rowCount = M.height();
+
+    if (TM != Transposed)
+    {
+        nvDebugCheck( columnCount == x.dimension() );
+        nvDebugCheck( rowCount == y.dimension() );
+
+        // Each output element is the dot product of one matrix row with x.
+        for (uint r = 0; r < rowCount; r++)
+        {
+            y[r] = M.dotRow(r, x);
+        }
+        return;
+    }
+
+    nvDebugCheck( rowCount == x.dimension() );
+    nvDebugCheck( columnCount == y.dimension() );
+
+    // Transposed product: start from zero and accumulate x[r] times row r.
+    y.fill(0.0f);
+
+    for (uint r = 0; r < rowCount; r++)
+    {
+        M.madRow(r, x[r], y);
+    }
+}
+
+// y = alpha*A*x + beta*y
+// Convenience overload: forwards to the Transpose-taking sgemv() with NoTransposed.
+void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+    sgemv(alpha, NoTransposed, A, x, beta, y);
+}
+
+// y = alpha*op(A)*x + beta*y, where op(A) is A (NoTransposed) or A^T (Transposed).
+//
+// x must have as many elements as op(A) has columns and y as many as op(A)
+// has rows; both are checked with nvDebugCheck only.
+void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+    const uint w = A.width();
+    const uint h = A.height();
+
+    if (TA == Transposed)
+    {
+        nvDebugCheck( h == x.dimension() );
+        nvDebugCheck( w == y.dimension() );
+
+        // Apply the beta term first; the row-wise accumulation below only adds
+        // to y, so without this step beta would be silently ignored.
+        for (uint i = 0; i < w; i++)
+        {
+            y[i] *= beta;
+        }
+
+        // y += (alpha * x[i]) * row i of A, for every row.
+        for (uint i = 0; i < h; i++)
+        {
+            A.madRow(i, alpha * x[i], y);
+        }
+    }
+    else
+    {
+        nvDebugCheck( w == x.dimension() );
+        nvDebugCheck( h == y.dimension() );
+
+        for (uint i = 0; i < h; i++)
+        {
+            y[i] = alpha * A.dotRow(i, x) + beta * y[i];
+        }
+    }
+}
+
+
+// Dot product of row y of A with column x of B.
+// Walks the stored entries of A's row; for an entry in column k it reads
+// B's coefficient at column x, row k.
+static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
+{
+    const Array<SparseMatrix::Coefficient> & entries = A.getRow(y);
+
+    const uint entryCount = entries.count();
+
+#if USE_KAHAN_SUM
+    KahanSum accumulator;
+    for (uint e = 0; e < entryCount; e++)
+    {
+        const SparseMatrix::Coefficient & entry = entries[e];
+        accumulator.add(entry.v * B.getCoefficient(x, entry.x));
+    }
+    return accumulator.sum();
+#else
+    float total = 0.0f;
+    for (uint e = 0; e < entryCount; e++)
+    {
+        const SparseMatrix::Coefficient & entry = entries[e];
+        total += entry.v * B.getCoefficient(x, entry.x);
+    }
+    return total;
+#endif
+}
+
+// Dot product of row y of A with row x of B.
+// Walks the stored entries of A's row; for an entry in column k it reads
+// B's coefficient at column k, row x.
+static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
+{
+    const Array<SparseMatrix::Coefficient> & entries = A.getRow(y);
+
+    const uint entryCount = entries.count();
+
+#if USE_KAHAN_SUM
+    KahanSum accumulator;
+    for (uint e = 0; e < entryCount; e++)
+    {
+        const SparseMatrix::Coefficient & entry = entries[e];
+        accumulator.add(entry.v * B.getCoefficient(entry.x, x));
+    }
+    return accumulator.sum();
+#else
+    float total = 0.0f;
+    for (uint e = 0; e < entryCount; e++)
+    {
+        const SparseMatrix::Coefficient & entry = entries[e];
+        total += entry.v * B.getCoefficient(entry.x, x);
+    }
+    return total;
+#endif
+}
+
+// Dot product of column y of A with column x of B.
+// Visits every row index, so the cost is proportional to the matrix height
+// times the cost of a getCoefficient() lookup.
+static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
+{
+    nvDebugCheck(A.height() == B.height());
+
+    const uint rowCount = A.height();
+
+#if USE_KAHAN_SUM
+    KahanSum accumulator;
+    for (uint r = 0; r < rowCount; r++)
+    {
+        accumulator.add(A.getCoefficient(y, r) * B.getCoefficient(x, r));
+    }
+    return accumulator.sum();
+#else
+    float total = 0.0f;
+    for (uint r = 0; r < rowCount; r++)
+    {
+        total += A.getCoefficient(y, r) * B.getCoefficient(x, r);
+    }
+    return total;
+#endif
+}
+
+
+// B = A^T. B must already have the transposed dimensions; its previous
+// contents are discarded.
+void nv::transpose(const SparseMatrix & A, SparseMatrix & B)
+{
+    nvDebugCheck(A.width() == B.height());
+    nvDebugCheck(B.width() == A.height());
+
+    const uint columnCount = A.width();
+    const uint rowCount = A.height();
+
+    // B has one row per column of A; empty them all first.
+    for (uint r = 0; r < columnCount; r++)
+    {
+        B.clearRow(r);
+    }
+
+    for (uint r = 0; r < rowCount; r++)
+    {
+        const Array<SparseMatrix::Coefficient> & entries = A.getRow(r);
+        const uint entryCount = entries.count();
+
+        for (uint e = 0; e < entryCount; e++)
+        {
+            const SparseMatrix::Coefficient & entry = entries[e];
+            nvDebugCheck(entry.x < columnCount);
+
+            // setCoefficient takes (column, row): A(row r, column entry.x)
+            // lands at B(row entry.x, column r).
+            B.setCoefficient(r, entry.x, entry.v);
+        }
+    }
+}
+
+// C = A * B
+// Convenience overload: forwards to the Transpose-taking mult() with neither operand transposed.
+void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
+{
+    mult(NoTransposed, A, NoTransposed, B, C);
+}
+
+// C = op(A) * op(B), where TA / TB select whether each operand is transposed.
+// Implemented as sgemm with alpha = 1 and beta = 0.
+void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
+{
+    sgemm(1.0f, TA, A, TB, B, 0.0f, C);
+}
+
+// C = alpha*A*B + beta*C
+// Convenience overload: forwards to the Transpose-taking sgemm() with neither operand transposed.
+void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
+{
+    sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
+}
+
+// C = alpha*op(A)*op(B) + beta*C, where op(X) is X or X^T as selected by TA / TB.
+//
+// C must already have the dimensions of the product. Every element of C is
+// computed and written through setCoefficient(), one dot product per element,
+// so this is expensive for large matrices.
+void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
+{
+    const uint w = C.width();
+    const uint h = C.height();
+
+    // Dimensions of op(A) and op(B).
+    uint aw = (TA == NoTransposed) ? A.width() : A.height();
+    uint ah = (TA == NoTransposed) ? A.height() : A.width();
+    uint bw = (TB == NoTransposed) ? B.width() : B.height();
+    uint bh = (TB == NoTransposed) ? B.height() : B.width();
+
+    // Inner dimensions must agree and C must be (ah x bw). The product is
+    // not required to be square, so bw and ah are not compared to each other.
+    nvDebugCheck(aw == bh);
+    nvDebugCheck(w == bw);
+    nvDebugCheck(h == ah);
+
+
+    for (uint y = 0; y < h; y++)
+    {
+        for (uint x = 0; x < w; x++)
+        {
+            float c = beta * C.getCoefficient(x, y);
+
+            if (TA == NoTransposed && TB == NoTransposed)
+            {
+                // dot y-row of A by x-column of B.
+                c += alpha * dotRowColumn(y, A, x, B);
+            }
+            else if (TA == Transposed && TB == Transposed)
+            {
+                // dot y-column of A by x-row of B.
+                c += alpha * dotRowColumn(x, B, y, A);
+            }
+            else if (TA == Transposed && TB == NoTransposed)
+            {
+                // dot y-column of A by x-column of B.
+                c += alpha * dotColumnColumn(y, A, x, B);
+            }
+            else if (TA == NoTransposed && TB == Transposed)
+            {
+                // dot y-row of A by x-row of B.
+                c += alpha * dotRowRow(y, A, x, B);
+            }
+
+            C.setCoefficient(x, y, c);
+        }
+    }
+}
+
+// C = At * A
+// Forwards to mult() with A as both operands, the first one transposed.
+void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
+{
+    // This is quite expensive...
+    mult(Transposed, A, NoTransposed, A, C);
+}
diff --git a/thirdparty/thekla_atlas/nvmath/Sparse.h b/thirdparty/thekla_atlas/nvmath/Sparse.h
new file mode 100644
index 0000000000..6b03ed51f3
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Sparse.h
@@ -0,0 +1,204 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_SPARSE_H
+#define NV_MATH_SPARSE_H
+
+#include "nvmath.h"
+#include "nvcore/Array.h"
+
+
+// Full and sparse vector and matrix classes. BLAS subset.
+
+namespace nv
+{
+    class FullVector;
+    class FullMatrix;
+    class SparseMatrix;
+
+
+    /// Fixed size vector class.
+    /// Dense vector of floats backed by an Array<float>; dimension() reports
+    /// the element count of that array.
+    class FullVector
+    {
+    public:
+
+        /// Constructs a vector from a dimension (presumably `dim` elements; the
+        /// definition is not in this header).
+        FullVector(uint dim);
+        FullVector(const FullVector & v);
+
+        const FullVector & operator=(const FullVector & v);
+
+        /// Number of elements.
+        uint dimension() const { return m_array.count(); }
+
+        /// Element access; indexes straight into the backing array.
+        const float & operator[]( uint index ) const { return m_array[index]; }
+        float & operator[] ( uint index ) { return m_array[index]; }
+
+        void fill(float f);
+
+        // Arithmetic against another vector.
+        void operator+= (const FullVector & v);
+        void operator-= (const FullVector & v);
+        void operator*= (const FullVector & v);
+
+        // Arithmetic against a scalar.
+        void operator+= (float f);
+        void operator-= (float f);
+        void operator*= (float f);
+
+
+    private:
+
+        /// Element storage.
+        Array<float> m_array;
+
+    };
+
+    // Pseudo-BLAS interface.
+    // Names follow the BLAS level-1 routines; the definitions are not in this
+    // header, so the semantics noted below are the conventional ones (confirm
+    // against the implementation).
+    NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
+    NVMATH_API void copy(const FullVector & x, FullVector & y);           // presumably y = x
+    NVMATH_API void scal(float a, FullVector & x);                        // presumably x = a * x
+    NVMATH_API float dot(const FullVector & x, const FullVector & y);     // presumably the dot product of x and y
+
+
+    /// Selects whether a matrix operand of mult / sgemv / sgemm is used as
+    /// stored or transposed.
+    enum Transpose
+    {
+        NoTransposed = 0,
+        Transposed = 1
+    };
+
+    /// Full matrix class.
+    /// Dense matrix of floats whose width and height are fixed at construction
+    /// (both are const members).
+    class FullMatrix
+    {
+    public:
+
+        /// Single-dimension constructor (presumably a d x d square matrix; the
+        /// definition is not in this header).
+        FullMatrix(uint d);
+        FullMatrix(uint w, uint h);
+        FullMatrix(const FullMatrix & m);
+
+        const FullMatrix & operator=(const FullMatrix & m);
+
+        uint width() const { return m_width; }
+        uint height() const { return m_height; }
+        bool isSquare() const { return m_width == m_height; }
+
+        // Element access. Parameter naming matches SparseMatrix, where x is
+        // the column and y the row.
+        float getCoefficient(uint x, uint y) const;
+
+        void setCoefficient(uint x, uint y, float f);
+        void addCoefficient(uint x, uint y, float f);
+        void mulCoefficient(uint x, uint y, float f);
+
+        // Row helpers used by the matrix-vector products.
+        float dotRow(uint y, const FullVector & v) const;
+        void madRow(uint y, float alpha, FullVector & v) const;
+
+    protected:
+
+        /// True when the backing array holds exactly width * height elements.
+        bool isValid() const {
+            return m_array.size() == (m_width * m_height);
+        }
+
+    private:
+
+        const uint m_width;
+        const uint m_height;
+        Array<float> m_array;
+
+    };
+
+    // Dense matrix-vector product (presumably y = M * x, mirroring the
+    // SparseMatrix overloads; definitions are not in this header).
+    NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
+    NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
+
+    // y = alpha*A*x + beta*y
+    NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
+    NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
+
+    // Dense matrix-matrix product (presumably C = A * B).
+    NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
+    NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
+
+    // C = alpha*A*B + beta*C
+    NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
+    NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
+
+
+    /**
+    * Sparse matrix class. The matrix is assumed to be sparse and to have
+    * very few non-zero elements; for this reason it is stored in indexed
+    * format. To multiply column vectors efficiently the elements are kept
+    * per row: each row of the matrix has its own list of (column, value)
+    * entries. As with the FullVector, the dimension of the matrix is
+    * constant.
+    **/
+    class SparseMatrix
+    {
+        friend class FullMatrix;
+    public:
+
+        // An element of the sparse array.
+        struct Coefficient {
+            uint x;  // column
+            float v; // value
+        };
+
+
+    public:
+
+        /// Single-dimension constructor (presumably a d x d square matrix; the
+        /// definition is not in this header).
+        SparseMatrix(uint d);
+        SparseMatrix(uint w, uint h);
+        SparseMatrix(const SparseMatrix & m);
+
+        const SparseMatrix & operator=(const SparseMatrix & m);
+
+
+        /// Number of columns.
+        uint width() const { return m_width; }
+        /// Number of rows, i.e. the number of per-row entry lists.
+        uint height() const { return m_array.count(); }
+        bool isSquare() const { return width() == height(); }
+
+        float getCoefficient(uint x, uint y) const; // x is column, y is row
+
+        // Element updates; same (column, row) argument order as getCoefficient.
+        void setCoefficient(uint x, uint y, float f);
+        void addCoefficient(uint x, uint y, float f);
+        void mulCoefficient(uint x, uint y, float f);
+
+        /// Sum of the coefficients stored in row y.
+        float sumRow(uint y) const;
+        /// Dot product of row y with v.
+        float dotRow(uint y, const FullVector & v) const;
+        /// v += alpha * (row y).
+        void madRow(uint y, float alpha, FullVector & v) const;
+
+        /// Removes all stored entries of row y.
+        void clearRow(uint y);
+        /// Multiplies the stored entries of row y by f.
+        void scaleRow(uint y, float f);
+        /// Scales row y by the inverse of its Euclidean length.
+        void normalizeRow(uint y);
+
+        /// Sets the stored entries of column x to zero (the entries are kept).
+        void clearColumn(uint x);
+        /// Multiplies the stored entries of column x by f.
+        void scaleColumn(uint x, float f);
+
+        /// Read-only access to the entry list of row y.
+        const Array<Coefficient> & getRow(uint y) const;
+
+        /// Symmetry test; coefficients are compared with equal().
+        bool isSymmetric() const;
+
+    private:
+
+        /// Number of columns.
+        const uint m_width;
+
+        /// Array of matrix elements.
+        /// One inner array per row, holding that row's stored coefficients.
+        Array< Array<Coefficient> > m_array;
+
+    };
+
+    // B = A^T. B must already have the transposed dimensions.
+    NVMATH_API void transpose(const SparseMatrix & A, SparseMatrix & B);
+
+    // y = M * x; TM selects whether M or its transpose is used.
+    NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
+    NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
+
+    // y = alpha*A*x + beta*y
+    NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
+    NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
+
+    // C = A * B; TA / TB select whether each operand is transposed.
+    NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
+    NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
+
+    // C = alpha*A*B + beta*C
+    NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
+    NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
+
+    // C = At * A
+    NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
+
+} // nv namespace
+
+
+#endif // NV_MATH_SPARSE_H
diff --git a/thirdparty/thekla_atlas/nvmath/Sphere.cpp b/thirdparty/thekla_atlas/nvmath/Sphere.cpp
new file mode 100644
index 0000000000..e0c1ad652c
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Sphere.cpp
@@ -0,0 +1,431 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "Sphere.h"
+#include "Vector.inl"
+#include "Box.inl"
+
+#include <float.h> // FLT_MAX
+
+using namespace nv;
+
+// Slack added to every radius computed in this file, so that the points used
+// to build a sphere still test as inside it despite floating-point rounding.
+const float radiusEpsilon = 1e-4f;
+
+// Sphere through two points: centered at their midpoint, with half their
+// distance (plus radiusEpsilon) as radius. Coincident points give the
+// zero-radius sphere at that point.
+Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1)
+{
+    if (p0 == p1) {
+        *this = Sphere(p0);
+        return;
+    }
+
+    center = (p0 + p1) * 0.5f;
+    radius = length(p0 - center) + radiusEpsilon;
+
+    // Sanity check: both points are equidistant from the center.
+    float dist0 = length(p0 - center);
+    float dist1 = length(p1 - center);
+    nvDebugCheck(equal(dist0, radius - radiusEpsilon));
+    nvDebugCheck(equal(dist1, radius - radiusEpsilon));
+}
+
+// Sphere through three points.
+// Duplicate points reduce to the two-point constructor. Otherwise the center
+// is the point equidistant from all three in their plane; when the points are
+// (nearly) collinear that point does not exist and the smallest two-point
+// sphere that contains the third point is used instead.
+Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2)
+{
+    if (p0 == p1 || p0 == p2) *this = Sphere(p1, p2);
+    else if (p1 == p2) *this = Sphere(p0, p2);
+    else {
+        Vector3 a = p1 - p0;
+        Vector3 b = p2 - p0;
+        Vector3 c = cross(a, b);
+
+        // Zero when a and b are parallel, i.e. the points are collinear.
+        float denominator = 2.0f * lengthSquared(c);
+
+        if (!isZero(denominator)) {
+            // Offset from p0 to the center.
+            Vector3 d = (lengthSquared(b) * cross(c, a) + lengthSquared(a) * cross(b, c)) / denominator;
+
+            center = p0 + d;
+            radius = length(d) + radiusEpsilon;
+
+            float d0 = length(p0 - center);
+            float d1 = length(p1 - center);
+            float d2 = length(p2 - center);
+            nvDebugCheck(equal(d0, radius - radiusEpsilon));
+            nvDebugCheck(equal(d1, radius - radiusEpsilon));
+            nvDebugCheck(equal(d2, radius - radiusEpsilon));
+        }
+        else {
+            // @@ This is a specialization of the four-point code, but really, the only thing we need to do here is to find the two most distant points.
+            // Compute all possible spheres, invalidate those that do not contain the three points, keep the smallest.
+            Sphere s0(p1, p2);
+            float d0 = distanceSquared(s0, p0);
+            if (d0 > 0) s0.radius = NV_FLOAT_MAX;
+
+            Sphere s1(p0, p2);
+            float d1 = distanceSquared(s1, p1);
+            if (d1 > 0) s1.radius = NV_FLOAT_MAX;
+
+            Sphere s2(p0, p1);
+            float d2 = distanceSquared(s2, p2);
+            // Invalidate s2 itself when it misses p2 (not s1).
+            if (d2 > 0) s2.radius = NV_FLOAT_MAX;
+
+            if (s0.radius < s1.radius && s0.radius < s2.radius) {
+                center = s0.center;
+                radius = s0.radius;
+            }
+            else if (s1.radius < s2.radius) {
+                center = s1.center;
+                radius = s1.radius;
+            }
+            else {
+                center = s2.center;
+                radius = s2.radius;
+            }
+        }
+    }
+}
+
+// Sphere through four points.
+// Duplicate points reduce to the three-point constructor. Otherwise the center
+// is the point equidistant from all four; when the points are (nearly)
+// coplanar that point does not exist and the smallest three-point sphere that
+// contains the remaining point is used instead.
+Sphere::Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector3::Arg p3)
+{
+    if (p0 == p1 || p0 == p2 || p0 == p3) *this = Sphere(p1, p2, p3);
+    else if (p1 == p2 || p1 == p3) *this = Sphere(p0, p2, p3);
+    else if (p2 == p3) *this = Sphere(p0, p1, p2);
+    else {
+        // @@ This only works if the points are not coplanar!
+        Vector3 a = p1 - p0;
+        Vector3 b = p2 - p0;
+        Vector3 c = p3 - p0;
+
+        float denominator = 2.0f * dot(c, cross(a, b)); // triple product.
+
+        if (!isZero(denominator)) {
+            // Offset from p0 to the center.
+            Vector3 d = (lengthSquared(c) * cross(a, b) + lengthSquared(b) * cross(c, a) + lengthSquared(a) * cross(b, c)) / denominator;
+
+            center = p0 + d;
+            radius = length(d) + radiusEpsilon;
+
+            float d0 = length(p0 - center);
+            float d1 = length(p1 - center);
+            float d2 = length(p2 - center);
+            float d3 = length(p3 - center);
+            nvDebugCheck(equal(d0, radius - radiusEpsilon));
+            nvDebugCheck(equal(d1, radius - radiusEpsilon));
+            nvDebugCheck(equal(d2, radius - radiusEpsilon));
+            nvDebugCheck(equal(d3, radius - radiusEpsilon));
+        }
+        else {
+            // Compute all possible spheres, invalidate those that do not contain the four points, keep the smallest.
+            Sphere s0(p1, p2, p3);
+            float d0 = distanceSquared(s0, p0);
+            if (d0 > 0) s0.radius = NV_FLOAT_MAX;
+
+            Sphere s1(p0, p2, p3);
+            float d1 = distanceSquared(s1, p1);
+            if (d1 > 0) s1.radius = NV_FLOAT_MAX;
+
+            Sphere s2(p0, p1, p3);
+            float d2 = distanceSquared(s2, p2);
+            if (d2 > 0) s2.radius = NV_FLOAT_MAX;
+
+            Sphere s3(p0, p1, p2);
+            float d3 = distanceSquared(s3, p3);
+            // Invalidate s3 itself when it misses p3 (not s2).
+            if (d3 > 0) s3.radius = NV_FLOAT_MAX;
+
+            if (s0.radius < s1.radius && s0.radius < s2.radius && s0.radius < s3.radius) {
+                center = s0.center;
+                radius = s0.radius;
+            }
+            else if (s1.radius < s2.radius && s1.radius < s3.radius) {
+                center = s1.center;
+                radius = s1.radius;
+            }
+            // s2 wins only when it is smaller than s3.
+            else if (s2.radius < s3.radius) {
+                center = s2.center;
+                radius = s2.radius;
+            }
+            else {
+                center = s3.center;
+                radius = s3.radius;
+            }
+        }
+    }
+}
+
+
+// Signed squared distance from a point to the sphere surface: squared distance
+// to the center minus the squared radius. Negative when the point is inside.
+float nv::distanceSquared(const Sphere & sphere, const Vector3 & point)
+{
+    const float radiusSquared = square(sphere.radius);
+    return lengthSquared(sphere.center - point) - radiusSquared;
+}
+
+
+
+// Implementation of "MiniBall" based on:
+// http://www.flipcode.com/archives/Smallest_Enclosing_Spheres.shtml
+//
+// P points at a list of point pointers. The p entries P[0..p-1] are the points
+// still to be enclosed; the b entries just before P (P[-1] .. P[-b]) are the
+// points the sphere is built from. The list is permuted in place.
+static Sphere recurseMini(const Vector3 *P[], uint p, uint b = 0)
+{
+	Sphere MB;
+
+	// Initial sphere built from the b points stored before P.
+	switch(b)
+	{
+	case 0:
+		MB = Sphere(*P[0]);
+		break;
+	case 1:
+		MB = Sphere(*P[-1]);
+		break;
+	case 2:
+		MB = Sphere(*P[-1], *P[-2]);
+		break;
+	case 3:
+		MB = Sphere(*P[-1], *P[-2], *P[-3]);
+		break;
+	case 4:
+		// Four points already define the sphere; nothing left to refine.
+		MB = Sphere(*P[-1], *P[-2], *P[-3], *P[-4]);
+		return MB;
+	}
+
+	for (uint i = 0; i < p; i++)
+    {
+        if (distanceSquared(MB, *P[i]) > 0)   // Signed square distance to sphere
+		{
+			// Move the outlier to the front of the list, shifting the
+			// points before it one slot to the right.
+			for (uint j = i; j > 0; j--)
+			{
+                swap(P[j], P[j-1]);
+			}
+
+			// Rebuild over the i points that preceded the outlier, with the
+			// outlier (now at P[0], i.e. index -1 of P + 1) added to the
+			// defining set.
+			MB = recurseMini(P + 1, i, b + 1);
+		}
+    }
+
+	return MB;
+}
+
+// Returns true when no point lies outside the sphere by NV_EPSILON or more
+// (measured as signed squared distance).
+static bool allInside(const Sphere & sphere, const Vector3 * pointArray, const uint pointCount) {
+    for (uint k = 0; k < pointCount; k++) {
+        const bool outside = distanceSquared(sphere, pointArray[k]) >= NV_EPSILON;
+        if (outside) {
+            return false;
+        }
+    }
+    return true;
+}
+
+
+// Enclosing sphere of a point set, computed with recurseMini().
+Sphere nv::miniBall(const Vector3 * pointArray, const uint pointCount)
+{
+    nvDebugCheck(pointArray != NULL);
+    nvDebugCheck(pointCount > 0);
+
+    // recurseMini() permutes its pointer list, so hand it a scratch list of
+    // pointers and leave the caller's array untouched.
+    const Vector3 ** pointers = new const Vector3*[pointCount];
+
+    for (uint k = 0; k < pointCount; k++) {
+        pointers[k] = pointArray + k;
+    }
+
+    Sphere result = recurseMini(pointers, pointCount);
+
+    delete [] pointers;
+
+    nvDebugCheck(allInside(result, pointArray, pointCount));
+
+    return result;
+}
+
+
+// Approximate bounding sphere, based on "An Efficient Bounding Sphere" by Jack Ritter, from "Graphics Gems"
+// Two passes over the points: the first picks an initial sphere from the most
+// separated pair of axis-extremal points, the second grows that sphere to
+// include every point that still falls outside it.
+Sphere nv::approximateSphere_Ritter(const Vector3 * pointArray, const uint pointCount)
+{
+    nvDebugCheck(pointArray != NULL);
+    nvDebugCheck(pointCount > 0);
+
+    Vector3 xmin, xmax, ymin, ymax, zmin, zmax;
+
+    xmin = xmax = ymin = ymax = zmin = zmax = pointArray[0];
+
+    // FIRST PASS: find 6 minima/maxima points
+    // Seed the tracked coordinate of each extreme so that the first point replaces it.
+    xmin.x = ymin.y = zmin.z = FLT_MAX;
+    xmax.x = ymax.y = zmax.z = -FLT_MAX;
+
+    for (uint i = 0; i < pointCount; i++)
+	{
+        const Vector3 & p = pointArray[i];
+        if (p.x < xmin.x) xmin = p;
+	    if (p.x > xmax.x) xmax = p;
+	    if (p.y < ymin.y) ymin = p;
+	    if (p.y > ymax.y) ymax = p;
+	    if (p.z < zmin.z) zmin = p;
+	    if (p.z > zmax.z) zmax = p;
+	}
+
+    // Squared distance between the two extremal points found along each axis.
+    float xspan = lengthSquared(xmax - xmin);
+    float yspan = lengthSquared(ymax - ymin);
+    float zspan = lengthSquared(zmax - zmin);
+
+    // Set points dia1 & dia2 to the maximally separated pair.
+    Vector3 dia1 = xmin; 
+    Vector3 dia2 = xmax;
+    float maxspan = xspan;
+    if (yspan > maxspan) {
+	    maxspan = yspan;
+	    dia1 = ymin;
+        dia2 = ymax;
+	}
+    // maxspan is not updated here because it is not read again.
+    if (zspan > maxspan) {
+	    dia1 = zmin;
+        dia2 = zmax;
+	}
+
+    // |dia1-dia2| is a diameter of initial sphere
+    
+    // calc initial center
+    Sphere sphere;
+    sphere.center = (dia1 + dia2) / 2.0f;
+
+    // calculate initial radius**2 and radius
+    float rad_sq = lengthSquared(dia2 - sphere.center);
+    sphere.radius = sqrtf(rad_sq);
+
+
+    // SECOND PASS: increment current sphere
+    for (uint i = 0; i < pointCount; i++)
+	{
+        const Vector3 & p = pointArray[i];
+
+        float old_to_p_sq = lengthSquared(p - sphere.center);
+
+	    if (old_to_p_sq > rad_sq) 	// do r**2 test first
+		{ 	
+            // this point is outside of current sphere
+		    float old_to_p = sqrtf(old_to_p_sq);
+
+		    // calc radius of new sphere
+            sphere.radius = (sphere.radius + old_to_p) / 2.0f;
+		    rad_sq = sphere.radius * sphere.radius; 	// for next r**2 compare
+    		
+            // Distance the center moves towards p.
+            float old_to_new = old_to_p - sphere.radius;
+
+		    // calc center of new sphere
+            sphere.center = (sphere.radius * sphere.center + old_to_new * p) / old_to_p;
+		}	
+	}
+
+    nvDebugCheck(allInside(sphere, pointArray, pointCount));
+
+    return sphere;
+}
+
+
+// Radius needed for a sphere at `center` to contain every point: the distance
+// to the farthest point plus radiusEpsilon.
+static float computeSphereRadius(const Vector3 & center, const Vector3 * pointArray, const uint pointCount) {
+
+    // Track squared distances; a single sqrt is taken at the end.
+    float farthestSquared = 0;
+
+    for (uint k = 0; k < pointCount; k++)
+    {
+        const float distanceSq = lengthSquared(center - pointArray[k]);
+
+        if (distanceSq > farthestSquared) {
+            farthestSquared = distanceSq;
+        }
+    }
+
+    return sqrtf(farthestSquared) + radiusEpsilon;
+}
+
+
+// Approximate bounding sphere centered on the center of the points' bounding
+// box, with the radius needed to reach the farthest point.
+Sphere nv::approximateSphere_AABB(const Vector3 * pointArray, const uint pointCount)
+{
+    nvDebugCheck(pointArray != NULL);
+    nvDebugCheck(pointCount > 0);
+
+    Box bounds;
+    bounds.clearBounds();
+
+    for (uint k = 0; k < pointCount; k++) {
+        bounds.addPointToBounds(pointArray[k]);
+    }
+
+    Sphere result;
+    result.center = bounds.center();
+    result.radius = computeSphereRadius(result.center, pointArray, pointCount);
+
+    nvDebugCheck(allInside(result, pointArray, pointCount));
+
+    return result;
+}
+
+
+// Finds the two points with the smallest and the largest projection onto
+// `dir` and writes them to *minPoint and *maxPoint. On ties the first point
+// encountered is kept.
+static void computeExtremalPoints(const Vector3 & dir, const Vector3 * pointArray, uint pointCount, Vector3 * minPoint, Vector3 * maxPoint) {
+    nvDebugCheck(pointCount > 0);
+
+    uint lowIndex = 0;
+    uint highIndex = 0;
+    float lowest = FLT_MAX;
+    float highest = -FLT_MAX;
+
+    for (uint k = 0; k < pointCount; k++) {
+        const float projection = dot(dir, pointArray[k]);
+
+        if (projection < lowest) {
+            lowest = projection;
+            lowIndex = k;
+        }
+        if (projection > highest) {
+            highest = projection;
+            highIndex = k;
+        }
+    }
+    nvDebugCheck(lowest != FLT_MAX);
+    nvDebugCheck(highest != -FLT_MAX);
+
+    *minPoint = pointArray[lowIndex];
+    *maxPoint = pointArray[highIndex];
+}
+
+// EPOS algorithm based on:
+// http://www.ep.liu.se/ecp/034/009/ecp083409.pdf
+//
+// Takes the extremal points along the three coordinate axes, runs miniBall()
+// on those six points, then resizes the radius so every input point fits.
+Sphere nv::approximateSphere_EPOS6(const Vector3 * pointArray, uint pointCount)
+{
+    nvDebugCheck(pointArray != NULL);
+    nvDebugCheck(pointCount > 0);
+
+    Vector3 extremes[6];
+
+    // One (min, max) pair per axis.
+    computeExtremalPoints(Vector3(1, 0, 0), pointArray, pointCount, &extremes[0], &extremes[1]);
+    computeExtremalPoints(Vector3(0, 1, 0), pointArray, pointCount, &extremes[2], &extremes[3]);
+    computeExtremalPoints(Vector3(0, 0, 1), pointArray, pointCount, &extremes[4], &extremes[5]);
+
+    Sphere result = miniBall(extremes, 6);
+    result.radius = computeSphereRadius(result.center, pointArray, pointCount);
+
+    nvDebugCheck(allInside(result, pointArray, pointCount));
+
+    return result;
+}
+
+// Same scheme as approximateSphere_EPOS6, with seven sampling directions
+// (the three axes plus four diagonals), i.e. fourteen extremal points.
+Sphere nv::approximateSphere_EPOS14(const Vector3 * pointArray, uint pointCount)
+{
+    nvDebugCheck(pointArray != NULL);
+    nvDebugCheck(pointCount > 0);
+
+    Vector3 extremes[14];
+
+    // Axis-aligned directions.
+    computeExtremalPoints(Vector3(1, 0, 0), pointArray, pointCount, &extremes[0], &extremes[1]);
+    computeExtremalPoints(Vector3(0, 1, 0), pointArray, pointCount, &extremes[2], &extremes[3]);
+    computeExtremalPoints(Vector3(0, 0, 1), pointArray, pointCount, &extremes[4], &extremes[5]);
+
+    // Component of the diagonal directions.
+    float d = sqrtf(1.0f/3.0f);
+
+    // Diagonal directions.
+    computeExtremalPoints(Vector3(d, d, d), pointArray, pointCount, &extremes[6], &extremes[7]);
+    computeExtremalPoints(Vector3(-d, d, d), pointArray, pointCount, &extremes[8], &extremes[9]);
+    computeExtremalPoints(Vector3(-d, -d, d), pointArray, pointCount, &extremes[10], &extremes[11]);
+    computeExtremalPoints(Vector3(d, -d, d), pointArray, pointCount, &extremes[12], &extremes[13]);
+
+
+    Sphere result = miniBall(extremes, 14);
+    result.radius = computeSphereRadius(result.center, pointArray, pointCount);
+
+    nvDebugCheck(allInside(result, pointArray, pointCount));
+
+    return result;
+}
+
+
+
diff --git a/thirdparty/thekla_atlas/nvmath/Sphere.h b/thirdparty/thekla_atlas/nvmath/Sphere.h
new file mode 100644
index 0000000000..300731af44
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Sphere.h
@@ -0,0 +1,43 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_SPHERE_H
+#define NV_MATH_SPHERE_H
+
+#include "Vector.h"
+
+namespace nv
+{
+    
+    /// Sphere described by a center and a radius.
+    class Sphere
+    {
+    public:
+        /// Default constructor; does not assign center or radius.
+        Sphere() {}
+        Sphere(Vector3::Arg center, float radius) : center(center), radius(radius) {}
+
+        /// Zero-radius sphere at `center`.
+        Sphere(Vector3::Arg center) : center(center), radius(0.0f) {}
+        // Spheres built from two, three or four points.
+        Sphere(Vector3::Arg p0, Vector3::Arg p1);
+        Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2);
+        Sphere(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector3::Arg p3);
+
+        Vector3 center;
+        float radius;
+    };
+
+    // Returns negative values if point is inside.
+    float distanceSquared(const Sphere & sphere, const Vector3 &point);
+
+
+    // Welzl's algorithm. Fairly slow, recursive implementation uses large stack.
+    Sphere miniBall(const Vector3 * pointArray, uint pointCount);
+
+    // Cheaper approximate bounding spheres; each takes a non-empty point array.
+    Sphere approximateSphere_Ritter(const Vector3 * pointArray, uint pointCount);
+    Sphere approximateSphere_AABB(const Vector3 * pointArray, uint pointCount);
+    Sphere approximateSphere_EPOS6(const Vector3 * pointArray, uint pointCount);
+    Sphere approximateSphere_EPOS14(const Vector3 * pointArray, uint pointCount);
+
+
+} // nv namespace
+
+
+#endif // NV_MATH_SPHERE_H
diff --git a/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp b/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp
new file mode 100644
index 0000000000..72fa678f47
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/TypeSerialization.cpp
@@ -0,0 +1,54 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "TypeSerialization.h"
+
+#include "nvcore/Stream.h"
+
+#include "nvmath/Vector.h"
+#include "nvmath/Matrix.h"
+#include "nvmath/Quaternion.h"
+#include "nvmath/Basis.h"
+#include "nvmath/Box.h"
+#include "nvmath/Plane.inl"
+
+using namespace nv;
+
+// Streams the x and y components of v, in that order.
+Stream & nv::operator<< (Stream & s, Vector2 & v)
+{
+    return s << v.x << v.y;
+}
+
+// Streams the x, y and z components of v, in that order.
+Stream & nv::operator<< (Stream & s, Vector3 & v)
+{
+    return s << v.x << v.y << v.z;
+}
+
+// Streams the x, y, z and w components of v, in that order.
+Stream & nv::operator<< (Stream & s, Vector4 & v)
+{
+    return s << v.x << v.y << v.z << v.w;
+}
+
+// NOTE(review): no matrix data is streamed here; `m` is unused and the stream
+// is returned untouched. Confirm whether Matrix serialization is intentionally
+// left unimplemented.
+Stream & nv::operator<< (Stream & s, Matrix & m)
+{
+    return s;
+}
+
+// Streams the x, y, z and w components of q, in that order.
+Stream & nv::operator<< (Stream & s, Quaternion & q)
+{
+    return s << q.x << q.y << q.z << q.w;
+}
+
+// Streams the tangent, bitangent and normal of the basis, in that order.
+Stream & nv::operator<< (Stream & s, Basis & basis)
+{
+    return s << basis.tangent << basis.bitangent << basis.normal;
+}
+
+// Streams the minimum corner followed by the maximum corner of the box.
+Stream & nv::operator<< (Stream & s, Box & box)
+{
+    return s << box.minCorner << box.maxCorner;
+}
+
+// Streams the plane's `v` member.
+Stream & nv::operator<< (Stream & s, Plane & plane)
+{
+    return s << plane.v;
+}
diff --git a/thirdparty/thekla_atlas/nvmath/TypeSerialization.h b/thirdparty/thekla_atlas/nvmath/TypeSerialization.h
new file mode 100644
index 0000000000..32d6de827e
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/TypeSerialization.h
@@ -0,0 +1,35 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MATH_TYPESERIALIZATION_H
+#define NV_MATH_TYPESERIALIZATION_H
+
+#include "nvmath.h"
+
+namespace nv
+{
+    // Forward declarations only, so that this header does not have to include
+    // the math type headers.
+    class Stream;
+
+    class Vector2;
+    class Vector3;
+    class Vector4;
+
+    class Matrix;
+    class Quaternion;
+    class Basis;
+    class Box;
+    class Plane;
+
+    // Stream operators for the vector types. The object is taken by non-const
+    // reference.
+    NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
+    NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
+    NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
+
+    // Stream operators for the remaining math types.
+    NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
+    NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
+    NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
+    NVMATH_API Stream & operator<< (Stream & s, Box & obj);
+    NVMATH_API Stream & operator<< (Stream & s, Plane & obj);
+
+} // nv namespace
+
+#endif // NV_MATH_TYPESERIALIZATION_H
diff --git a/thirdparty/thekla_atlas/nvmath/Vector.cpp b/thirdparty/thekla_atlas/nvmath/Vector.cpp
new file mode 100644
index 0000000000..9122a1b0e9
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Vector.cpp
@@ -0,0 +1,4 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Vector.h"
+#include "Vector.inl"
diff --git a/thirdparty/thekla_atlas/nvmath/Vector.h b/thirdparty/thekla_atlas/nvmath/Vector.h
new file mode 100644
index 0000000000..ad18672a8a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Vector.h
@@ -0,0 +1,149 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_VECTOR_H
+#define NV_MATH_VECTOR_H
+
+#include "nvmath.h"
+
+namespace nv
+{
+    class NVMATH_CLASS Vector2
+    {
+    public:
+        typedef Vector2 const & Arg;
+        // Constructors (defined in Vector.inl). The default one leaves x and y uninitialized.
+        Vector2();
+        explicit Vector2(float f);   // sets both components to f
+        Vector2(float x, float y);
+        Vector2(Vector2::Arg v);
+
+        //template <typename T> explicit Vector2(const T & v) : x(v.x), y(v.y) {}
+        //template <typename T> operator T() const { return T(x, y); }
+
+        const Vector2 & operator=(Vector2::Arg v);
+        // Pointer to the x component.
+        const float * ptr() const;
+
+        void set(float x, float y);
+        // Arithmetic operators; the compound assignments return void, so they cannot be chained.
+        Vector2 operator-() const;
+        void operator+=(Vector2::Arg v);
+        void operator-=(Vector2::Arg v);
+        void operator*=(float s);
+        void operator*=(Vector2::Arg v);
+        // Exact component comparisons (no epsilon); equal() in Vector.inl takes an epsilon.
+        friend bool operator==(Vector2::Arg a, Vector2::Arg b);
+        friend bool operator!=(Vector2::Arg a, Vector2::Arg b);
+        // x/y and component[] are two views of the same two floats.
+        union {
+            struct {
+                float x, y;
+            };
+            float component[2];
+        };
+    };
+
+    class NVMATH_CLASS Vector3
+    {
+    public:
+        typedef Vector3 const & Arg;
+        // Constructors (defined in Vector.inl). The default one leaves x, y and z uninitialized.
+        Vector3();
+        explicit Vector3(float x);   // sets all three components to the same value
+        //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {}
+        Vector3(float x, float y, float z);
+        Vector3(Vector2::Arg v, float z);
+        Vector3(Vector3::Arg v);
+
+        //template <typename T> explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {}
+        //template <typename T> operator T() const { return T(x, y, z); }
+
+        const Vector3 & operator=(Vector3::Arg v);
+        // Copy of the first two components as a Vector2.
+        Vector2 xy() const;
+        // Pointer to the x component.
+        const float * ptr() const;
+
+        void set(float x, float y, float z);
+        // Arithmetic operators; the compound assignments return void, so they cannot be chained.
+        Vector3 operator-() const;
+        void operator+=(Vector3::Arg v);
+        void operator-=(Vector3::Arg v);
+        void operator*=(float s);
+        void operator/=(float s);
+        void operator*=(Vector3::Arg v);
+        void operator/=(Vector3::Arg v);
+        // Exact component comparisons (no epsilon); equal() in Vector.inl takes an epsilon.
+        friend bool operator==(Vector3::Arg a, Vector3::Arg b);
+        friend bool operator!=(Vector3::Arg a, Vector3::Arg b);
+        // x/y/z and component[] are two views of the same three floats.
+        union {
+            struct {
+                float x, y, z;
+            };
+            float component[3];
+        };
+    };
+
+    class NVMATH_CLASS Vector4
+    {
+    public:
+        typedef Vector4 const & Arg;
+        // Constructors (defined in Vector.inl). The default one leaves all components uninitialized.
+        Vector4();
+        explicit Vector4(float x);   // sets all four components to the same value
+        Vector4(float x, float y, float z, float w);
+        Vector4(Vector2::Arg v, float z, float w);
+        Vector4(Vector2::Arg v, Vector2::Arg u);   // v fills x,y and u fills z,w
+        Vector4(Vector3::Arg v, float w);
+        Vector4(Vector4::Arg v);
+        //	Vector4(const Quaternion & v);
+
+        //template <typename T> explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
+        //template <typename T> operator T() const { return T(x, y, z, w); }
+
+        const Vector4 & operator=(Vector4::Arg v);
+        // Swizzles: each returns a copy of the named components.
+        Vector2 xy() const;
+        Vector2 zw() const;
+        Vector3 xyz() const;
+        // Pointer to the x component.
+        const float * ptr() const;
+
+        void set(float x, float y, float z, float w);
+        // Arithmetic operators; the compound assignments return void, so they cannot be chained.
+        Vector4 operator-() const;
+        void operator+=(Vector4::Arg v);
+        void operator-=(Vector4::Arg v);
+        void operator*=(float s);
+        void operator/=(float s);
+        void operator*=(Vector4::Arg v);
+        void operator/=(Vector4::Arg v);
+        // Exact component comparisons (no epsilon); equal() in Vector.inl takes an epsilon.
+        friend bool operator==(Vector4::Arg a, Vector4::Arg b);
+        friend bool operator!=(Vector4::Arg a, Vector4::Arg b);
+        // x/y/z/w and component[] are two views of the same four floats.
+        union {
+            struct {
+                float x, y, z, w;
+            };
+            float component[4];
+        };
+    };
+
+} // nv namespace
+
+// If we had these functions, they would be ambiguous, the compiler would not know which one to pick:
+//template <typename T> Vector2 to(const T & v) { return Vector2(v.x, v.y); }
+//template <typename T> Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); }
+//template <typename T> Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.w); }
+
+// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages.
+
+// Instead we simply have explicit casts. T is built from the vector's components; NV_COMPILER_CHECK requires sizeof(T) to match the vector's size:
+template <typename T> T to(nv::Vector2::Arg v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
+template <typename T> T to(nv::Vector3::Arg v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
+template <typename T> T to(nv::Vector4::Arg v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
+
+#endif // NV_MATH_VECTOR_H
diff --git a/thirdparty/thekla_atlas/nvmath/Vector.inl b/thirdparty/thekla_atlas/nvmath/Vector.inl
new file mode 100644
index 0000000000..bcaec7bf2a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/Vector.inl
@@ -0,0 +1,919 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_VECTOR_INL
+#define NV_MATH_VECTOR_INL
+
+#include "Vector.h"
+#include "nvcore/Utils.h" // min, max
+#include "nvcore/Hash.h" // hash
+
+namespace nv
+{
+
+    // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor.
+    //template <typename T> T to(Vector2::Arg v) { return T(v.x, v.y); }
+
+    // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
+    //template <typename T> T to(Vector3::Arg v) { return T(v.x, v.y, v.z); }
+
+    // Helpers to convert vector types. Assume T has x,y,z,w members and 4 argument constructor.
+    //template <typename T> T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); }
+
+
+    // Vector2 -- construction, assignment and raw access.
+    inline Vector2::Vector2() {}   // components are left uninitialized
+    inline Vector2::Vector2(float f) : x(f), y(f) {}
+    inline Vector2::Vector2(float x, float y) : x(x), y(y) {}
+    inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {}
+
+    // Copies both components from v and returns a const reference to this vector.
+    inline const Vector2 & Vector2::operator=(Vector2::Arg v) {
+        x = v.x;
+        y = v.y;
+        return *this;
+    }
+
+    // Pointer to the x component.
+    inline const float * Vector2::ptr() const {
+        return &x;
+    }
+
+    // Overwrites both components.
+    inline void Vector2::set(float x, float y) {
+        this->x = x;
+        this->y = y;
+    }
+
+    // Negation: returns (-x, -y).
+    inline Vector2 Vector2::operator-() const {
+        return Vector2(-x, -y);
+    }
+
+    // Adds v component-wise, in place.
+    inline void Vector2::operator+=(Vector2::Arg v) {
+        x += v.x;
+        y += v.y;
+    }
+
+    // Subtracts v component-wise, in place.
+    inline void Vector2::operator-=(Vector2::Arg v) {
+        x -= v.x;
+        y -= v.y;
+    }
+
+    // Multiplies both components by the scalar s, in place.
+    inline void Vector2::operator*=(float s) {
+        x *= s;
+        y *= s;
+    }
+
+    // Multiplies component-wise by v, in place.
+    inline void Vector2::operator*=(Vector2::Arg v) {
+        x *= v.x;
+        y *= v.y;
+    }
+
+    // Exact component-wise equality (no epsilon).
+    inline bool operator==(Vector2::Arg a, Vector2::Arg b) {
+        return a.x == b.x && a.y == b.y;
+    }
+    // True when any component differs; the exact negation of the equality test above.
+    inline bool operator!=(Vector2::Arg a, Vector2::Arg b) {
+        return !(a.x == b.x && a.y == b.y);
+    }
+
+
+    // Vector3 -- construction, assignment and raw access.
+    inline Vector3::Vector3() {}   // components are left uninitialized
+    inline Vector3::Vector3(float f) : x(f), y(f), z(f) {}
+    inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {}
+    inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {}
+    inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {}
+
+    // Copies all three components from v and returns a const reference to this vector.
+    inline const Vector3 & Vector3::operator=(Vector3::Arg v) {
+        x = v.x;
+        y = v.y;
+        z = v.z;
+        return *this;
+    }
+
+
+    // Copy of the x and y components.
+    inline Vector2 Vector3::xy() const {
+        return Vector2(x, y);
+    }
+
+    // Pointer to the x component.
+    inline const float * Vector3::ptr() const {
+        return &x;
+    }
+
+    // Overwrites all three components.
+    inline void Vector3::set(float x, float y, float z) {
+        this->x = x;
+        this->y = y;
+        this->z = z;
+    }
+
+    // Negation: returns (-x, -y, -z).
+    inline Vector3 Vector3::operator-() const {
+        return Vector3(-x, -y, -z);
+    }
+
+    // Adds v component-wise, in place.
+    inline void Vector3::operator+=(Vector3::Arg v) {
+        x += v.x;
+        y += v.y;
+        z += v.z;
+    }
+
+    // Subtracts v component-wise, in place.
+    inline void Vector3::operator-=(Vector3::Arg v) {
+        x -= v.x;
+        y -= v.y;
+        z -= v.z;
+    }
+
+    // Multiplies every component by the scalar s, in place.
+    inline void Vector3::operator*=(float s) {
+        x *= s;
+        y *= s;
+        z *= s;
+    }
+
+    // Divides by s using one reciprocal (1.0f / s) followed by three multiplies.
+    inline void Vector3::operator/=(float s) {
+        const float inv = 1.0f / s;
+        x *= inv;
+        y *= inv;
+        z *= inv;
+    }
+
+    // Multiplies component-wise by v, in place.
+    inline void Vector3::operator*=(Vector3::Arg v) {
+        x *= v.x;
+        y *= v.y;
+        z *= v.z;
+    }
+
+    // Divides component-wise by v, in place.
+    inline void Vector3::operator/=(Vector3::Arg v) {
+        x /= v.x;
+        y /= v.y;
+        z /= v.z;
+    }
+
+    // Exact component-wise equality (no epsilon).
+    inline bool operator==(Vector3::Arg a, Vector3::Arg b) {
+        return a.x == b.x && a.y == b.y && a.z == b.z;
+    }
+    // True when any component differs; the exact negation of the equality test above.
+    inline bool operator!=(Vector3::Arg a, Vector3::Arg b) {
+        return !(a.x == b.x && a.y == b.y && a.z == b.z);
+    }
+
+
+    // Vector4 -- construction, assignment, swizzles and raw access.
+    inline Vector4::Vector4() {}   // components are left uninitialized
+    inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {}
+    inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {}
+    inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {}
+    inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
+    inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {}
+    inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
+
+    // Copies all four components from v and returns a const reference to this vector.
+    inline const Vector4 & Vector4::operator=(Vector4::Arg v) {
+        x = v.x;
+        y = v.y;
+        z = v.z;
+        w = v.w;
+        return *this;
+    }
+
+    // Copy of the x and y components.
+    inline Vector2 Vector4::xy() const {
+        return Vector2(x, y);
+    }
+
+    // Copy of the z and w components.
+    inline Vector2 Vector4::zw() const {
+        return Vector2(z, w);
+    }
+
+    // Copy of the x, y and z components.
+    inline Vector3 Vector4::xyz() const {
+        return Vector3(x, y, z);
+    }
+
+    // Pointer to the x component.
+    inline const float * Vector4::ptr() const {
+        return &x;
+    }
+
+    // Overwrites all four components.
+    inline void Vector4::set(float x, float y, float z, float w) {
+        this->x = x;
+        this->y = y;
+        this->z = z;
+        this->w = w;
+    }
+
+    // Negation: returns (-x, -y, -z, -w).
+    inline Vector4 Vector4::operator-() const {
+        return Vector4(-x, -y, -z, -w);
+    }
+
+    // Adds v component-wise, in place.
+    inline void Vector4::operator+=(Vector4::Arg v) {
+        x += v.x;
+        y += v.y;
+        z += v.z;
+        w += v.w;
+    }
+
+    // Subtracts v component-wise, in place.
+    inline void Vector4::operator-=(Vector4::Arg v) {
+        x -= v.x;
+        y -= v.y;
+        z -= v.z;
+        w -= v.w;
+    }
+
+    // Multiplies every component by the scalar s, in place.
+    inline void Vector4::operator*=(float s) {
+        x *= s;
+        y *= s;
+        z *= s;
+        w *= s;
+    }
+
+    // Divides every component by s with four real divisions (no reciprocal is precomputed).
+    inline void Vector4::operator/=(float s) {
+        x /= s;
+        y /= s;
+        z /= s;
+        w /= s;
+    }
+
+    // Multiplies component-wise by v, in place.
+    inline void Vector4::operator*=(Vector4::Arg v) {
+        x *= v.x;
+        y *= v.y;
+        z *= v.z;
+        w *= v.w;
+    }
+
+    // Divides component-wise by v, in place.
+    inline void Vector4::operator/=(Vector4::Arg v) {
+        x /= v.x;
+        y /= v.y;
+        z /= v.z;
+        w /= v.w;
+    }
+
+    // Exact component-wise equality (no epsilon).
+    inline bool operator==(Vector4::Arg a, Vector4::Arg b) {
+        return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w;
+    }
+    // True when any component differs; the exact negation of the equality test above.
+    inline bool operator!=(Vector4::Arg a, Vector4::Arg b) {
+        return !(a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w);
+    }
+
+
+
+    // Functions
+
+
+    // Vector2
+
+    // Component-wise sum a + b.
+    inline Vector2 add(Vector2::Arg a, Vector2::Arg b) {
+        return Vector2(a.x + b.x, a.y + b.y);
+    }
+    // Operator form of add().
+    inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b) {
+        return add(a, b);
+    }
+
+    // Component-wise difference a - b.
+    inline Vector2 sub(Vector2::Arg a, Vector2::Arg b) {
+        return Vector2(a.x - b.x, a.y - b.y);
+    }
+    // Operator form of sub().
+    inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b) {
+        return sub(a, b);
+    }
+
+    // Multiplies both components of v by the scalar s.
+    inline Vector2 scale(Vector2::Arg v, float s) {
+        return Vector2(v.x * s, v.y * s);
+    }
+
+    // Component-wise product of v and s.
+    inline Vector2 scale(Vector2::Arg v, Vector2::Arg s) {
+        return Vector2(v.x * s.x, v.y * s.y);
+    }
+
+    // vector * scalar.
+    inline Vector2 operator*(Vector2::Arg v, float s) {
+        return scale(v, s);
+    }
+
+    // Component-wise product, same result as scale(v1, v2).
+    inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2) {
+        return scale(v1, v2);
+    }
+
+    // scalar * vector.
+    inline Vector2 operator*(float s, Vector2::Arg v) {
+        return scale(v, s);
+    }
+
+    // Division by a scalar, done as a multiplication by the reciprocal 1.0f/s.
+    inline Vector2 operator/(Vector2::Arg v, float s) {
+        return scale(v, 1.0f/s);
+    }
+
+    // Linear blend per component: v1 * (1 - t) + t * v2. t is not clamped.
+    inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t) {
+        const float oneMinusT = 1.0f - t;
+        return Vector2(v1.x * oneMinusT + t * v2.x, v1.y * oneMinusT + t * v2.y);
+    }
+
+    // Dot product.
+    inline float dot(Vector2::Arg a, Vector2::Arg b) {
+        return a.x * b.x + a.y * b.y;
+    }
+
+    // Squared Euclidean length (no square root).
+    inline float lengthSquared(Vector2::Arg v) {
+        return v.x * v.x + v.y * v.y;
+    }
+
+    // Euclidean length.
+    inline float length(Vector2::Arg v) {
+        return sqrtf(lengthSquared(v));
+    }
+
+    // Length of the difference a - b.
+    inline float distance(Vector2::Arg a, Vector2::Arg b) {
+        return length(a - b);
+    }
+
+    // 1 / length(v). There is no guard here against a zero-length input.
+    inline float inverseLength(Vector2::Arg v) {
+        return 1.0f / sqrtf(lengthSquared(v));
+    }
+
+    // Compares length(v) against 1 using the scalar equal() with the given epsilon.
+    inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) {
+        return equal(length(v), 1, epsilon);
+    }
+
+    // Returns v divided by its length. Both the non-zero length and the result go through nvDebugCheck.
+    inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) {
+        float len = length(v);
+        nvDebugCheck(!isZero(len, epsilon));
+        Vector2 unit = scale(v, 1.0f / len);
+        nvDebugCheck(isNormalized(unit));
+        return unit;
+    }
+
+    // Normalization with a fallback: when the length of v is zero according to
+    // isZero(length, epsilon), `fallback` is returned instead of dividing by it.
+    inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) {
+        float len = length(v);
+        if (isZero(len, epsilon)) return fallback;
+
+        return scale(v, 1.0f / len);
+    }
+
+    // Safe, branchless normalization from Andy Firth. All error checking omitted.
+    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
+    // A tiny constant is added to the length so a zero vector does not divide by zero.
+    inline Vector2 normalizeFast(Vector2::Arg v) {
+        const float very_small_float = 1.0e-037f;
+        float len = very_small_float + length(v);
+        return scale(v, 1.0f / len);
+    }
+
+    // Applies the scalar equal() to each pair of components with a shared epsilon.
+    inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) {
+        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon);
+    }
+
+    // Component-wise minimum.
+    inline Vector2 min(Vector2::Arg a, Vector2::Arg b) {
+        return Vector2(min(a.x, b.x), min(a.y, b.y));
+    }
+
+    // Component-wise maximum.
+    inline Vector2 max(Vector2::Arg a, Vector2::Arg b) {
+        return Vector2(max(a.x, b.x), max(a.y, b.y));
+    }
+
+    // Applies the scalar clamp(x, min, max) to each component.
+    inline Vector2 clamp(Vector2::Arg v, float min, float max) {
+        return Vector2(clamp(v.x, min, max), clamp(v.y, min, max));
+    }
+
+    // Applies the scalar saturate() to each component.
+    inline Vector2 saturate(Vector2::Arg v) {
+        return Vector2(saturate(v.x), saturate(v.y));
+    }
+
+    // True only when every component passes the scalar isFinite().
+    inline bool isFinite(Vector2::Arg v) {
+        return isFinite(v.x) && isFinite(v.y);
+    }
+
+    // Returns `fallback` when v is not finite; otherwise a copy of v passed through nv::floatCleanup.
+    inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f)) {
+        if (!isFinite(v)) return fallback;
+        Vector2 cleaned = v;
+        nv::floatCleanup(cleaned.component, 2);
+        return cleaned;
+    }
+
+    // Note, this is the area scaled by 2!
+    // Two-argument form: v0 and v1 are two edge vectors sharing a vertex; returns v0.x * v1.y - v0.y * v1.x.
+    inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1) {
+        return (v0.x * v1.y - v0.y * v1.x); // * 0.5f;
+    }
+    // Three-vertex form: forwards the edges a-c and b-c to the two-argument overload (so also scaled by 2).
+    inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c) {
+        // IC: While it may be appealing to use the following expression:
+        //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
+
+        // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large
+        // floating point numbers, and the result becomes very unstable and dependent on the order of the factors.
+
+        // Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together.
+        // The result in this case is always much more accurate (as long as the triangle is small) and less
+        // dependent on the location of the triangle.
+
+        //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f;
+        return triangleArea(a-c, b-c);
+    }
+
+
+    // hash() specialization for Vector2: forwards the two floats of component[] and h to sdbmFloatHash.
+    template <>
+    inline uint hash(const Vector2 & v, uint h) {
+        return sdbmFloatHash(v.component, 2, h);
+    }
+
+
+
+    // Vector3
+
+    // Component-wise sum a + b.
+    inline Vector3 add(Vector3::Arg a, Vector3::Arg b) {
+        return Vector3(a.x + b.x, a.y + b.y, a.z + b.z);
+    }
+    // Adds the scalar b to every component of a.
+    inline Vector3 add(Vector3::Arg a, float b) {
+        return Vector3(a.x + b, a.y + b, a.z + b);
+    }
+    // Operator form of add(vector, vector).
+    inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b) {
+        return add(a, b);
+    }
+    // Operator form of add(vector, scalar).
+    inline Vector3 operator+(Vector3::Arg a, float b) {
+        return add(a, b);
+    }
+
+    // Component-wise difference a - b.
+    inline Vector3 sub(Vector3::Arg a, Vector3::Arg b) {
+        return Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
+    }
+    // Subtracts the scalar b from every component of a.
+    inline Vector3 sub(Vector3::Arg a, float b) {
+        return Vector3(a.x - b, a.y - b, a.z - b);
+    }
+    // Operator form of sub(vector, vector).
+    inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b) {
+        return sub(a, b);
+    }
+    // Operator form of sub(vector, scalar).
+    inline Vector3 operator-(Vector3::Arg a, float b) {
+        return sub(a, b);
+    }
+
+    // Cross product a x b.
+    inline Vector3 cross(Vector3::Arg a, Vector3::Arg b) {
+        return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x);
+    }
+
+    // Multiplies every component of v by the scalar s.
+    inline Vector3 scale(Vector3::Arg v, float s) {
+        return Vector3(v.x * s, v.y * s, v.z * s);
+    }
+
+    // Component-wise product of v and s.
+    inline Vector3 scale(Vector3::Arg v, Vector3::Arg s) {
+        return Vector3(v.x * s.x, v.y * s.y, v.z * s.z);
+    }
+
+    // vector * scalar.
+    inline Vector3 operator*(Vector3::Arg v, float s) {
+        return scale(v, s);
+    }
+
+    // scalar * vector.
+    inline Vector3 operator*(float s, Vector3::Arg v) {
+        return scale(v, s);
+    }
+
+    // Component-wise product (not a dot or cross product).
+    inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s) {
+        return scale(v, s);
+    }
+
+    // Division by a scalar, done as a multiplication by the reciprocal 1.0f/s.
+    inline Vector3 operator/(Vector3::Arg v, float s) {
+        return scale(v, 1.0f/s);
+    }
+
+    /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s)
+    {
+        return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
+    }*/
+
+    // Linear blend per component: v1 * (1 - t) + t * v2. t is not clamped.
+    inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t) {
+        const float oneMinusT = 1.0f - t;
+        return Vector3(v1.x * oneMinusT + t * v2.x, v1.y * oneMinusT + t * v2.y, v1.z * oneMinusT + t * v2.z);
+    }
+
+    // Dot product.
+    inline float dot(Vector3::Arg a, Vector3::Arg b) {
+        return a.x * b.x + a.y * b.y + a.z * b.z;
+    }
+
+    // Squared Euclidean length (no square root).
+    inline float lengthSquared(Vector3::Arg v) {
+        return v.x * v.x + v.y * v.y + v.z * v.z;
+    }
+
+    // Euclidean length.
+    inline float length(Vector3::Arg v) {
+        return sqrtf(lengthSquared(v));
+    }
+
+    // Length of the difference a - b.
+    inline float distance(Vector3::Arg a, Vector3::Arg b) {
+        return length(a - b);
+    }
+
+    // Squared length of the difference a - b.
+    inline float distanceSquared(Vector3::Arg a, Vector3::Arg b) {
+        return lengthSquared(a - b);
+    }
+
+    // 1 / length(v). There is no guard here against a zero-length input.
+    inline float inverseLength(Vector3::Arg v) {
+        return 1.0f / sqrtf(lengthSquared(v));
+    }
+
+    // Compares length(v) against 1 using the scalar equal() with the given epsilon.
+    inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) {
+        return equal(length(v), 1, epsilon);
+    }
+
+    // Returns v divided by its length. Both the non-zero length and the result go through nvDebugCheck.
+    inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON) {
+        float len = length(v);
+        nvDebugCheck(!isZero(len, epsilon));
+        Vector3 unit = scale(v, 1.0f / len);
+        nvDebugCheck(isNormalized(unit));
+        return unit;
+    }
+
+    // Normalization with a fallback: when the length of v is zero according to
+    // isZero(length, epsilon), `fallback` is returned instead of dividing by it.
+    inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON) {
+        float len = length(v);
+        if (isZero(len, epsilon)) return fallback;
+
+        return scale(v, 1.0f / len);
+    }
+
+    // Safe, branchless normalization from Andy Firth. All error checking omitted.
+    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
+    // A tiny constant is added to the length so a zero vector does not divide by zero.
+    inline Vector3 normalizeFast(Vector3::Arg v) {
+        const float very_small_float = 1.0e-037f;
+        float len = very_small_float + length(v);
+        return scale(v, 1.0f / len);
+    }
+
+    // Applies the scalar equal() to each pair of components with a shared epsilon.
+    inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) {
+        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
+    }
+
+    // Component-wise minimum.
+    inline Vector3 min(Vector3::Arg a, Vector3::Arg b) {
+        return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z));
+    }
+
+    // Component-wise maximum.
+    inline Vector3 max(Vector3::Arg a, Vector3::Arg b) {
+        return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z));
+    }
+
+    // Applies the scalar clamp(x, min, max) to each component.
+    inline Vector3 clamp(Vector3::Arg v, float min, float max) {
+        return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max));
+    }
+
+    // Applies the scalar saturate() to each component.
+    inline Vector3 saturate(Vector3::Arg v) {
+        return Vector3(saturate(v.x), saturate(v.y), saturate(v.z));
+    }
+
+    // floorf() of each component.
+    inline Vector3 floor(Vector3::Arg v) {
+        return Vector3(floorf(v.x), floorf(v.y), floorf(v.z));
+    }
+
+    // ceilf() of each component.
+    inline Vector3 ceil(Vector3::Arg v) {
+        return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z));
+    }
+
+    // True only when every component passes the scalar isFinite().
+    inline bool isFinite(Vector3::Arg v) {
+        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
+    }
+
+    // Returns `fallback` when v is not finite; otherwise a copy of v passed through nv::floatCleanup.
+    inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f)) {
+        if (!isFinite(v)) return fallback;
+        Vector3 cleaned = v;
+        nv::floatCleanup(cleaned.component, 3);
+        return cleaned;
+    }
+
+    // Returns v - 2 * dot(v, n) * n. n is used as given; it is not normalized here.
+    inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n) {
+        return v - (2 * dot(v, n)) * n;
+    }
+
+    // hash() specialization for Vector3: forwards the three floats of component[] and h to sdbmFloatHash.
+    template <>
+    inline uint hash(const Vector3 & v, uint h) {
+        return sdbmFloatHash(v.component, 3, h);
+    }
+
+
+    // Vector4
+
+    // Component-wise sum a + b.
+    inline Vector4 add(Vector4::Arg a, Vector4::Arg b) {
+        return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
+    }
+    // Operator form of add().
+    inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b) {
+        return add(a, b);
+    }
+
+    // Component-wise difference a - b.
+    inline Vector4 sub(Vector4::Arg a, Vector4::Arg b) {
+        return Vector4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
+    }
+    // Operator form of sub().
+    inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b) {
+        return sub(a, b);
+    }
+
+    // Multiplies every component of v by the scalar s.
+    inline Vector4 scale(Vector4::Arg v, float s) {
+        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
+    }
+
+    // Component-wise product of v and s.
+    inline Vector4 scale(Vector4::Arg v, Vector4::Arg s) {
+        return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w);
+    }
+
+    // vector * scalar.
+    inline Vector4 operator*(Vector4::Arg v, float s) {
+        return scale(v, s);
+    }
+
+    // scalar * vector.
+    inline Vector4 operator*(float s, Vector4::Arg v) {
+        return scale(v, s);
+    }
+
+    // Component-wise product (not a dot product).
+    inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s) {
+        return scale(v, s);
+    }
+
+    // Division by a scalar, done as a multiplication by the reciprocal 1.0f/s.
+    inline Vector4 operator/(Vector4::Arg v, float s) {
+        return scale(v, 1.0f/s);
+    }
+
+    /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s)
+    {
+        return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s);
+    }*/
+
+    // Linear blend per component: v1 * (1 - t) + t * v2. t is not clamped.
+    inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t) {
+        const float oneMinusT = 1.0f - t;
+        return Vector4(v1.x * oneMinusT + t * v2.x, v1.y * oneMinusT + t * v2.y, v1.z * oneMinusT + t * v2.z, v1.w * oneMinusT + t * v2.w);
+    }
+
+    // Dot product.
+    inline float dot(Vector4::Arg a, Vector4::Arg b) {
+        return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
+    }
+
+    // Squared Euclidean length (no square root).
+    inline float lengthSquared(Vector4::Arg v) {
+        return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w;
+    }
+
+    // Euclidean length.
+    inline float length(Vector4::Arg v) {
+        return sqrtf(lengthSquared(v));
+    }
+
+    // 1 / length(v). There is no guard here against a zero-length input.
+    inline float inverseLength(Vector4::Arg v) {
+        return 1.0f / sqrtf(lengthSquared(v));
+    }
+
+    // Compares length(v) against 1 using the scalar equal() with the given epsilon.
+    inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) {
+        return equal(length(v), 1, epsilon);
+    }
+
+    // Returns v divided by its length. Both the non-zero length and the result go through nvDebugCheck.
+    inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON) {
+        float len = length(v);
+        nvDebugCheck(!isZero(len, epsilon));
+        Vector4 unit = scale(v, 1.0f / len);
+        nvDebugCheck(isNormalized(unit));
+        return unit;
+    }
+
+    // Normalization with a fallback: when the length of v is zero according to
+    // isZero(length, epsilon), `fallback` is returned instead of dividing by it.
+    inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON) {
+        float len = length(v);
+        if (isZero(len, epsilon)) return fallback;
+
+        return scale(v, 1.0f / len);
+    }
+
+    // Safe, branchless normalization from Andy Firth. All error checking omitted.
+    // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
+    // A tiny constant is added to the length so a zero vector does not divide by zero.
+    inline Vector4 normalizeFast(Vector4::Arg v) {
+        const float very_small_float = 1.0e-037f;
+        float len = very_small_float + length(v);
+        return scale(v, 1.0f / len);
+    }
+
+    // Applies the scalar equal() to each pair of components with a shared epsilon.
+    inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) {
+        return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);
+    }
+
+    // Component-wise minimum.
+    inline Vector4 min(Vector4::Arg a, Vector4::Arg b) {
+        return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
+    }
+
+    // Component-wise maximum.
+    inline Vector4 max(Vector4::Arg a, Vector4::Arg b) {
+        return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
+    }
+
+    // Applies the scalar clamp(x, min, max) to each component.
+    inline Vector4 clamp(Vector4::Arg v, float min, float max) {
+        return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max));
+    }
+
+    // Applies the scalar saturate() to each component.
+    inline Vector4 saturate(Vector4::Arg v) {
+        return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w));
+    }
+
+    // True only when every component passes the scalar isFinite().
+    inline bool isFinite(Vector4::Arg v) {
+        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w);
+    }
+
+    // Returns `fallback` when v is not finite; otherwise a copy of v passed through nv::floatCleanup.
+    inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f)) {
+        if (!isFinite(v)) return fallback;
+        Vector4 cleaned = v;
+        nv::floatCleanup(cleaned.component, 4);
+        return cleaned;
+    }
+
+    // hash() specialization for Vector4: forwards the four floats of component[] and h to sdbmFloatHash.
+    template <>
+    inline uint hash(const Vector4 & v, uint h) {
+        return sdbmFloatHash(v.component, 4, h);
+    }
+
+
+#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float
+
+    //int:
+
+    // int overload: each float component is multiplied by the integer s.
+    inline Vector2 scale(Vector2::Arg v, int s) {
+        return Vector2(v.x * s, v.y * s);
+    }
+
+    // vector * int.
+    inline Vector2 operator*(Vector2::Arg v, int s) {
+        return scale(v, s);
+    }
+
+    // int * vector.
+    inline Vector2 operator*(int s, Vector2::Arg v) {
+        return scale(v, s);
+    }
+
+    // vector / int, done as a multiplication by the float reciprocal 1.0f/s.
+    inline Vector2 operator/(Vector2::Arg v, int s) {
+        return scale(v, 1.0f/s);
+    }
+
+    // int overload: each float component is multiplied by the integer s.
+    inline Vector3 scale(Vector3::Arg v, int s) {
+        return Vector3(v.x * s, v.y * s, v.z * s);
+    }
+
+    // vector * int.
+    inline Vector3 operator*(Vector3::Arg v, int s) {
+        return scale(v, s);
+    }
+
+    // int * vector.
+    inline Vector3 operator*(int s, Vector3::Arg v) {
+        return scale(v, s);
+    }
+
+    // vector / int, done as a multiplication by the float reciprocal 1.0f/s.
+    inline Vector3 operator/(Vector3::Arg v, int s) {
+        return scale(v, 1.0f/s);
+    }
+
+    // int overload: each float component is multiplied by the integer s.
+    inline Vector4 scale(Vector4::Arg v, int s) {
+        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
+    }
+
+    // vector * int.
+    inline Vector4 operator*(Vector4::Arg v, int s) {
+        return scale(v, s);
+    }
+
+    // int * vector.
+    inline Vector4 operator*(int s, Vector4::Arg v) {
+        return scale(v, s);
+    }
+
+    // vector / int, done as a multiplication by the float reciprocal 1.0f/s.
+    inline Vector4 operator/(Vector4::Arg v, int s) {
+        return scale(v, 1.0f/s);
+    }
+
+    //double:
+
+    // vector * double: s is narrowed to float before scaling.
+    inline Vector3 operator*(Vector3::Arg v, double s) {
+        return scale(v, static_cast<float>(s));
+    }
+
+    // double * vector: s is narrowed to float before scaling.
+    inline Vector3 operator*(double s, Vector3::Arg v) {
+        return scale(v, static_cast<float>(s));
+    }
+
+    // vector / double: s is narrowed to float, then v is scaled by the reciprocal.
+    inline Vector3 operator/(Vector3::Arg v, double s) {
+        return scale(v, 1.f/(static_cast<float>(s)));
+    }
+        
+#endif //NV_OS_IOS
+
+} // nv namespace
+
+#endif // NV_MATH_VECTOR_INL
diff --git a/thirdparty/thekla_atlas/nvmath/ftoi.h b/thirdparty/thekla_atlas/nvmath/ftoi.h
new file mode 100644
index 0000000000..bee15c0908
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/ftoi.h
@@ -0,0 +1,258 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_MATH_FTOI_H
+#define NV_MATH_FTOI_H
+
+#include "nvmath/nvmath.h"
+
+#include <math.h>
+
+namespace nv
+{
+    // Optimized float to int conversions. See:
+    // http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
+    // http://www.stereopsis.com/sree/fpu2006.html
+    // http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
+    // http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
+
+
+    union DoubleAnd64 {
+        uint64    i;    // integer view of the shared storage
+        double    d;    // floating point view of the shared storage
+    };
+
+    static const double floatutil_xs_doublemagic = (6755399441055744.0);                            // 2^52 * 1.5
+    static const double floatutil_xs_doublemagicdelta = (1.5e-8);                                   // small offset that keeps the rounding bias strictly below 0.5
+    static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta);  // just under 0.5: subtracted before rounding for floor, added for ceil
+
+    NV_FORCEINLINE int ftoi_round_xs(double val, double magic) {   // double -> int without a cast instruction
+#if 1
+        DoubleAnd64 dunion;
+        dunion.d = val + magic;  // after adding the magic constant the integer result sits in the low bits of the double
+        return (int32) dunion.i; // just cast to grab the bottom bits
+#else
+        val += magic;            // disabled variant: reads the first int of the double in place
+        return ((int*)&val)[0]; // @@ Assumes little endian.
+#endif
+    }
+
+    NV_FORCEINLINE int ftoi_round_xs(float val) {
+        return ftoi_round_xs(val, floatutil_xs_doublemagic);   // the float argument is widened to double by the call
+    }
+    // Floor: bias the value down by just under one half, then round.
+    NV_FORCEINLINE int ftoi_floor_xs(float val) {
+        return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
+    }
+    // Ceil: bias the value up by just under one half, then round.
+    NV_FORCEINLINE int ftoi_ceil_xs(float val) {
+        return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
+    }
+    // Truncate toward zero: ceil for negative values, floor otherwise.
+    NV_FORCEINLINE int ftoi_trunc_xs(float val) {
+        return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val);
+    }
+
+#if NV_CPU_X86 || NV_CPU_X86_64
+
+    NV_FORCEINLINE int ftoi_round_sse(float f) {
+        return _mm_cvt_ss2si(_mm_set_ss(f));   // scalar convert; rounds according to the current SSE rounding mode
+    }
+    // Truncating counterpart: the "cvtt" form of the intrinsic rounds toward zero.
+    NV_FORCEINLINE int ftoi_trunc_sse(float f) {
+      return _mm_cvtt_ss2si(_mm_set_ss(f));
+    }
+
+#endif
+
+
+
+#if NV_USE_SSE
+    // SSE build: the public ftoi_* entry points forward to the intrinsic versions.
+    NV_FORCEINLINE int ftoi_round(float val) {
+        return ftoi_round_sse(val);
+    }
+
+    NV_FORCEINLINE int ftoi_trunc(float f) {
+      return ftoi_trunc_sse(f);
+    }
+
+    // We can probably do better than this. See for example:
+    // http://dss.stephanierct.com/DevBlog/?p=8
+    NV_FORCEINLINE int ftoi_floor(float val) {
+        return ftoi_round(floorf(val));   // floorf already yields a whole number; ftoi_round only converts it
+    }
+
+    NV_FORCEINLINE int ftoi_ceil(float val) {
+        return ftoi_round(ceilf(val));    // same scheme as ftoi_floor, using ceilf
+    }
+
+#else
+    // Non-SSE build: the entry points forward to the double-magic-number ("_xs") versions.
+    // In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code
+    // when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode.
+
+    NV_FORCEINLINE int ftoi_round(float val) {
+        return ftoi_round_xs(val);
+    }
+
+    NV_FORCEINLINE int ftoi_floor(float val) {
+        return ftoi_floor_xs(val);
+    }
+
+    NV_FORCEINLINE int ftoi_ceil(float val) {
+        return ftoi_ceil_xs(val);
+    }
+
+    NV_FORCEINLINE int ftoi_trunc(float f) {
+      return ftoi_trunc_xs(f);
+    }
+
+#endif
+
+
+    inline void test_ftoi() {
+        // Self-check for the ftoi_* conversions: each nvCheck pins one expected result.
+        // Round to nearest integer.
+        nvCheck(ftoi_round(0.1f) == 0);
+        nvCheck(ftoi_round(0.6f) == 1);
+        nvCheck(ftoi_round(-0.2f) == 0);
+        nvCheck(ftoi_round(-0.7f) == -1);
+        nvCheck(ftoi_round(10.1f) == 10);
+        nvCheck(ftoi_round(10.6f) == 11);
+        nvCheck(ftoi_round(-90.1f) == -90);
+        nvCheck(ftoi_round(-90.6f) == -91);
+
+        nvCheck(ftoi_round(0) == 0);
+        nvCheck(ftoi_round(1) == 1);
+        nvCheck(ftoi_round(-1) == -1);
+        
+        nvCheck(ftoi_round(0.5f) == 0);  // How are midpoints rounded? Bankers rounding (ties go to the even integer).
+        nvCheck(ftoi_round(1.5f) == 2);
+        nvCheck(ftoi_round(2.5f) == 2);
+        nvCheck(ftoi_round(3.5f) == 4);
+        nvCheck(ftoi_round(4.5f) == 4);
+        nvCheck(ftoi_round(-0.5f) == 0);
+        nvCheck(ftoi_round(-1.5f) == -2);
+                
+
+        // Truncation (round down if > 0, round up if < 0).
+        nvCheck(ftoi_trunc(0.1f) == 0);
+        nvCheck(ftoi_trunc(0.6f) == 0);
+        nvCheck(ftoi_trunc(-0.2f) == 0);
+        nvCheck(ftoi_trunc(-0.7f) == 0);    // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition.
+        nvCheck(ftoi_trunc(1.99f) == 1);
+        nvCheck(ftoi_trunc(-1.2f) == -1);
+
+        // Floor (round down).
+        nvCheck(ftoi_floor(0.1f) == 0);
+        nvCheck(ftoi_floor(0.6f) == 0);
+        nvCheck(ftoi_floor(-0.2f) == -1);
+        nvCheck(ftoi_floor(-0.7f) == -1);
+        nvCheck(ftoi_floor(1.99f) == 1);
+        nvCheck(ftoi_floor(-1.2f) == -2);
+
+        nvCheck(ftoi_floor(0) == 0);        // whole numbers must map to themselves
+        nvCheck(ftoi_floor(1) == 1);
+        nvCheck(ftoi_floor(-1) == -1);
+        nvCheck(ftoi_floor(2) == 2);
+        nvCheck(ftoi_floor(-2) == -2);
+
+        // Ceil (round up).
+        nvCheck(ftoi_ceil(0.1f) == 1);
+        nvCheck(ftoi_ceil(0.6f) == 1);
+        nvCheck(ftoi_ceil(-0.2f) == 0);
+        nvCheck(ftoi_ceil(-0.7f) == 0);
+        nvCheck(ftoi_ceil(1.99f) == 2);
+        nvCheck(ftoi_ceil(-1.2f) == -1);
+
+        nvCheck(ftoi_ceil(0) == 0);         // whole numbers must map to themselves
+        nvCheck(ftoi_ceil(1) == 1);
+        nvCheck(ftoi_ceil(-1) == -1);
+        nvCheck(ftoi_ceil(2) == 2);
+        nvCheck(ftoi_ceil(-2) == -2);
+    }
+
+
+
+
+
+    // Safe versions using standard casts.
+
+    inline int iround(float f)
+    {
+        return ftoi_round(f);               // uses the optimized conversion, not a plain cast
+        //return int(floorf(f + 0.5f));     // plain-cast alternative, left disabled
+    }
+
+    inline int iround(double f)
+    {
+        return static_cast<int>(::floor(f + 0.5));   // shift by one half, floor, then narrow to int
+    }
+
+    inline int ifloor(float f)
+    {
+        return ftoi_floor(f);               // uses the optimized conversion, not a plain cast
+        //return int(floorf(f));            // plain-cast alternative, left disabled
+    }
+
+    inline int iceil(float f)
+    {
+        return static_cast<int>(ceilf(f));   // round up in float, then narrow to int
+    }
+
+
+
+    // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
+    // Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html
+
+    // Quantize a float in the [0,1] range, using exact end points or uniform bins.
+    inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
+        nvDebugCheck(bits <= 16);
+        // Number of quantization levels: 2^bits.
+        const float levels = float(1 << bits);
+        if (!exactEndPoints) {
+            // Uniform bins: snap to the centre of the bin that contains x.
+            return (floorf(x * levels) + 0.5f) / levels;
+        }
+        // Exact end points: round to the nearest of (levels - 1) steps.
+        return floorf(x * (levels - 1) + 0.5f) / (levels - 1);
+    }
+
+
+    // This is the most common rounding mode:
+    // 
+    //   0     1       2     3
+    // |___|_______|_______|___|
+    // 0                       1
+    //
+    // You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`.
+    // You reconstruct the original float dividing by 'N-1': `f = i / (N-1)`
+
+
+    //    0     1     2     3
+    // |_____|_____|_____|_____|
+    // 0                       1
+
+    /*enum BinningMode {
+        RoundMode_ExactEndPoints,       
+        RoundMode_UniformBins,
+    };*/
+
+    template <int N>
+    inline uint unitFloatToFixed(float f) {
+        return ftoi_round(f * float((1 << N) - 1));   // scale by the largest N-bit value, then round to nearest
+    }
+    // 8-bit convenience wrapper; the result is narrowed to uint8.
+    inline uint8 unitFloatToFixed8(float f) {
+        return static_cast<uint8>(unitFloatToFixed<8>(f));
+    }
+    // 16-bit convenience wrapper; the result is narrowed to uint16.
+    inline uint16 unitFloatToFixed16(float f) {
+        return static_cast<uint16>(unitFloatToFixed<16>(f));
+    }
+
+
+} // nv
+
+#endif // NV_MATH_FTOI_H
diff --git a/thirdparty/thekla_atlas/nvmath/nvmath.h b/thirdparty/thekla_atlas/nvmath/nvmath.h
new file mode 100644
index 0000000000..695f452c1d
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmath/nvmath.h
@@ -0,0 +1,337 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MATH_H
+#define NV_MATH_H
+
+#include "nvcore/nvcore.h"
+#include "nvcore/Debug.h"   // nvDebugCheck
+#include "nvcore/Utils.h"   // max, clamp
+
+#include <math.h>
+
+#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
+#include <float.h>  // finite, isnan
+#endif
+
+#if NV_CPU_X86 || NV_CPU_X86_64
+    //#include <intrin.h>
+    #include <xmmintrin.h>
+#endif
+
+
+
+// Function linkage
+#if NVMATH_SHARED
+#ifdef NVMATH_EXPORTS
+#define NVMATH_API DLL_EXPORT
+#define NVMATH_CLASS DLL_EXPORT_CLASS
+#else
+#define NVMATH_API DLL_IMPORT
+#define NVMATH_CLASS DLL_IMPORT
+#endif
+#else // NVMATH_SHARED
+#define NVMATH_API
+#define NVMATH_CLASS
+#endif // NVMATH_SHARED
+
+// Set some reasonable defaults.
+#ifndef NV_USE_ALTIVEC
+#   define NV_USE_ALTIVEC NV_CPU_PPC
+//#   define NV_USE_ALTIVEC defined(__VEC__)
+#endif
+
+#ifndef NV_USE_SSE
+#   if NV_CPU_X86_64
+        // x64 always supports at least SSE2
+#       define NV_USE_SSE 2
+#   elif NV_CC_MSVC && defined(_M_IX86_FP)
+        // Also on x86 with the /arch:SSE flag in MSVC.
+#       define NV_USE_SSE _M_IX86_FP       // 1=SSE, 2=SSE2
+#   elif defined(__SSE2__)                 // tested before __SSE__, which SSE2 builds define as well
+#       define NV_USE_SSE 2
+#   elif defined(__SSE__)
+#       define NV_USE_SSE 1
+#   else
+        // Otherwise we assume no SSE.
+#       define NV_USE_SSE 0
+#   endif
+#endif
+
+
+// Internally set NV_USE_SIMD when either altivec or sse is available.
+#if NV_USE_ALTIVEC && NV_USE_SSE
+#	error "Cannot enable both altivec and sse!"
+#endif
+
+
+
+#ifndef PI
+#define PI                  float(3.1415926535897932384626433833)
+#endif
+
+#define NV_EPSILON          (0.0001f)
+#define NV_NORMAL_EPSILON   (0.001f)
+
+/*
+#define SQ(r)               ((r)*(r))
+
+#define SIGN_BITMASK        0x80000000
+
+/// Integer representation of a floating-point value.
+#define IR(x)               ((uint32 &)(x))
+
+/// Absolute integer representation of a floating-point value
+#define AIR(x)              (IR(x) & 0x7fffffff)
+
+/// Floating-point representation of an integer value.
+#define FR(x)               ((float&)(x))
+
+/// Integer-based comparison of a floating point value.
+/// Don't use it blindly, it can be faster or slower than the FPU comparison, depends on the context.
+#define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK)
+*/
+
+extern "C" inline double sqrt_assert(const double f)
+{
+    nvDebugCheck(f >= 0.0f);                // domain check: negative (or NaN) input fails
+    return sqrt(f);
+}
+
+inline float sqrtf_assert(const float f)
+{
+    nvDebugCheck(f >= 0.0f);                // same domain check as sqrt_assert
+    return sqrtf(f);
+}
+
+extern "C" inline double acos_assert(const double f) 
+{
+    nvDebugCheck(f >= -1.0f && f <= 1.0f);  // domain check: argument must lie in [-1, 1]
+    return acos(f);
+}
+
+inline float acosf_assert(const float f)
+{
+    nvDebugCheck(f >= -1.0f && f <= 1.0f);  // domain check: argument must lie in [-1, 1]
+    return acosf(f);
+}
+
+extern "C" inline double asin_assert(const double f)
+{
+    nvDebugCheck(f >= -1.0f && f <= 1.0f);  // domain check: argument must lie in [-1, 1]
+    return asin(f);
+}
+
+inline float asinf_assert(const float f)
+{
+    nvDebugCheck(f >= -1.0f && f <= 1.0f);  // domain check: argument must lie in [-1, 1]
+    return asinf(f);
+}
+
+// Replace default functions with asserting ones.
+#if !NV_CC_MSVC || (NV_CC_MSVC && (_MSC_VER < 1700))    // IC: Apparently this was causing problems in Visual Studio 2012. See Issue 194: https://code.google.com/p/nvidia-texture-tools/issues/detail?id=194
+#define sqrt sqrt_assert
+#define sqrtf sqrtf_assert
+#define acos acos_assert
+#define acosf acosf_assert
+#define asin asin_assert
+#define asinf asinf_assert
+#endif
+
+#if NV_CC_MSVC && (_MSC_VER < 1800)    // Visual Studio 2013 and later already provide log2f/exp2f in <math.h>.
+NV_FORCEINLINE float log2f(float x)
+{
+    nvCheck(x >= 0);
+    return logf(x) / logf(2.0f);    // change of base: log2(x) = ln(x) / ln(2)
+}
+NV_FORCEINLINE float exp2f(float x)
+{
+    return powf(2.0f, x);           // 2^x
+}
+#endif
+
+namespace nv
+{
+    inline float toRadian(float degree) { return degree * (PI / 180.0f); }   // degrees -> radians
+    inline float toDegree(float radian) { return radian * (180.0f / PI); }   // radians -> degrees
+
+    // Robust floating point comparisons:
+    // http://realtimecollisiondetection.net/blog/?p=89
+    inline bool equal(const float f0, const float f1, const float epsilon = NV_EPSILON)
+    {
+        const float tolerance = epsilon * max3(1.0f, fabsf(f0), fabsf(f1));   // relative above magnitude 1, absolute below
+        return fabs(f0-f1) <= tolerance;
+    }
+
+    inline bool isZero(const float f, const float epsilon = NV_EPSILON)
+    {
+        return fabs(f) <= epsilon;   // absolute tolerance only; a NaN input compares false
+    }
+
+    inline bool isFinite(const float f)    // per-platform test; platforms not listed below fail the build
+    {
+#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
+        return _finite(f) != 0;     // turn the CRT's int result into a bool
+#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS
+        return isfinite(f);
+#elif NV_OS_LINUX
+        return finitef(f);          // NOTE(review): non-standard libm extension; isfinite() may be the portable choice -- confirm
+#else
+#   error "isFinite not supported"
+#endif
+        //return std::isfinite (f);
+        //return finite (f);
+    }
+
+    inline bool isNan(const float f)       // per-platform test; platforms not listed below fail the build
+    {
+#if NV_OS_WIN32 || NV_OS_XBOX || NV_OS_DURANGO
+        return _isnan(f) != 0;      // turn the CRT's int result into a bool
+#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS
+        return isnan(f);
+#elif NV_OS_LINUX
+        return isnanf(f);           // NOTE(review): non-standard libm extension; isnan() may be the portable choice -- confirm
+#else
+#   error "isNan not supported"
+#endif
+    }
+
+    inline uint log2(uint32 i)
+    {
+        uint32 shifts = 0;
+        for (i >>= 1; i != 0; i >>= 1) ++shifts;   // floor(log2(i)) for i >= 1; inputs 0 and 1 both give 0
+        return shifts;
+    }
+    // 64-bit variant of the same shift-counting loop.
+    inline uint log2(uint64 i)
+    {
+        uint64 shifts = 0;
+        for (i >>= 1; i != 0; i >>= 1) ++shifts;
+        return U32(shifts);
+    }
+
+    inline float lerp(float f0, float f1, float t)
+    {
+        const float keep = 1.0f - t;     // weight of f0; t is the weight of f1
+        return keep * f0 + t * f1;
+    }
+
+    inline float square(float f) { return f * f; }      // f^2
+    inline int square(int i) { return i * i; }          // i^2, no overflow check
+
+    inline float cube(float f) { return f * f * f; }    // f^3
+    inline int cube(int i) { return i * i * i; }        // i^3, no overflow check
+
+    inline float frac(float f)
+    {
+        return f - floor(f);   // fractional part measured from the floor, so it is non-negative for negative f too
+    }
+
+    inline float floatRound(float f)
+    {
+        return floorf(f + 0.5f);   // shift by one half, then floor; the result stays a float
+    }
+
+    // Eliminates negative zeros from a float array.
+    inline void floatCleanup(float * fp, int n)
+    {
+        for (int idx = 0; idx < n; ++idx) {
+            float & slot = fp[idx];
+            union { float value; uint32 bits; } pun = { slot };
+            if (pun.bits == 0x80000000) slot = 0.0f;   // only the exact bit pattern of -0.0f is rewritten
+        }
+    }
+
+    inline float saturate(float f) {
+        return clamp(f, 0.0f, 1.0f);   // clamp to the unit interval [0, 1]
+    }
+
+    inline float linearstep(float edge0, float edge1, float x) {
+        // Scale, bias and saturate x to 0..1 range (edge0 == edge1 divides by zero; not guarded)
+        return saturate((x - edge0) / (edge1 - edge0));
+    }
+
+    inline float smoothstep(float edge0, float edge1, float x) {
+        // Normalized position of x between the edges, clamped to [0,1].
+        const float t = linearstep(edge0, edge1, x);
+        // Cubic polynomial 3t^2 - 2t^3.
+        return t*t*(3 - 2*t);
+    }
+
+    inline int sign(float a)
+    {
+        // +1 for positive, -1 for negative; zero and NaN fail both comparisons and give 0.
+        if (a > 0) return 1;
+        if (a < 0) return -1;
+        return 0;
+    }
+
+    union Float754 {
+        unsigned int raw;       // the whole bit pattern as an integer
+        float value;            // the same storage read as a float
+        struct {
+        #if NV_BIG_ENDIAN
+            unsigned int negative:1;
+            unsigned int biasedexponent:8;
+            unsigned int mantissa:23;
+        #else
+            unsigned int mantissa:23;       // fraction bits
+            unsigned int biasedexponent:8;  // exponent with the bias (127) still applied
+            unsigned int negative:1;        // sign bit
+        #endif
+        } field;
+    };
+
+    // Unbiased binary exponent of x, i.e. approximately Floor(Log2(x)).
+    inline int floatExponent(float x)
+    {
+        Float754 bits;
+        bits.value = x;
+        return int(bits.field.biasedexponent) - 127;   // strip the exponent bias
+    }
+
+
+    // FloatRGB9E5
+    union Float3SE {
+        uint32 v;               // packed 32-bit value
+        struct {
+        #if NV_BIG_ENDIAN
+            uint32 e : 5;
+            uint32 zm : 9;
+            uint32 ym : 9;
+            uint32 xm : 9;
+        #else
+            uint32 xm : 9;      // 9-bit field for x (presumably mantissa -- confirm)
+            uint32 ym : 9;      // 9-bit field for y
+            uint32 zm : 9;      // 9-bit field for z
+            uint32 e : 5;       // single 5-bit field shared by the three components
+        #endif
+        };
+    };
+
+    // FloatR11G11B10
+    union Float3PK {
+        uint32 v;               // packed 32-bit value
+        struct {
+        #if NV_BIG_ENDIAN
+            uint32 ze : 5;
+            uint32 zm : 5;
+            uint32 ye : 5;
+            uint32 ym : 6;
+            uint32 xe : 5;
+            uint32 xm : 6;
+        #else
+            uint32 xm : 6;      // x: 6 + 5 = 11 bits
+            uint32 xe : 5;
+            uint32 ym : 6;      // y: 6 + 5 = 11 bits
+            uint32 ye : 5;
+            uint32 zm : 5;      // z: 5 + 5 = 10 bits
+            uint32 ze : 5;
+        #endif
+        };
+    };
+
+
+} // nv
+
+#endif // NV_MATH_H
diff --git a/thirdparty/thekla_atlas/nvmesh/BaseMesh.cpp b/thirdparty/thekla_atlas/nvmesh/BaseMesh.cpp
new file mode 100644
index 0000000000..f17d3b46fd
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/BaseMesh.cpp
@@ -0,0 +1,19 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "BaseMesh.h"
+#include "Stream.h"
+#include "nvmath/TypeSerialization.h"
+
+
+namespace nv
+{
+	static Stream & operator<< (Stream & s, BaseMesh::Vertex & vertex)
+	{
+		return s << vertex.id << vertex.pos << vertex.nor << vertex.tex;	// fields are streamed in this order
+	}
+	// Streams a mesh, which here means just its vertex array.
+	Stream & operator<< (Stream & s, BaseMesh & mesh)
+	{
+		return s << mesh.m_vertexArray;
+	}
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/BaseMesh.h b/thirdparty/thekla_atlas/nvmesh/BaseMesh.h
new file mode 100644
index 0000000000..c8559511f1
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/BaseMesh.h
@@ -0,0 +1,72 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MESH_BASEMESH_H
+#define NV_MESH_BASEMESH_H
+
+#include "nvmesh.h"
+#include "nvmath/Vector.h"
+#include "nvcore/Array.h"
+#include "nvcore/Hash.h"
+
+namespace nv
+{
+
+    /// Base mesh without connectivity: a thin wrapper around an array of vertices.
+    class BaseMesh
+    {
+    public:
+        struct Vertex;
+
+        BaseMesh() {}
+
+        BaseMesh(uint vertexNum) :
+            m_vertexArray(vertexNum) {}     // vertexNum is forwarded to the Array constructor
+
+        // Vertex methods.
+        uint vertexCount() const { return m_vertexArray.count(); }
+        const Vertex & vertexAt(uint i) const { return m_vertexArray[i]; }
+        Vertex & vertexAt(uint i) { return m_vertexArray[i]; }
+        const Array<Vertex> & vertices() const { return m_vertexArray; }
+        Array<Vertex> & vertices() { return m_vertexArray; }
+
+        friend Stream & operator<< (Stream & s, BaseMesh & obj);   // needs access to m_vertexArray
+
+    protected:
+
+        Array<Vertex> m_vertexArray;
+    };
+
+
+    /// BaseMesh vertex: an id plus position, normal and texture coordinate.
+    struct BaseMesh::Vertex
+    {
+        Vertex() : id(NIL), pos(0.0f), nor(0.0f), tex(0.0f) {}  // id starts as NIL, attributes as zero
+
+        uint id;		// @@ Vertex should be an index into the vertex data.
+        Vector3 pos;    // position
+        Vector3 nor;    // normal
+        Vector2 tex;    // texture coordinate
+    };
+
+    inline bool operator==(const BaseMesh::Vertex & a, const BaseMesh::Vertex & b)
+    {
+        return a.pos == b.pos && a.nor == b.nor && a.tex == b.tex;   // attribute equality only; 'id' is not compared
+    }
+
+    inline bool operator!=(const BaseMesh::Vertex & a, const BaseMesh::Vertex & b)
+    {
+        return !(a == b);   // exact negation of operator==: true as soon as any one of pos, nor or tex differs
+    }
+
+    template <> struct Hash<BaseMesh::Vertex>
+    {
+        uint operator()(const BaseMesh::Vertex & v) const
+        {
+            return Hash<Vector3>()(v.pos);   // hashes the position only; normal and texcoord do not contribute
+        }
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_BASEMESH_H
diff --git a/thirdparty/thekla_atlas/nvmesh/MeshBuilder.cpp b/thirdparty/thekla_atlas/nvmesh/MeshBuilder.cpp
new file mode 100644
index 0000000000..24d8ddff89
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/MeshBuilder.cpp
@@ -0,0 +1,1000 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "nvmesh.h" // pch
+
+#include "MeshBuilder.h"
+#include "TriMesh.h"
+#include "QuadTriMesh.h"
+#include "halfedge/Mesh.h"
+#include "halfedge/Vertex.h"
+#include "halfedge/Face.h"
+
+#include "weld/Weld.h"
+
+#include "nvmath/Box.h"
+#include "nvmath/Vector.inl"
+
+#include "nvcore/StrLib.h"
+#include "nvcore/RadixSort.h"
+#include "nvcore/Ptr.h"
+#include "nvcore/Array.inl"
+#include "nvcore/HashMap.inl"
+
+
+using namespace nv;
+
+/*
+By default the mesh builder creates 3 streams (position, normal, texcoord), I'm planning to add support for extra streams as follows:
+
+enum StreamType { StreamType_Float, StreamType_Vector2, StreamType_Vector3, StreamType_Vector4 };
+
+uint addStream(const char *, uint idx, StreamType);
+
+uint addAttribute(float)
+uint addAttribute(Vector2)
+uint addAttribute(Vector3)
+uint addAttribute(Vector4)
+
+struct Vertex
+{
+    uint pos;
+    uint nor;
+    uint tex;
+    uint * attribs;	// NULL or NIL terminated array?
+};
+
+All streams must be added beforehand, so that you know the size of the attribs array.
+
+The vertex hash function could be kept as is, but the == operator should be extended to test 
+the extra attributes when available.
+
+That might require a custom hash implementation, or an extension of the current one. How to
+handle the variable number of attributes in the attribs array?
+
+bool operator()(const Vertex & a, const Vertex & b) const
+{ 
+    if (a.pos != b.pos || a.nor != b.nor || a.tex != b.tex) return false;
+    if (a.attribs == NULL && b.attribs == NULL) return true;
+    return 0 == memcmp(a.attribs, b.attribs, ???);
+}
+
+We could use a NIL terminated array, or provide custom user data to the equals functor.
+
+vertexMap.setUserData((void *)vertexAttribCount);
+
+bool operator()(const Vertex & a, const Vertex & b, void * userData) const { ... }
+
+*/
+
+
+
+namespace 
+{
+    struct Material
+    {
+        Material() : faceCount(0) {}
+        Material(const String & str) : name(str), faceCount(0) {}
+
+        String name;        // material name
+        uint faceCount;     // faces counted for this material; starts at 0
+    };
+
+    struct Vertex
+    {
+        //Vertex() {}
+        //Vertex(uint p, uint n, uint t0, uint t1, uint c) : pos(p), nor(n), tex0(t0), tex1(t1), col(c) {}
+
+        friend bool operator==(const Vertex & a, const Vertex & b)
+        {
+            return a.pos == b.pos && a.nor == b.nor && a.tex[0] == b.tex[0] && a.tex[1] == b.tex[1] && a.col[0] == b.col[0] && a.col[1] == b.col[1] && a.col[2] == b.col[2];
+        }
+
+        uint pos;           // index of the position
+        uint nor;           // index of the normal
+        uint tex[2];        // one texcoord index per texcoord set
+        uint col[3];        // one color index per color set
+    };
+
+    struct Face
+    {
+        uint id;            // caller-supplied id
+        uint firstIndex;    // offset of the face's first entry in the index array
+        uint indexCount;    // number of index entries the face owns
+        uint material;      // material id current when the face was begun
+        uint group;         // group id current when the face was begun
+    };
+
+} // namespace
+
+
+namespace nv
+{
+    // This is a much better hash than the default and greatly improves performance!
+    template <> struct Hash<Vertex>
+    {
+        uint operator()(const Vertex & v) const { return v.pos + v.nor + v.tex[0]/* + v.col*/; }   // plain sum of three of the attribute indices
+    };
+}
+
+struct MeshBuilder::PrivateData
+{
+    PrivateData() : currentGroup(NIL), currentMaterial(NIL), maxFaceIndexCount(0) {}
+
+    uint pushVertex(uint p, uint n, uint t0, uint t1, uint c0, uint c1, uint c2);
+    uint pushVertex(const Vertex & v);
+
+    Array<Vector3> posArray;            // attribute pools, addressed by the indices stored in Vertex
+    Array<Vector3> norArray;
+    Array<Vector2> texArray[2];
+    Array<Vector4> colArray[3];
+
+    Array<Vertex> vertexArray;          // unique index tuples
+    HashMap<Vertex, uint> vertexMap;    // index tuple -> its slot in vertexArray
+
+    HashMap<String, uint> materialMap;  // material name -> its slot in materialArray
+    Array<Material> materialArray;
+
+    uint currentGroup;                  // NIL while no group is open
+    uint currentMaterial;               // NIL while no material is open
+
+    Array<uint> indexArray;             // vertex indices of all faces, back to back
+    Array<Face> faceArray;
+
+    uint maxFaceIndexCount;             // largest index count among the accepted faces
+};
+
+
+uint MeshBuilder::PrivateData::pushVertex(uint p, uint n, uint t0, uint t1, uint c0, uint c1, uint c2)
+{
+    // Pack the attribute indices into a key; member order is pos, nor, tex[2], col[3].
+    const Vertex key = {
+        p,
+        n,
+        { t0, t1 },
+        { c0, c1, c2 }
+    };
+
+    return pushVertex(key);
+}
+
+uint MeshBuilder::PrivateData::pushVertex(const Vertex & v)
+{
+    // Reuse the slot of an identical vertex when one was pushed before.
+    uint existing;
+    const bool found = vertexMap.get(v, &existing);
+    if (!found)
+    {
+        // First occurrence: append it and record its slot in the map.
+        const uint fresh = vertexArray.count();
+        vertexArray.pushBack(v);
+        vertexMap.add(v, fresh);
+        return fresh;
+    }
+    return existing;
+}
+
+
+MeshBuilder::MeshBuilder() : d(new PrivateData())
+{   // The private state is heap-allocated here and released in the destructor.
+}
+
+MeshBuilder::~MeshBuilder()
+{
+    nvDebugCheck(d != NULL);    // the constructor always allocates d
+    delete d;
+}
+
+
+// Builder methods. Each add* call appends one attribute and returns the index it was stored at.
+uint MeshBuilder::addPosition(const Vector3 & v)
+{
+    d->posArray.pushBack(validate(v));
+    return d->posArray.count() - 1;         // index of the element just appended
+}
+
+uint MeshBuilder::addNormal(const Vector3 & v)
+{
+    d->norArray.pushBack(validate(v));
+    return d->norArray.count() - 1;
+}
+
+uint MeshBuilder::addTexCoord(const Vector2 & v, uint set/*=0*/)
+{
+    d->texArray[set].pushBack(validate(v)); // NOTE(review): 'set' is not range-checked; texArray has 2 entries
+    return d->texArray[set].count() - 1;
+}
+
+uint MeshBuilder::addColor(const Vector4 & v, uint set/*=0*/)
+{
+    d->colArray[set].pushBack(validate(v)); // NOTE(review): 'set' is not range-checked; colArray has 3 entries
+    return d->colArray[set].count() - 1;
+}
+
+void MeshBuilder::beginGroup(uint id)
+{
+    d->currentGroup = id;       // faces begun from now on are tagged with this group
+}
+
+void MeshBuilder::endGroup()
+{
+    d->currentGroup = NIL;      // back to "no group"
+}
+
+// Add a named material, checking for uniqueness: a known name returns its existing index.
+uint MeshBuilder::addMaterial(const char * name)
+{
+    uint index;
+    if (!d->materialMap.get(name, &index)) {
+        // Unknown name: it takes the next free slot of the material array.
+        index = d->materialArray.count();
+        d->materialMap.add(name, index);
+        Material fresh(name);
+        d->materialArray.append(fresh);
+    }
+    else {
+        nvDebugCheck(d->materialArray[index].name == name);   // map and array must agree
+    }
+    return index;
+}
+
+void MeshBuilder::beginMaterial(uint id)
+{
+    d->currentMaterial = id;    // faces begun from now on are tagged with this material; id is not validated here
+}
+
+void MeshBuilder::endMaterial()
+{
+    d->currentMaterial = NIL;   // back to "no material"
+}
+
+void MeshBuilder::beginPolygon(uint id/*=0*/)
+{
+    const Face opened = {           // empty face; its index run starts at the current end of the index array
+        id,                         // id
+        d->indexArray.count(),      // firstIndex
+        0,                          // indexCount
+        d->currentMaterial,         // material
+        d->currentGroup             // group
+    };
+    d->faceArray.pushBack(opened);
+}
+
+uint MeshBuilder::addVertex(uint p, uint n/*= NIL*/, uint t0/*= NIL*/, uint t1/*= NIL*/, uint c0/*= NIL*/, uint c1/*= NIL*/, uint c2/*= NIL*/)
+{
+    // @@ In theory there's no need to add vertices before faces, but I'm adding this to debug problems in our maya exporter:
+    nvDebugCheck(p < d->posArray.count());
+    nvDebugCheck(n == NIL || n < d->norArray.count());
+    nvDebugCheck(t0 == NIL || t0 < d->texArray[0].count());
+    nvDebugCheck(t1 == NIL || t1 < d->texArray[1].count());
+    //nvDebugCheck(c0 == NIL || c0 < d->colArray[0].count());
+    if (c0 >= d->colArray[0].count()) c0 = NIL;    // @@ Out-of-range color index (seen in loc_swamp_catwalk.mb, cause unknown): drop it. '>=' also rejects c0 == count, which is one past the end.
+    nvDebugCheck(c1 == NIL || c1 < d->colArray[1].count());
+    nvDebugCheck(c2 == NIL || c2 < d->colArray[2].count());
+
+    uint idx = d->pushVertex(p, n, t0, t1, c0, c1, c2);    // slot of this index tuple; identical tuples share one slot
+    d->indexArray.pushBack(idx);
+    d->faceArray.back().indexCount++;                      // the vertex joins the most recently begun face
+    return idx;
+}
+
+uint MeshBuilder::addVertex(const Vector3 & pos)
+{
+    // Register the position, then add a vertex that references only that position.
+    return addVertex(addPosition(pos));
+}
+
+#if 0
+uint MeshBuilder::addVertex(const Vector3 & pos, const Vector3 & nor, const Vector2 & tex0, const Vector2 & tex1, const Vector4 & col0, const Vector4 & col1)
+{
+    uint p = addPosition(pos);
+    uint n = addNormal(nor);
+    uint t0 = addTexCoord(tex0, 0);
+    uint t1 = addTexCoord(tex1, 1);
+    uint c0 = addColor(col0);
+    uint c1 = addColor(col1);
+    return addVertex(p, n, t0, t1, c0, c1);
+}
+#endif
+
+// Return true if the face is valid and was added to the mesh. A rejected face and its indices are removed again.
+bool MeshBuilder::endPolygon()
+{
+    const Face & face = d->faceArray.back();
+    const uint count = face.indexCount;     // copied now: 'face' dangles once the face is popped below
+
+    // Validate polygon here. Fewer than three vertices is never a polygon.
+    bool invalid = count <= 2;
+
+    if (!invalid) {
+        // Skip zero area polygons. Or polygons with degenerate edges (which will result in zero-area triangles).
+        const uint first = face.firstIndex;
+        for (uint j = count - 1, i = 0; i < count; j = i, i++) {   // walks every edge (j -> i), including the closing one
+            uint v0 = d->indexArray[first + i];
+            uint v1 = d->indexArray[first + j];
+
+            uint p0 = d->vertexArray[v0].pos;
+            uint p1 = d->vertexArray[v1].pos;
+
+            if (p0 == p1) {                 // both ends reference the same position entry
+                invalid = true;
+                break;
+            }
+
+            if (equal(d->posArray[p0], d->posArray[p1], FLT_EPSILON)) {    // distinct entries with coincident coordinates
+                invalid = true;
+                break;
+            }
+        }
+
+        uint v0 = d->indexArray[first];
+        uint p0 = d->vertexArray[v0].pos;
+        Vector3 x0 = d->posArray[p0];       // apex of the triangle fan used for the area estimate
+
+        float area = 0.0f;
+        for (uint j = 1, i = 2; i < count; j = i, i++) {   // one fan triangle (x0, x[j], x[i]) per iteration
+            uint v1 = d->indexArray[first + i];
+            uint v2 = d->indexArray[first + j];
+
+            uint p1 = d->vertexArray[v1].pos;
+            uint p2 = d->vertexArray[v2].pos;
+
+            Vector3 x1 = d->posArray[p1];
+            Vector3 x2 = d->posArray[p2];
+
+            area += length(cross(x1-x0, x2-x0));   // cross product length is twice the triangle's area
+        }
+
+        if (0.5 * area < 1e-6) {    // Reduce this threshold if artists have legitimate complains.
+            invalid = true;
+        }
+
+        // @@ This is not complete. We may still get zero area triangles after triangulation.
+        // However, our plugin triangulates before building the mesh, so hopefully that's not a problem.
+
+    }
+
+    if (invalid)
+    {
+        d->indexArray.resize(d->indexArray.size() - count);    // drop the indices this face appended
+        d->faceArray.popBack();
+        return false;
+    }
+    else
+    {
+        if (d->currentMaterial != NIL) {
+            d->materialArray[d->currentMaterial].faceCount++;
+        }
+
+        d->maxFaceIndexCount = max(d->maxFaceIndexCount, count);
+        return true;
+    }
+}
+
+
+uint MeshBuilder::weldPositions()
+{
+    Array<uint> remap;
+    Weld<Vector3> welder;
+
+    // Skip both the weld and the remap when no positions were added.
+    if (d->posArray.count() != 0) {
+        // Weld the positions; remap is filled with the replacement index for each old entry.
+        welder(d->posArray, remap);
+
+        // Redirect every vertex that references a position.
+        foreach(v, d->vertexArray)
+        {
+            uint & posIndex = d->vertexArray[v].pos;
+            if (posIndex != NIL) posIndex = remap[posIndex];
+        }
+    }
+
+    return d->posArray.count();
+}
+
+uint MeshBuilder::weldNormals()
+{
+    Array<uint> remap;
+    Weld<Vector3> welder;
+
+    // Skip both the weld and the remap when no normals were added.
+    if (d->norArray.count() != 0) {
+        // Weld the normals; remap is filled with the replacement index for each old entry.
+        welder(d->norArray, remap);
+
+        // Redirect every vertex that references a normal.
+        foreach(v, d->vertexArray)
+        {
+            uint & norIndex = d->vertexArray[v].nor;
+            if (norIndex != NIL) norIndex = remap[norIndex];
+        }
+    }
+
+    return d->norArray.count();
+}
+
+uint MeshBuilder::weldTexCoords(uint set/*=0*/)
+{
+    Array<uint> remap;
+    Weld<Vector2> welder;
+
+    // Skip both the weld and the remap when this set holds no coordinates.
+    if (d->texArray[set].count() != 0) {
+        // Weld the set; remap is filled with the replacement index for each old entry.
+        welder(d->texArray[set], remap);
+
+        // Redirect every vertex that references this set.
+        foreach(v, d->vertexArray)
+        {
+            uint & texIndex = d->vertexArray[v].tex[set];
+            if (texIndex != NIL) texIndex = remap[texIndex];
+        }
+    }
+
+    return d->texArray[set].count();
+}
+
+uint  MeshBuilder::weldColors(uint set/*=0*/)
+{
+    Array<uint> remap;
+    Weld<Vector4> welder;
+
+    // Skip both the weld and the remap when this set holds no colors.
+    if (d->colArray[set].count() != 0) {
+        // Weld the set; remap is filled with the replacement index for each old entry.
+        welder(d->colArray[set], remap);
+
+        // Redirect every vertex that references this set.
+        foreach(v, d->vertexArray)
+        {
+            uint & colIndex = d->vertexArray[v].col[set];
+            if (colIndex != NIL) colIndex = remap[colIndex];
+        }
+    }
+
+    return d->colArray[set].count();
+}
+
+void MeshBuilder::weldVertices() {
+    // An empty vertex array leaves nothing to merge.
+    if (d->vertexArray.count() == 0) {
+        return;
+    }
+
+    // Merge duplicate vertices; remap is filled with the replacement index for each old vertex.
+    Array<uint> remap;
+    Weld<Vertex> welder;
+
+    welder(d->vertexArray, remap);
+
+    // Point every face corner at its replacement vertex.
+    foreach(corner, d->indexArray)
+    {
+        uint & vertexIndex = d->indexArray[corner];
+        vertexIndex = remap[vertexIndex];
+    }
+
+    // Apply the same redirection to the indices stored in the lookup map.
+    foreach(entry, d->vertexMap)
+    {
+        uint & mappedIndex = d->vertexMap[entry].value;
+        mappedIndex = remap[mappedIndex];
+    }
+}
+
+
+void MeshBuilder::optimize()
+{
+    if (d->vertexArray.count() == 0)
+    {
+        return;         // no vertices, nothing to weld
+    }
+
+    weldPositions();    // attribute pools first ...
+    weldNormals();
+    weldTexCoords(0);
+    weldTexCoords(1);
+    weldColors();       // NOTE(review): per the commented default this is set 0 only; color sets 1 and 2 are not welded here -- confirm intended
+
+    weldVertices();     // ... then whole vertices, once their attribute indices have been remapped
+}
+
+
+
+
+
+
+void MeshBuilder::removeUnusedMaterials(Array<uint> & newMaterialId)
+{
+    uint materialCount = d->materialArray.count();
+
+    // Recount, from zero, how many faces reference each material.
+    for (uint m = 0; m < materialCount; m++)
+    {
+        d->materialArray[m].faceCount = 0;
+    }
+    foreach(f, d->faceArray)
+    {
+        const Face & face = d->faceArray[f];
+        if (face.material == NIL) continue; // Faces without a material count for none.
+        nvDebugCheck(face.material < materialCount);
+        d->materialArray[face.material].faceCount++;
+    }
+
+    // Compact the material array in place and record, for every old id, the id
+    // it ends up with (NIL when the material is dropped). 'slot' is the array
+    // position under inspection: removeAt() is relied on to move a not yet
+    // inspected entry into that slot, so it only advances when a material is kept.
+    newMaterialId.resize(materialCount);
+    uint slot = 0;
+    for (uint oldId = 0; oldId < materialCount; oldId++)
+    {
+        if (d->materialArray[slot].faceCount > 0)
+        {
+            newMaterialId[oldId] = slot;
+            slot++;
+        }
+        else
+        {
+            newMaterialId[oldId] = NIL;
+            d->materialArray.removeAt(slot);
+        }
+    }
+
+    materialCount = d->materialArray.count();
+
+    // Switch every face over to the new ids.
+    foreach(f, d->faceArray)
+    {
+        Face & face = d->faceArray[f];
+        if (face.material == NIL) continue;
+
+        uint id = newMaterialId[face.material];
+        nvDebugCheck(id != NIL && id < materialCount);
+        face.material = id;
+    }
+}
+
+/// Reorders the face array according to a RadixSort of the faces' group ids.
+void MeshBuilder::sortFacesByGroup() {
+    const uint total = d->faceArray.count();
+
+    // Gather the sort key of every face.
+    Array<uint> keys;
+    keys.resize(total);
+    for (uint f = 0; f < total; f++) {
+        keys[f] = d->faceArray[f].group;
+    }
+
+    RadixSort sorter;
+    sorter.sort(keys);
+
+    // rank(slot) is used as the current index of the face that belongs at 'slot'.
+    Array<Face> sorted;
+    sorted.resize(total);
+    for (uint slot = 0; slot < total; slot++) {
+        sorted[slot] = d->faceArray[sorter.rank(slot)];
+    }
+
+    swap(sorted, d->faceArray);
+}
+
+/// Reorders the face array according to a RadixSort of the faces' material ids.
+void MeshBuilder::sortFacesByMaterial() {
+    const uint total = d->faceArray.count();
+
+    // Gather the sort key of every face.
+    Array<uint> keys;
+    keys.resize(total);
+    for (uint f = 0; f < total; f++) {
+        keys[f] = d->faceArray[f].material;
+    }
+
+    RadixSort sorter;
+    sorter.sort(keys);
+
+    // rank(slot) is used as the current index of the face that belongs at 'slot'.
+    Array<Face> sorted;
+    sorted.resize(total);
+    for (uint slot = 0; slot < total; slot++) {
+        sorted[slot] = d->faceArray[sorter.rank(slot)];
+    }
+
+    swap(sorted, d->faceArray);
+}
+
+
+/// Deletes the current private data and replaces it with a freshly constructed PrivateData.
+void MeshBuilder::reset() {
+    nvDebugCheck(d != NULL);
+    delete d;
+    d = new PrivateData();
+}
+
+/// Ends the current group and/or material if one is still set.
+void MeshBuilder::done() {
+    // NIL in currentGroup / currentMaterial is treated as "nothing to end".
+    const bool groupOpen = (d->currentGroup != NIL);
+    if (groupOpen) endGroup();
+
+    // Evaluated only after endGroup() had its chance to run.
+    const bool materialOpen = (d->currentMaterial != NIL);
+    if (materialOpen) endMaterial();
+}
+
+// Hints. Each one makes room for 'count' more elements on top of what is already stored.
+void MeshBuilder::hintTriangleCount(uint count) {
+    // NOTE(review): four index slots are reserved per hinted triangle although a triangle uses three -- confirm intent.
+    d->indexArray.reserve(d->indexArray.count() + count * 4);
+}
+
+void MeshBuilder::hintVertexCount(uint count) {
+    d->vertexArray.reserve(d->vertexArray.count() + count);
+    // NOTE(review): the vertex map gets resize() rather than reserve() -- confirm this only sizes the table.
+    d->vertexMap.resize(d->vertexMap.count() + count);
+}
+
+/// Reserves room for count additional positions.
+void MeshBuilder::hintPositionCount(uint count) {
+    d->posArray.reserve(d->posArray.count() + count);
+}
+
+/// Reserves room for count additional normals.
+void MeshBuilder::hintNormalCount(uint count) {
+    d->norArray.reserve(d->norArray.count() + count);
+}
+
+/// Reserves room for count additional texcoords in the given set.
+void MeshBuilder::hintTexCoordCount(uint count, uint set/*=0*/) {
+    d->texArray[set].reserve(d->texArray[set].count() + count);
+}
+
+/// Reserves room for count additional colors in the given set.
+void MeshBuilder::hintColorCount(uint count, uint set/*=0*/) {
+    d->colArray[set].reserve(d->colArray[set].count() + count);
+}
+
+
+// Helpers.
+/// Adds one three-cornered polygon; each argument is passed as the sole argument of addVertex().
+void MeshBuilder::addTriangle(uint v0, uint v1, uint v2) {
+    const uint corners[3] = { v0, v1, v2 };
+
+    beginPolygon();
+    for (uint k = 0; k < 3; k++) addVertex(corners[k]);
+    endPolygon();
+}
+
+/// Adds one four-cornered polygon; each argument is passed as the sole argument of addVertex().
+void MeshBuilder::addQuad(uint v0, uint v1, uint v2, uint v3) {
+    // Corners are submitted in the order given.
+    const uint corners[4] = { v0, v1, v2, v3 };
+
+    beginPolygon();
+    for (uint k = 0; k < 4; k++) addVertex(corners[k]);
+    endPolygon();
+}
+
+
+// Get tri mesh. Polygons are fan-triangulated around their first vertex; the caller owns the result.
+TriMesh * MeshBuilder::buildTriMesh() const
+{
+    const uint faceCount = d->faceArray.count();
+    const uint vertexCount = d->vertexArray.count();
+
+    // A face with n >= 3 indices yields n - 2 triangles. Faces with fewer are
+    // skipped: 'indexCount - 2' would wrap around for them in unsigned math.
+    uint triangleCount = 0;
+    for (uint f = 0; f < faceCount; f++) {
+        const uint indexCount = d->faceArray[f].indexCount;
+        if (indexCount >= 3) triangleCount += indexCount - 2;
+    }
+
+    TriMesh * mesh = new TriMesh(triangleCount, vertexCount);
+
+    // Build faces.
+    Array<TriMesh::Face> & faces = mesh->faces();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const uint firstIndex = d->faceArray[f].firstIndex;
+        const uint indexCount = d->faceArray[f].indexCount;
+        if (indexCount < 3) continue; // Not enough indices to form a triangle.
+
+        const uint v0 = d->indexArray[firstIndex + 0];
+        uint v1 = d->indexArray[firstIndex + 1];
+        for (uint t = 2; t < indexCount; t++) {
+            const uint v2 = d->indexArray[firstIndex + t];
+            TriMesh::Face face;
+            face.id = faces.count();
+            face.v[0] = v0;
+            face.v[1] = v1;
+            face.v[2] = v2;
+            faces.append(face);
+            v1 = v2; // The next triangle of the fan is (v0, v2, next).
+        }
+    }
+
+    // Build vertices. An attribute whose index is NIL is not assigned here.
+    Array<BaseMesh::Vertex> & vertices = mesh->vertices();
+    for (uint i = 0; i < vertexCount; i++)
+    {
+        const Vertex & src = d->vertexArray[i];
+        BaseMesh::Vertex vertex;
+        vertex.id = i;
+        if (src.pos != NIL) vertex.pos = d->posArray[src.pos];
+        if (src.nor != NIL) vertex.nor = d->norArray[src.nor];
+        if (src.tex[0] != NIL) vertex.tex = d->texArray[0][src.tex[0]];
+        vertices.append(vertex);
+    }
+
+    return mesh;
+}
+
+// Get quad/tri mesh. Faces that are neither triangles nor quads are left out;
+// the caller owns the result.
+QuadTriMesh * MeshBuilder::buildQuadTriMesh() const
+{
+    const uint faceCount = d->faceArray.count();
+    const uint vertexCount = d->vertexArray.count();
+    QuadTriMesh * mesh = new QuadTriMesh(faceCount, vertexCount);
+
+    // Build faces.
+    Array<QuadTriMesh::Face> & faces = mesh->faces();
+
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const uint firstIndex = d->faceArray[f].firstIndex;
+        const uint indexCount = d->faceArray[f].indexCount;
+
+        // Only adds triangles and quads. The count is checked before the index
+        // array is touched, so a face with fewer than three indices is never
+        // read past its own range.
+        if (indexCount != 3 && indexCount != 4) {
+            continue;
+        }
+
+        QuadTriMesh::Face face;
+        face.id = f; // The builder's face index, so ids have gaps where faces were left out.
+        face.v[0] = d->indexArray[firstIndex + 0];
+        face.v[1] = d->indexArray[firstIndex + 1];
+        face.v[2] = d->indexArray[firstIndex + 2];
+        face.v[3] = NIL; // Stays NIL for triangles.
+        if (indexCount == 4) face.v[3] = d->indexArray[firstIndex + 3];
+        faces.append(face);
+    }
+
+    // Build vertices. An attribute whose index is NIL is not assigned here.
+    Array<BaseMesh::Vertex> & vertices = mesh->vertices();
+
+    for (uint i = 0; i < vertexCount; i++)
+    {
+        const Vertex & src = d->vertexArray[i];
+        BaseMesh::Vertex vertex;
+        vertex.id = i;
+        if (src.pos != NIL) vertex.pos = d->posArray[src.pos];
+        if (src.nor != NIL) vertex.nor = d->norArray[src.nor];
+        if (src.tex[0] != NIL) vertex.tex = d->texArray[0][src.tex[0]];
+        vertices.append(vertex);
+    }
+
+    return mesh;
+}
+
+// Get half edge mesh.
+// weldPositions: true links colocal vertices with mesh->linkColocals(); false links them
+//     through a canonical map built from the builder's position indices.
+// error (optional): set to Error_None on entry and to Error_NonManifoldEdge when a face is rejected.
+// badFaces (optional): receives the id of every face that addFace() rejected.
+// The returned mesh is released from its AutoPtr, so the caller owns it.
+HalfEdge::Mesh * MeshBuilder::buildHalfEdgeMesh(bool weldPositions, Error * error/*=NULL*/, Array<uint> * badFaces/*=NULL*/) const
+{
+    if (error != NULL) *error = Error_None;
+
+    const uint vertexCount = d->vertexArray.count();
+    AutoPtr<HalfEdge::Mesh> mesh(new HalfEdge::Mesh());
+
+    for(uint v = 0; v < vertexCount; v++)
+    {
+        // The position is used unconditionally, unlike the optional attributes below.
+        nvDebugCheck(d->vertexArray[v].pos != NIL);
+        HalfEdge::Vertex * vertex = mesh->addVertex(d->posArray[d->vertexArray[v].pos]);
+        if (d->vertexArray[v].nor != NIL) vertex->nor = d->norArray[d->vertexArray[v].nor];
+        if (d->vertexArray[v].tex[0] != NIL) vertex->tex = Vector2(d->texArray[0][d->vertexArray[v].tex[0]]);
+        if (d->vertexArray[v].col[0] != NIL) vertex->col = d->colArray[0][d->vertexArray[v].col[0]];
+    }
+
+    if (weldPositions) {
+        mesh->linkColocals();
+    }
+    else {
+        // Build canonical map from position indices.
+        Array<uint> canonicalMap(vertexCount);
+        foreach (i, d->vertexArray) {
+            canonicalMap.append(d->vertexArray[i].pos);
+        }
+        mesh->linkColocalsWithCanonicalMap(canonicalMap);
+    }
+
+    const uint faceCount = d->faceArray.count();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const uint firstIndex = d->faceArray[f].firstIndex;
+        const uint indexCount = d->faceArray[f].indexCount;
+
+        HalfEdge::Face * face = mesh->addFace(d->indexArray, firstIndex, indexCount);
+
+        // @@ This is too late, removing the face here will leave the mesh improperly connected.
+        /*if (face->area() <= FLT_EPSILON) {
+            mesh->remove(face);
+            face = NULL;
+        }*/
+
+        if (face != NULL) {
+            face->group = d->faceArray[f].group;
+            face->material = d->faceArray[f].material;
+        }
+        else {
+            // Non manifold mesh.
+            if (error != NULL) *error = Error_NonManifoldEdge;
+            if (badFaces != NULL) {
+                badFaces->append(d->faceArray[f].id);
+            }
+            //return NULL; // IC: Ignore error and continue building the mesh.
+        }
+    }
+
+    mesh->linkBoundary();
+
+    // We cannot fix T-junctions here, because this would introduce new vertices and these vertices won't have the corresponding builder data.
+    // Maybe the builder should perform the search for T-junctions and update the vertex data directly.
+    // For now, we don't fix T-junctions at export time, but only during parameterization.
+    //mesh->fixBoundaryJunctions();
+    //mesh->sewBoundary();
+
+    return mesh.release();
+}
+
+
+/// Expands the position pool into one entry per unified vertex. Always returns true.
+bool MeshBuilder::buildPositions(Array<Vector3> & positionArray) {
+    const uint total = d->vertexArray.count();
+    positionArray.resize(total);
+
+    for (uint v = 0; v < total; v++) {
+        // A vertex without a position is not handled here beyond this check.
+        nvDebugCheck(d->vertexArray[v].pos != NIL);
+        const uint posIndex = d->vertexArray[v].pos;
+        positionArray[v] = d->posArray[posIndex];
+    }
+    return true;
+}
+
+/// Writes one normal per unified vertex; vertices without a normal get (0, 0, 1).
+/// Returns true when at least one vertex had a normal.
+bool MeshBuilder::buildNormals(Array<Vector3> & normalArray) {
+    const uint total = d->vertexArray.count();
+    normalArray.resize(total);
+
+    bool found = false;
+    for (uint i = 0; i < total; i++) {
+        const uint norIndex = d->vertexArray[i].nor;
+        if (norIndex != NIL) {
+            found = true;
+            normalArray[i] = d->norArray[norIndex];
+        }
+        else {
+            normalArray[i] = Vector3(0, 0, 1);
+        }
+    }
+
+    return found;
+}
+
+/// Writes one texcoord of the given set per unified vertex; vertices without one get (0, 0).
+/// Returns true when at least one vertex had a texcoord in that set.
+bool MeshBuilder::buildTexCoords(Array<Vector2> & texCoordArray, uint set/*=0*/) {
+    const uint total = d->vertexArray.count();
+    texCoordArray.resize(total);
+
+    bool found = false;
+    for (uint i = 0; i < total; i++) {
+        const uint texIndex = d->vertexArray[i].tex[set];
+        if (texIndex != NIL) {
+            found = true;
+            texCoordArray[i] = d->texArray[set][texIndex];
+        }
+        else {
+            texCoordArray[i] = Vector2(0, 0);
+        }
+    }
+
+    return found;
+}
+
+/// Writes one color of the given set per unified vertex; vertices without one get (0, 0, 0, 1).
+/// Returns true when at least one vertex had a color in that set.
+bool MeshBuilder::buildColors(Array<Vector4> & colorArray, uint set/*=0*/) {
+    const uint total = d->vertexArray.count();
+    colorArray.resize(total);
+
+    bool found = false;
+    for (uint i = 0; i < total; i++) {
+        const uint colIndex = d->vertexArray[i].col[set];
+        if (colIndex != NIL) {
+            found = true;
+            colorArray[i] = d->colArray[set][colIndex];
+        }
+        else {
+            colorArray[i] = Vector4(0, 0, 0, 1);
+        }
+    }
+
+    return found;
+}
+
+/// Fills map so that map[v] holds the position index of unified vertex v.
+void MeshBuilder::buildVertexToPositionMap(Array<int> &map) {
+	const uint total = d->vertexArray.count();
+	map.resize(total);
+
+	for (uint v = 0; v < total; v++) {
+		map[v] = d->vertexArray[v].pos;
+	}
+}
+
+
+
+/// Number of unified vertices.
+uint MeshBuilder::vertexCount() const {
+    return d->vertexArray.count();
+}
+
+
+/// Number of entries in the position pool.
+uint MeshBuilder::positionCount() const {
+    return d->posArray.count();
+}
+
+/// Number of entries in the normal pool.
+uint MeshBuilder::normalCount() const {
+    return d->norArray.count();
+}
+
+/// Number of entries in the texcoord pool of the given set.
+uint MeshBuilder::texCoordCount(uint set/*=0*/) const {
+    return d->texArray[set].count();
+}
+
+/// Number of entries in the color pool of the given set.
+uint MeshBuilder::colorCount(uint set/*=0*/) const {
+    return d->colArray[set].count();
+}
+
+
+/// Number of materials currently stored.
+uint MeshBuilder::materialCount() const {
+    return d->materialArray.count();
+}
+
+/// Name of material i.
+const char * MeshBuilder::material(uint i) const {
+    return d->materialArray[i].name;
+}
+
+
+/// Position-pool index stored in the given unified vertex.
+uint MeshBuilder::positionIndex(uint vertex) const {
+    return d->vertexArray[vertex].pos;
+}
+/// Normal-pool index stored in the given unified vertex.
+uint MeshBuilder::normalIndex(uint vertex) const {
+    return d->vertexArray[vertex].nor;
+}
+/// Texcoord-pool index (for the given set) stored in the given unified vertex.
+uint MeshBuilder::texCoordIndex(uint vertex, uint set/*=0*/) const {
+    return d->vertexArray[vertex].tex[set];
+}
+/// Color-pool index (for the given set) stored in the given unified vertex.
+uint MeshBuilder::colorIndex(uint vertex, uint set/*=0*/) const {
+    return d->vertexArray[vertex].col[set];
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/MeshBuilder.h b/thirdparty/thekla_atlas/nvmesh/MeshBuilder.h
new file mode 100644
index 0000000000..5b3af3fc1d
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/MeshBuilder.h
@@ -0,0 +1,119 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_MESHBUILDER_H
+#define NV_MESH_MESHBUILDER_H
+
+#include "nvmesh.h"
+#include "nvcore/Array.h"
+#include "nvmath/Vector.h"
+
+namespace nv
+{
+    class String;
+    class TriMesh;
+    class QuadTriMesh;
+    namespace HalfEdge { class Mesh; }
+
+
+    /// Mesh builder is a helper class for importers.
+    /// Ideally it should handle any vertex data, but for now it only accepts positions,
+    /// normals, texcoords and colors.
+    class MeshBuilder
+    {
+        NV_FORBID_COPY(MeshBuilder);
+        NV_FORBID_HEAPALLOC();
+    public:
+        MeshBuilder();
+        ~MeshBuilder();
+
+        // Builder methods. Attributes are kept in separate pools that vertices reference by index.
+        uint addPosition(const Vector3 & v);
+        uint addNormal(const Vector3 & v);
+        uint addTexCoord(const Vector2 & v, uint set = 0);
+        uint addColor(const Vector4 & v, uint set = 0);
+
+        void beginGroup(uint id);
+        void endGroup();
+
+        uint addMaterial(const char * name);
+        void beginMaterial(uint id);
+        void endMaterial();
+
+        void beginPolygon(uint id = 0);
+        uint addVertex(uint p, uint n = NIL, uint t0 = NIL, uint t1 = NIL, uint c0 = NIL, uint c1 = NIL, uint c2 = NIL); // The build*() functions treat a NIL index as "attribute absent".
+        uint addVertex(const Vector3 & p);
+        //uint addVertex(const Vector3 & p, const Vector3 & n, const Vector2 & t0 = Vector2(0), const Vector2 & t1 = Vector2(0), const Vector4 & c0 = Vector4(0), const Vector4 & c1 = Vector4(0));
+        bool endPolygon();
+
+        uint weldPositions();
+        uint weldNormals();
+        uint weldTexCoords(uint set = 0); // Returns the size of the set's texcoord pool after welding.
+        uint weldColors(uint set = 0); // Returns the size of the set's color pool after welding.
+        void weldVertices();
+
+        void optimize(); // eliminate duplicate components and duplicate vertices.
+        void removeUnusedMaterials(Array<uint> & newMaterialId); // newMaterialId[old id] receives the new id, or NIL if removed.
+        void sortFacesByGroup();
+        void sortFacesByMaterial();
+
+        void done(); // Ends the current group and material if they are still set.
+        void reset(); // Replaces all builder state with a fresh PrivateData.
+
+        // Hints. Each reserves room for 'count' additional elements.
+        void hintTriangleCount(uint count);
+        void hintVertexCount(uint count);
+        void hintPositionCount(uint count);
+        void hintNormalCount(uint count);
+        void hintTexCoordCount(uint count, uint set = 0);
+        void hintColorCount(uint count, uint set = 0);
+
+        // Helpers. Each argument is passed as the sole argument of addVertex().
+        void addTriangle(uint v0, uint v1, uint v2);
+        void addQuad(uint v0, uint v1, uint v2, uint v3);
+
+        // Get result. The meshes are allocated with new and returned as raw pointers.
+        TriMesh * buildTriMesh() const;
+        QuadTriMesh * buildQuadTriMesh() const;
+
+        enum Error {
+            Error_None,
+            Error_NonManifoldEdge,
+            Error_NonManifoldVertex,
+        };
+
+        HalfEdge::Mesh * buildHalfEdgeMesh(bool weldPositions, Error * error = NULL, Array<uint> * badFaces = NULL) const;
+
+        bool buildPositions(Array<Vector3> & positionArray); // Always returns true.
+        bool buildNormals(Array<Vector3> & normalArray); // Returns true if any vertex has a normal.
+        bool buildTexCoords(Array<Vector2> & texCoordArray, uint set = 0); // Returns true if any vertex has a texcoord in the set.
+        bool buildColors(Array<Vector4> & colorArray, uint set = 0); // Returns true if any vertex has a color in the set.
+		void buildVertexToPositionMap(Array<int> & map);
+
+
+        // Expose attribute indices of the unified vertex array.
+        uint vertexCount() const;
+        
+        uint positionCount() const;
+        uint normalCount() const;
+        uint texCoordCount(uint set = 0) const;
+        uint colorCount(uint set = 0) const;
+
+        uint materialCount() const;
+        const char * material(uint i) const;
+
+        uint positionIndex(uint vertex) const;
+        uint normalIndex(uint vertex) const;
+        uint texCoordIndex(uint vertex, uint set = 0) const;
+        uint colorIndex(uint vertex, uint set = 0) const;
+
+    private:
+
+        struct PrivateData;
+        PrivateData * d;
+
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_MESHBUILDER_H
diff --git a/thirdparty/thekla_atlas/nvmesh/MeshTopology.cpp b/thirdparty/thekla_atlas/nvmesh/MeshTopology.cpp
new file mode 100644
index 0000000000..e7e1dce421
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/MeshTopology.cpp
@@ -0,0 +1,122 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "nvmesh.h" // pch
+
+#include "nvcore/Array.h"
+#include "nvcore/BitArray.h"
+
+#include "nvmesh/MeshTopology.h"
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Edge.h"
+#include "nvmesh/halfedge/Face.h"
+
+using namespace nv;
+/// Computes the connected-component count, boundary-loop count, Euler number and genus of mesh.
+void MeshTopology::buildTopologyInfo(const HalfEdge::Mesh * mesh)
+{
+    const uint vertexCount = mesh->colocalVertexCount();
+    const uint faceCount = mesh->faceCount();
+    const uint edgeCount = mesh->edgeCount();
+
+    nvDebug( "--- Building mesh topology:\n" );
+
+    Array<uint> stack(faceCount);
+
+    BitArray bitFlags(faceCount);
+    bitFlags.clearAll();
+
+    // Compute connectivity: flood fill over faces with an explicit stack; bitFlags marks visited faces.
+    nvDebug( "---   Computing connectivity.\n" );
+
+    m_connectedCount = 0;
+
+    for(uint f = 0; f < faceCount; f++ ) {
+        if( bitFlags.bitAt(f) == false ) {
+            m_connectedCount++; // Every unvisited face found by the outer loop seeds a new component.
+
+            stack.pushBack( f );
+            while( !stack.isEmpty() ) {
+
+                const uint top = stack.back();
+                nvCheck(top != NIL);
+                stack.popBack();
+                // A face can be pushed several times before it is popped, hence the second visited test.
+                if( bitFlags.bitAt(top) == false ) {
+                    bitFlags.setBitAt(top);
+
+                    const HalfEdge::Face * face = mesh->faceAt(top);
+                    const HalfEdge::Edge * firstEdge = face->edge;
+                    const HalfEdge::Edge * edge = firstEdge;
+
+                    do {
+                        const HalfEdge::Face * neighborFace = edge->pair->face; // Face across this edge; NULL when there is none.
+                        if (neighborFace != NULL) {
+                            stack.pushBack(neighborFace->id);
+                        }
+                        edge = edge->next;
+                    } while(edge != firstEdge);
+                }
+            }
+        }
+    }
+    nvCheck(stack.isEmpty());
+    nvDebug( "---   %d connected components.\n", m_connectedCount );
+
+
+    // Count boundary loops. bitFlags is reused, now with one bit per edge.
+    nvDebug( "---   Counting boundary loops.\n" );
+    m_boundaryCount = 0;
+
+    bitFlags.resize(edgeCount);
+    bitFlags.clearAll();
+
+    // Don't forget to link the boundary otherwise this won't work.
+    for (uint e = 0; e < edgeCount; e++)
+    {
+        const HalfEdge::Edge * startEdge = mesh->edgeAt(e);
+        if (startEdge != NULL && startEdge->isBoundary() && bitFlags.bitAt(e) == false)
+        {
+            nvDebugCheck(startEdge->face != NULL);
+            nvDebugCheck(startEdge->pair->face == NULL);
+
+            startEdge = startEdge->pair; // Walk the loop on the side that has no face.
+
+            m_boundaryCount++;
+
+            const HalfEdge::Edge * edge = startEdge;
+            do {
+                bitFlags.setBitAt(edge->id / 2); // Presumably maps a half-edge id to its edge index -- TODO confirm.
+                edge = edge->next;
+            } while(startEdge != edge);
+        }
+    }
+    nvDebug("---   %d boundary loops found.\n", m_boundaryCount );
+
+
+    // Compute euler number: V - E + F, evaluated in unsigned arithmetic and then stored in an int.
+    m_eulerNumber = vertexCount - edgeCount + faceCount;
+    nvDebug("---   Euler number: %d.\n", m_eulerNumber);
+
+
+    // Compute genus. (only valid on closed connected surfaces; otherwise it stays -1)
+    m_genus = -1;
+    if( isClosed() && isConnected() ) {
+        m_genus = (2 - m_eulerNumber) / 2;
+        nvDebug("---   Genus: %d.\n", m_genus);
+    }
+}
+
+
+/// Returns true when every face of the mesh has exactly four edges (also true for a mesh without faces).
+/*static*/ bool MeshTopology::isQuadOnly(const HalfEdge::Mesh * mesh) {
+    const uint total = mesh->faceCount();
+
+    // Stop at the first face that is not a quad.
+    uint f = 0;
+    while (f < total && mesh->faceAt(f)->edgeCount() == 4) {
+        f++;
+    }
+
+    // Reaching the end means no counter-example was found.
+    return f == total;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/MeshTopology.h b/thirdparty/thekla_atlas/nvmesh/MeshTopology.h
new file mode 100644
index 0000000000..c3d7477b15
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/MeshTopology.h
@@ -0,0 +1,66 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_MESHTOPOLOGY_H
+#define NV_MESH_MESHTOPOLOGY_H
+
+#include <nvmesh/nvmesh.h>
+
+namespace nv
+{
+    namespace HalfEdge { class Mesh; }
+    class MeshAdjacency;
+
+    /// Mesh topology information, computed once at construction by buildTopologyInfo().
+    class MeshTopology
+    {
+    public:
+        MeshTopology(const HalfEdge::Mesh * mesh) { buildTopologyInfo(mesh); }
+
+        /// Determine if the mesh is connected (exactly one connected component).
+        bool isConnected() const { return m_connectedCount == 1; }
+
+        /// Determine if the mesh is closed. (Each edge is shared by two faces)
+        bool isClosed() const { return m_boundaryCount == 0; }
+
+        /// Return true if the mesh has the topology of a disk. The Euler number test is disabled.
+        bool isDisk() const { return isConnected() && m_boundaryCount == 1/* && m_eulerNumber == 1*/; }
+
+        /// Return the number of connected components.
+        int connectedCount() const { return m_connectedCount; }
+
+        /// Return the number of open holes (the number of boundary loops).
+        int holeCount() const { return m_boundaryCount; }
+
+        /// Return the genus of the mesh, or -1 when the mesh is not both closed and connected.
+        int genus() const { return m_genus; }
+
+        /// Return the euler number of the mesh.
+        int euler() const { return m_eulerNumber; }
+
+        /// Return true if every face of the mesh has four edges.
+        static bool isQuadOnly(const HalfEdge::Mesh * mesh);
+
+
+    private:
+
+        NVMESH_API void buildTopologyInfo(const HalfEdge::Mesh * mesh);
+
+    private:
+
+        /// Number of boundary loops.
+        int m_boundaryCount;		
+
+        /// Number of connected components.
+        int m_connectedCount;		
+
+        /// Euler number.
+        int m_eulerNumber;
+
+        /// Mesh genus.
+        int m_genus;
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_MESHTOPOLOGY_H
diff --git a/thirdparty/thekla_atlas/nvmesh/QuadTriMesh.cpp b/thirdparty/thekla_atlas/nvmesh/QuadTriMesh.cpp
new file mode 100644
index 0000000000..64a071abe9
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/QuadTriMesh.cpp
@@ -0,0 +1,36 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "QuadTriMesh.h"
+#include "Stream.h"
+
+using namespace nv;
+
+
+/// True when face i is a quad, as reported by Face::isQuadFace().
+bool QuadTriMesh::isQuadFace(uint i) const {
+    return m_faceArray[i].isQuadFace();
+}
+
+/// Returns vertex number v of face f; v must be below 4 for quads and below 3 for triangles.
+const QuadTriMesh::Vertex & QuadTriMesh::faceVertex(uint f, uint v) const {
+    if (isQuadFace(f)) { nvDebugCheck(v < 4); }
+    else { nvDebugCheck(v < 3); }
+
+    const uint vertexIndex = this->faceAt(f).v[v];
+    return this->vertexAt(vertexIndex);
+}
+
+
+namespace nv
+{
+    /// Streams a face: its id, then the four vertex slots in order.
+    static Stream & operator<< (Stream & s, QuadTriMesh::Face & face) {
+        return s << face.id << face.v[0] << face.v[1] << face.v[2] << face.v[3];
+    }
+
+    /// Streams the face array, then the BaseMesh part of the mesh.
+    Stream & operator<< (Stream & s, QuadTriMesh & mesh) {
+        return s << mesh.m_faceArray << static_cast<BaseMesh &>(mesh);
+    }
+}
+
diff --git a/thirdparty/thekla_atlas/nvmesh/QuadTriMesh.h b/thirdparty/thekla_atlas/nvmesh/QuadTriMesh.h
new file mode 100644
index 0000000000..b8465f2db0
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/QuadTriMesh.h
@@ -0,0 +1,60 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MESH_QUADTRIMESH_H
+#define NV_MESH_QUADTRIMESH_H
+
+#include "nvcore/Array.h"
+#include "nvmath/Vector.h"
+#include "nvmesh/nvmesh.h"
+#include "nvmesh/BaseMesh.h"
+
+namespace nv
+{
+    class Stream;
+
+    /// Mixed quad/triangle mesh: a BaseMesh plus an array of faces with three or four vertices.
+    class QuadTriMesh : public BaseMesh {
+    public:
+        struct Face;
+        typedef BaseMesh::Vertex Vertex;
+
+        QuadTriMesh() {}
+        /// Both counts are handed to the BaseMesh and face-array constructors.
+        QuadTriMesh(uint faceCount, uint vertexCount) : BaseMesh(vertexCount), m_faceArray(faceCount) {}
+
+        // Face methods.
+        uint faceCount() const { return m_faceArray.count(); }
+
+        const Face & faceAt(uint i) const { return m_faceArray[i]; }
+        Face & faceAt(uint i) { return m_faceArray[i]; }
+
+        const Array<Face> & faces() const { return m_faceArray; }
+        Array<Face> & faces() { return m_faceArray; }
+
+        /// True when face i is a quad.
+        bool isQuadFace(uint i) const;
+        /// Vertex number v of face f.
+        const Vertex & faceVertex(uint f, uint v) const;
+
+        /// Writes the face array followed by the BaseMesh part to the stream.
+        friend Stream & operator<< (Stream & s, QuadTriMesh & obj);
+
+    private:
+        Array<Face> m_faceArray;
+    };
+
+
+    /// QuadTriMesh face: an id and up to four vertex indices.
+    struct QuadTriMesh::Face {
+        uint id;
+        uint v[4]; // v[3] is NIL for a triangle.
+        bool isQuadFace() const {
+            return v[3] != NIL;
+        }
+    };
+
+} // nv namespace
+
+
+#endif // NV_MESH_QUADTRIMESH_H
diff --git a/thirdparty/thekla_atlas/nvmesh/TriMesh.cpp b/thirdparty/thekla_atlas/nvmesh/TriMesh.cpp
new file mode 100644
index 0000000000..bf10a474fb
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/TriMesh.cpp
@@ -0,0 +1,25 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "TriMesh.h"
+
+using namespace nv;
+
+
+/// Normal of triangle f: the normalized cross product of its two edges leaving vertex 0.
+Vector3 TriMesh::faceNormal(uint f) const {
+    const Face & tri = this->faceAt(f);
+    const Vector3 & origin = this->vertexAt(tri.v[0]).pos;
+    const Vector3 edge01 = this->vertexAt(tri.v[1]).pos - origin;
+    const Vector3 edge02 = this->vertexAt(tri.v[2]).pos - origin;
+    // normalizeSafe is handed Vector3(0.0f) and 0.0f as its second and third arguments.
+    return normalizeSafe(cross(edge01, edge02), Vector3(0.0f), 0.0f);
+}
+
+/// Get face vertex: corner v (0..2) of face f.
+const TriMesh::Vertex & TriMesh::faceVertex(uint f, uint v) const {
+    nvDebugCheck(v < 3);
+
+    const uint vertexIndex = this->faceAt(f).v[v];
+    return this->vertexAt(vertexIndex);
+}
+
diff --git a/thirdparty/thekla_atlas/nvmesh/TriMesh.h b/thirdparty/thekla_atlas/nvmesh/TriMesh.h
new file mode 100644
index 0000000000..bc5672c1ac
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/TriMesh.h
@@ -0,0 +1,51 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MESH_TRIMESH_H
+#define NV_MESH_TRIMESH_H
+
+#include "nvcore/Array.h"
+#include "nvmath/Vector.inl"
+#include "nvmesh/nvmesh.h"
+#include "nvmesh/BaseMesh.h"
+
+namespace nv
+{
+    /// Triangle mesh: a BaseMesh plus an array of three-vertex faces.
+    class TriMesh : public BaseMesh
+    {
+    public:
+        struct Face;
+        typedef BaseMesh::Vertex Vertex;
+
+        TriMesh(uint faceCount, uint vertexCount) : BaseMesh(vertexCount), m_faceArray(faceCount) {}
+
+        // Face methods.
+        uint faceCount() const { return m_faceArray.count(); }
+        const Face & faceAt(uint i) const { return m_faceArray[i]; }
+        Face & faceAt(uint i) { return m_faceArray[i]; }
+        const Array<Face> & faces() const { return m_faceArray; }
+        Array<Face> & faces() { return m_faceArray; }
+
+        NVMESH_API Vector3 faceNormal(uint f) const; // Normalized cross product of the face's edges from vertex 0.
+        NVMESH_API const Vertex & faceVertex(uint f, uint v) const; // v must be below 3.
+
+        // NOTE(review): this befriends the BaseMesh overload, not a TriMesh one -- confirm that is intended.
+        friend Stream & operator<< (Stream & s, BaseMesh & obj);
+
+    private:
+
+        Array<Face> m_faceArray;
+
+    };
+
+
+    /// TriMesh face: an id and three vertex indices.
+    struct TriMesh::Face {
+        uint id;
+        // Indices into the mesh vertex array; TriMesh::faceVertex passes them to vertexAt().
+        uint v[3];
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_TRIMESH_H
diff --git a/thirdparty/thekla_atlas/nvmesh/geometry/Bounds.cpp b/thirdparty/thekla_atlas/nvmesh/geometry/Bounds.cpp
new file mode 100644
index 0000000000..69fd1deb24
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/geometry/Bounds.cpp
@@ -0,0 +1,54 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#include "nvmesh.h" // pch
+
+#include "Bounds.h"
+
+#include "nvmesh/BaseMesh.h"
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Vertex.h"
+
+#include "nvmath/Box.inl"
+
+using namespace nv;
+
+/// Box grown to contain the position of every vertex of the mesh.
+Box MeshBounds::box(const BaseMesh * mesh) {
+    nvCheck(mesh != NULL);
+
+    // Start from cleared bounds and add one vertex position at a time.
+    Box result;
+    result.clearBounds();
+
+    const uint total = mesh->vertexCount();
+    for (uint i = 0; i < total; i++) {
+        const BaseMesh::Vertex & vertex = mesh->vertexAt(i);
+        result.addPointToBounds(vertex.pos);
+    }
+
+    return result;
+}
+
+/// Box grown to contain the position of every vertex of the half-edge mesh.
+Box MeshBounds::box(const HalfEdge::Mesh * mesh) {
+    nvCheck(mesh != NULL);
+
+    // Start from cleared bounds and add one vertex position at a time.
+    Box result;
+    result.clearBounds();
+
+    const uint total = mesh->vertexCount();
+    for (uint i = 0; i < total; i++) {
+        const HalfEdge::Vertex * vertex = mesh->vertexAt(i);
+        nvDebugCheck(vertex != NULL);
+        result.addPointToBounds(vertex->pos);
+    }
+
+    return result;
+}
+
+/*Sphere MeshBounds::sphere(const HalfEdge::Mesh * mesh)
+{
+    // @@ TODO
+    return Sphere();
+}*/
diff --git a/thirdparty/thekla_atlas/nvmesh/geometry/Bounds.h b/thirdparty/thekla_atlas/nvmesh/geometry/Bounds.h
new file mode 100644
index 0000000000..1cb5b7b905
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/geometry/Bounds.h
@@ -0,0 +1,28 @@
+// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
+
+#pragma once
+#ifndef NV_MESH_MESHBOUNDS_H
+#define NV_MESH_MESHBOUNDS_H
+
+#include <nvmath/Sphere.h>
+#include <nvmath/Box.h>
+
+#include <nvmesh/nvmesh.h>
+
+namespace nv
+{
+    class BaseMesh;
+    namespace HalfEdge { class Mesh; }
+
+    // Bounding volumes computation.
+    namespace MeshBounds
+    {
+        Box box(const BaseMesh * mesh); // Box containing every vertex position; mesh must not be NULL.
+        Box box(const HalfEdge::Mesh * mesh); // Same, for a half-edge mesh.
+        // NOTE(review): the only definition of sphere() in Bounds.cpp is commented out (@@ TODO), so calls would not link.
+        Sphere sphere(const HalfEdge::Mesh * mesh);
+    }
+
+} // nv namespace
+
+#endif // NV_MESH_MESHBOUNDS_H
diff --git a/thirdparty/thekla_atlas/nvmesh/geometry/Measurements.cpp b/thirdparty/thekla_atlas/nvmesh/geometry/Measurements.cpp
new file mode 100644
index 0000000000..e0c271663b
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/geometry/Measurements.cpp
@@ -0,0 +1,36 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "nvmesh.h" // pch
+
+#include "Measurements.h"
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Face.h"
+
+using namespace nv;
+
+/// Sums Face::area() over every face of the mesh.
+float nv::computeSurfaceArea(const HalfEdge::Mesh * mesh) {
+    float area = 0;
+
+    HalfEdge::Mesh::ConstFaceIterator it(mesh->faces());
+    while (!it.isDone()) {
+        area += it.current()->area();
+        it.advance();
+    }
+
+    nvDebugCheck(area >= 0);
+    return area;
+}
+
+/// Sums Face::parametricArea() over every face of the mesh.
+float nv::computeParametricArea(const HalfEdge::Mesh * mesh) {
+    float area = 0;
+
+    HalfEdge::Mesh::ConstFaceIterator it(mesh->faces());
+    while (!it.isDone()) {
+        area += it.current()->parametricArea();
+        it.advance();
+    }
+
+    return area;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/geometry/Measurements.h b/thirdparty/thekla_atlas/nvmesh/geometry/Measurements.h
new file mode 100644
index 0000000000..0be863b79e
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/geometry/Measurements.h
@@ -0,0 +1,18 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_MESH_MESHMEASUREMENTS_H
+#define NV_MESH_MESHMEASUREMENTS_H
+
+#include "nvmesh/nvmesh.h"
+
+namespace nv
+{
+    namespace HalfEdge { class Mesh; }  // Forward declaration; the mesh is only passed by pointer.
+    // Mesh-wide totals, obtained by summing the corresponding per-face quantity.
+	float computeSurfaceArea(const HalfEdge::Mesh * mesh);     // Sum of Face::area().
+	float computeParametricArea(const HalfEdge::Mesh * mesh);  // Sum of Face::parametricArea().
+
+} // nv namespace
+
+#endif // NV_MESH_MESHMEASUREMENTS_H
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Edge.cpp b/thirdparty/thekla_atlas/nvmesh/halfedge/Edge.cpp
new file mode 100644
index 0000000000..671650296c
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Edge.cpp
@@ -0,0 +1,57 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "nvmesh.h" // pch
+
+#include "Edge.h"
+#include "Vertex.h"
+
+#include "nvmath/Vector.inl"
+
+using namespace nv;
+using namespace HalfEdge;
+
+Vector3 Edge::midPoint() const {    // Point halfway between the two end vertices.
+    const Vector3 endPointSum = to()->pos + from()->pos;
+    return endPointSum * 0.5f;
+}
+
+float Edge::length() const {    // Distance between the two end vertices.
+    const Vector3 span = to()->pos - from()->pos;
+    return ::length(span);      // Qualified: the unqualified name would find this member function.
+}
+
+// Return the angle, in radians, between this edge and the previous one, measured at this edge's vertex.
+float Edge::angle() const {
+    const Vector3 & p = vertex->pos;
+    const Vector3 v0 = prev->vertex->pos - p;
+    const Vector3 v1 = next->vertex->pos - p;
+    // Rounding can push the cosine just outside [-1, 1], where acosf() returns NaN, so clamp it.
+    // A zero-length direction still produces NaN (0/0); that case is left untouched.
+    float cosAngle = dot(v0, v1) / (nv::length(v0) * nv::length(v1));
+    if (cosAngle > 1.0f) cosAngle = 1.0f; else if (cosAngle < -1.0f) cosAngle = -1.0f;
+    return acosf(cosAngle);
+}
+
+bool Edge::isValid() const
+{
+    // A null face is allowed; every other link has to be set.
+    const bool linked = (next != NULL) && (prev != NULL) && (pair != NULL) && (vertex != NULL);
+    if (!linked) return false;
+
+    // The neighbours and the pair must point back at this edge.
+    return next->prev == this && prev->next == this && pair->pair == this;
+}
+
+/*
+Edge * Edge::nextBoundary() {
+    nvDebugCheck(this->m_pair == NULL);
+
+}
+
+Edge * Edge::prevBoundary() {
+    nvDebugCheck(this->m_pair == NULL);
+
+}
+*/
+
+
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Edge.h b/thirdparty/thekla_atlas/nvmesh/halfedge/Edge.h
new file mode 100644
index 0000000000..25c47f4860
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Edge.h
@@ -0,0 +1,70 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_HALFEDGE_EDGE_H
+#define NV_MESH_HALFEDGE_EDGE_H
+
+#include "nvmath/Vector.h"
+
+namespace nv
+{
+    namespace HalfEdge { class Vertex; class Face; class Edge; }
+
+    /// Half-edge: one directed side of a mesh edge, linked to its neighbours through raw pointers.
+    class HalfEdge::Edge
+    {
+        NV_FORBID_COPY(Edge);
+    public:
+
+        uint id;            // Identifier supplied by whoever creates the edge.
+
+        Edge * next;        // Edge that follows this one.
+        Edge * prev;	// This is not strictly half-edge, but makes algorithms easier and faster.
+        Edge * pair;        // Opposite half-edge; to() reads its vertex.
+        Vertex * vertex;    // Vertex this edge starts from, see from().
+        Face * face;        // Adjacent face; isBoundary() tests it against NULL.
+
+
+        // Construct an unconnected edge with the given id: every link starts as NULL.
+        Edge(uint id) : id(id), next(NULL), prev(NULL), pair(NULL), vertex(NULL), face(NULL)
+        {
+        }
+
+
+        // Vertex queries. to() dereferences 'pair', so the pair must be linked before calling it.
+        const Vertex * from() const { return vertex; }
+        Vertex * from() { return vertex; }
+
+        const Vertex * to() const { return pair->vertex; }  // This used to be 'next->vertex', but that changed often when the connectivity of the mesh changes.
+        Vertex * to() { return pair->vertex; }
+
+
+        // Edge queries. Both setters also update the back pointer of 'e' when it is not NULL.
+        void setNext(Edge * e) { next = e; if (e != NULL) e->prev = this; }
+        void setPrev(Edge * e) { prev = e; if (e != NULL) e->next = this; }
+
+        // @@ Add these helpers:
+        //Edge * nextBoundary();
+        //Edge * prevBoundary();
+
+
+        // @@ It would be more simple to only check m_pair == NULL
+        // Face queries. True when this edge or its pair has no face ('pair' is read when 'face' is set).
+        bool isBoundary() const { return !(face && pair->face); }
+
+        // @@ This is not exactly accurate, we should compare the texture coordinates...
+        bool isSeam() const { return vertex != pair->next->vertex || next->vertex != pair->vertex; }
+        // Consistency check of the links of this edge; implemented in Edge.cpp.
+        bool isValid() const;
+
+        // Geometric queries.
+        Vector3 midPoint() const;
+        float length() const;
+        float angle() const;
+
+    };
+
+} // nv namespace
+
+
+#endif // NV_MESH_HALFEDGE_EDGE_H
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Face.cpp b/thirdparty/thekla_atlas/nvmesh/halfedge/Face.cpp
new file mode 100644
index 0000000000..9f6987154e
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Face.cpp
@@ -0,0 +1,268 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "nvmesh.h" // pch
+
+#include "Face.h"
+#include "Vertex.h"
+
+#include "nvmath/Fitting.h"
+#include "nvmath/Plane.h"
+#include "nvmath/Vector.inl"
+
+#include "nvcore/Array.h"
+
+
+using namespace nv;
+using namespace HalfEdge;
+
+/// Area of the face, computed as a fan of triangles around the first vertex.
+float Face::area() const
+{
+    const Vector3 & origin = edge->from()->pos;
+    const Edge * const stop = edge->prev;
+
+    float doubleArea = 0;
+    ConstEdgeIterator it(edges(edge->next));
+    while (it.current() != stop)
+    {
+        const Edge * e = it.current();
+        const Vector3 a = e->vertex->pos - origin;
+        const Vector3 b = e->next->vertex->pos - origin;
+        doubleArea += length(cross(a, b));  // Twice the area of the triangle (origin, a, b).
+        it.advance();
+    }
+    return doubleArea * 0.5f;
+}
+
+float Face::parametricArea() const
+{
+    // Same triangle fan as area(), but over the 'tex' coordinates of the vertices.
+    const Vector2 & origin = edge->from()->tex;
+    const Edge * const stop = edge->prev;
+
+    float sum = 0;
+    ConstEdgeIterator it(edges(edge->next));
+    while (it.current() != stop)
+    {
+        const Edge * e = it.current();
+        sum += triangleArea(origin, e->vertex->tex, e->next->vertex->tex);
+        it.advance();
+    }
+
+    return sum * 0.5f;
+}
+
+
+/// Perimeter of the face: the summed length of all its edges.
+float Face::boundaryLength() const
+{
+    float perimeter = 0;
+
+    ConstEdgeIterator it(edges());
+    while (!it.isDone())
+    {
+        perimeter += it.current()->length();
+        it.advance();
+    }
+    return perimeter;
+}
+
+
+/// Get face normal: the normalized sum of the cross products of a triangle fan around the first vertex.
+Vector3 Face::normal() const
+{
+    Vector3 n(0);
+    // Apex of the fan; it is taken from the first edge visited.
+    const Vertex * vertex0 = NULL;
+
+    for (ConstEdgeIterator it(edges()); !it.isDone(); it.advance())
+    {
+        const Edge * edge = it.current();
+        nvCheck(edge != NULL);
+        // The first edge only provides the apex; the edge whose successor starts at the apex is skipped.
+        if (vertex0 == NULL)
+        {
+            vertex0 = edge->vertex;
+        }
+        else if (edge->next->vertex != vertex0)
+        {
+            const HalfEdge::Vertex * vertex1 = edge->from();
+            const HalfEdge::Vertex * vertex2 = edge->to();
+
+            const Vector3 & p0 = vertex0->pos;
+            const Vector3 & p1 = vertex1->pos;
+            const Vector3 & p2 = vertex2->pos;
+
+            Vector3 v10 = p1 - p0;
+            Vector3 v20 = p2 - p0;
+            // The cross products are accumulated unnormalized.
+            n += cross(v10, v20);
+        }
+    }
+    // (0, 0, 1) is passed as second argument, presumably the value returned when 'n' cannot be normalized.
+    return normalizeSafe(n, Vector3(0, 0, 1), 0.0f);
+
+    // NOTE: the code below is an older implementation that is kept commented out.
+    // Get face points eliminating duplicates.
+    /*Array<Vector3> points(4);
+
+    points.append(m_edge->prev()->from()->pos);
+
+    for (ConstEdgeIterator it(edges()); !it.isDone(); it.advance())
+    {
+        const Edge * edge = it.current();
+        nvDebugCheck(edge != NULL);
+
+        const Vector3 & p = edge->from()->pos;
+        if (points.back() != p)
+        {
+            points.append(edge->from()->pos);
+        }
+    }
+
+    points.popBack();
+
+    if (points.count() < 3)
+    {
+        // Invalid normal.
+        return Vector3(0.0f);
+    }
+    else
+    {
+        // Compute regular normal.
+        Vector3 normal = normalizeSafe(cross(points[1] - points[0], points[2] - points[0]), Vector3(0.0f), 0.0f);
+
+#pragma NV_MESSAGE("TODO: make sure these three points are not colinear")
+
+        if (points.count() > 3)
+        {
+            // Compute best fitting plane to the points.
+            Plane plane = Fit::bestPlane(points.count(), points.buffer());
+
+            // Adjust normal orientation.
+            if (dot(normal, plane.vector()) > 0) {
+                normal = plane.vector();
+            }
+            else {
+                normal = -plane.vector();
+            }
+        }
+
+        nvDebugCheck(isNormalized(normal));
+        return normal;
+    }*/
+}
+
+Vector3 Face::centroid() const
+{
+    // Average position of the vertices the edges of the face start from.
+    Vector3 accum(0.0f);
+    uint vertexTotal = 0;
+    ConstEdgeIterator it(edges());
+    while (!it.isDone())
+    {
+        accum += it.current()->from()->pos;
+        ++vertexTotal;
+        it.advance();
+    }
+    return accum / float(vertexTotal);
+}
+
+
+bool Face::isValid() const
+{
+    // Every edge must belong to this face and be valid, and so must its pair.
+    uint edgeTotal = 0;
+
+    ConstEdgeIterator it(edges());
+    while (!it.isDone())
+    {
+        const Edge * e = it.current();
+        const bool ok = (e->face == this) && e->isValid() && e->pair->isValid();
+        if (!ok) return false;
+
+        ++edgeTotal;
+        it.advance();
+    }
+    return edgeTotal >= 3;  // Fewer than three edges is not a valid face.
+}
+
+
+// Tell whether the given edge is one of the edges of this face.
+bool Face::contains(const Edge * e) const
+{
+    ConstEdgeIterator it(edges());
+    while (!it.isDone() && it.current() != e) {
+        it.advance();
+    }
+    return !it.isDone();    // The walk stopped early only if the edge was found.
+}
+
+// Returns the position of the given edge within this face (0 for 'edge'), or NIL when the face does not contain it.
+uint Face::edgeIndex(const Edge * e) const
+{
+    uint index = 0;     // Unsigned, to match the return type.
+    for (ConstEdgeIterator it(edges()); !it.isDone(); it.advance(), index++)
+    {
+        if (it.current() == e) return index;
+    }
+    return NIL;
+}
+
+
+// Returns the idx-th edge of this face, counting from 'edge'; NULL when idx is out of range.
+Edge * Face::edgeAt(uint idx) {
+    uint i = 0;     // Unsigned, so that the comparison with idx does not mix signedness.
+    for (EdgeIterator it(edges()); !it.isDone(); it.advance(), i++) {
+        if (i == idx) return it.current();
+    }
+    return NULL;
+}
+// Read-only variant: returns the idx-th edge of this face, counting from 'edge'; NULL when idx is out of range.
+const Edge * Face::edgeAt(uint idx) const {
+    uint i = 0;     // Unsigned, so that the comparison with idx does not mix signedness.
+    for (ConstEdgeIterator it(edges()); !it.isDone(); it.advance(), i++) {
+        if (i == idx) return it.current();
+    }
+    return NULL;
+}
+
+
+// Number of edges in this face, obtained by walking the whole edge loop.
+uint Face::edgeCount() const {
+    uint total = 0;
+    ConstEdgeIterator it(edges());
+    while (!it.isDone()) { total++; it.advance(); }
+    return total;
+}
+
+// A face is a boundary face as soon as one of its edges has a pair without face.
+bool Face::isBoundary() const
+{
+    ConstEdgeIterator it(edges());
+    while (!it.isDone())
+    {
+        const Edge * edge = it.current();
+        nvDebugCheck(edge->pair != NULL);
+
+        if (edge->pair->face == NULL) return true;
+        it.advance();
+    }
+    return false;
+}
+
+// Number of edges of this face whose pair has no face.
+uint Face::boundaryCount() const
+{
+    uint total = 0;
+    ConstEdgeIterator it(edges());
+    while (!it.isDone())
+    {
+        const Edge * edge = it.current();
+        nvDebugCheck(edge->pair != NULL);
+
+        total += (edge->pair->face == NULL) ? 1 : 0;
+        it.advance();
+    }
+    return total;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Face.h b/thirdparty/thekla_atlas/nvmesh/halfedge/Face.h
new file mode 100644
index 0000000000..677f8666f0
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Face.h
@@ -0,0 +1,106 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_HALFEDGE_FACE_H
+#define NV_MESH_HALFEDGE_FACE_H
+
+#include <nvmesh/halfedge/Edge.h>
+
+namespace nv
+{
+    namespace HalfEdge { class Vertex; class Face; class Edge; }
+
+    /// Face of a half-edge mesh.
+    class HalfEdge::Face
+    {
+        NV_FORBID_COPY(Face);
+    public:
+
+        uint id;            // Identifier supplied by whoever creates the face.
+        uint16 group;       // Initialised to ~0 by the constructor.
+        uint16 material;    // Initialised to ~0 by the constructor.
+        Edge * edge;        // Edge the default iteration starts from; NULL until assigned.
+        // Creates a face that has no edge yet.
+
+        Face(uint id) : id(id), group(~0), material(~0), edge(NULL) {}
+        // Geometric queries (implemented in Face.cpp).
+        float area() const;
+        float parametricArea() const;
+        float boundaryLength() const;
+        Vector3 normal() const;
+        Vector3 centroid() const;
+        // Consistency check of the edge loop.
+        bool isValid() const;
+        // Edge lookup.
+        bool contains(const Edge * e) const;
+        uint edgeIndex(const Edge * e) const;
+        
+        Edge * edgeAt(uint idx);
+        const Edge * edgeAt(uint idx) const;
+        // Edge and boundary counting.
+        uint edgeCount() const;
+        bool isBoundary() const;
+        uint boundaryCount() const;
+        // Iteration starts on the given edge and follows 'next'. m_end stays NULL until the first advance(),
+        // so isDone() only becomes true once the walk is back on the starting edge (or at once for a NULL edge).
+        // The iterator that visits the edges of this face in clockwise order.
+        class EdgeIterator //: public Iterator<Edge *>
+        {
+        public:
+            EdgeIterator(Edge * e) : m_end(NULL), m_current(e) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->next;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual Edge * current() const { return m_current; }
+            Vertex * vertex() const { return m_current->vertex; }
+
+        private:
+            Edge * m_end;
+            Edge * m_current;
+        };
+        // Iterate from 'edge', or from the given edge, which nvDebugCheck expects to belong to this face.
+        EdgeIterator edges() { return EdgeIterator(edge); }
+        EdgeIterator edges(Edge * e)
+        { 
+            nvDebugCheck(contains(e));
+            return EdgeIterator(e); 
+        }
+        // Read-only counterpart of EdgeIterator; it can also be built from the current edge of an EdgeIterator.
+        // The iterator that visits the edges of this face in clockwise order.
+        class ConstEdgeIterator //: public Iterator<const Edge *>
+        {
+        public:
+            ConstEdgeIterator(const Edge * e) : m_end(NULL), m_current(e) { }
+            ConstEdgeIterator(const EdgeIterator & it) : m_end(NULL), m_current(it.current()) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->next;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual const Edge * current() const { return m_current; }
+            const Vertex * vertex() const { return m_current->vertex; }
+
+        private:
+            const Edge * m_end;
+            const Edge * m_current;
+        };
+
+        ConstEdgeIterator edges() const { return ConstEdgeIterator(edge); }
+        ConstEdgeIterator edges(const Edge * e) const
+        { 
+            nvDebugCheck(contains(e));
+            return ConstEdgeIterator(e); 
+        }
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_HALFEDGE_FACE_H
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Mesh.cpp b/thirdparty/thekla_atlas/nvmesh/halfedge/Mesh.cpp
new file mode 100644
index 0000000000..0012513bce
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Mesh.cpp
@@ -0,0 +1,1284 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "nvmesh.h" // pch
+
+#include "Mesh.h"
+#include "Edge.h"
+#include "Vertex.h"
+#include "Face.h"
+
+#include "nvmesh/TriMesh.h"
+#include "nvmesh/QuadTriMesh.h"
+#include "nvmesh/MeshBuilder.h"
+
+#include "nvmath/Vector.inl"
+#include "nvcore/Array.inl"
+#include "nvcore/HashMap.inl"
+
+
+using namespace nv;
+using namespace HalfEdge;
+
+// Create an empty mesh: no colocal vertex positions and no rejected faces yet.
+Mesh::Mesh() : m_colocalVertexCount(0) {
+    this->errorCount = 0;
+}
+
+// Build a copy of 'mesh': the vertex attributes are duplicated and the faces are re-added one by one.
+Mesh::Mesh(const Mesh * mesh)
+{
+    errorCount = 0;
+
+    // Duplicate the vertices, keeping their ids.
+    const uint vertexCount = mesh->vertexCount();
+    m_vertexArray.resize(vertexCount);
+
+    for (uint v = 0; v != vertexCount; ++v)
+    {
+        const Vertex * vertex = mesh->vertexAt(v);
+        nvDebugCheck(vertex->id == v);
+
+        Vertex * copy = new Vertex(v);
+        copy->pos = vertex->pos;
+        copy->nor = vertex->nor;
+        copy->tex = vertex->tex;
+        m_vertexArray[v] = copy;
+    }
+
+    // No colocals are linked here, so the count is simply the number of vertices.
+    m_colocalVertexCount = vertexCount;
+
+    // Rebuild each face from the ids of the vertices its edges start from.
+    Array<uint> faceIndices(3);
+    const uint faceCount = mesh->faceCount();
+    for (uint f = 0; f != faceCount; ++f)
+    {
+        Face::ConstEdgeIterator it(mesh->faceAt(f)->edges());
+        while (!it.isDone())
+        {
+            faceIndices.append(it.current()->from()->id);
+            it.advance();
+        }
+
+        addFace(faceIndices);
+        faceIndices.clear();
+    }
+}
+
+// Release the vertices, edges and faces held by the mesh.
+Mesh::~Mesh() {
+    this->clear();
+}
+
+
+void Mesh::clear()
+{
+    // Vertices are deleted through the vertex array.
+    deleteAll(m_vertexArray);
+    m_vertexArray.clear();
+
+    // Edges are deleted through the edge map: m_edgeArray only contains half of them,
+    // so deleteAll(m_edgeArray) would miss the other half.
+    foreach(e, m_edgeMap) { delete m_edgeMap[e].value; }
+    m_edgeArray.clear();
+    m_edgeMap.clear();
+
+    // Faces are deleted through the face array.
+    deleteAll(m_faceArray);
+    m_faceArray.clear();
+}
+
+
+// Append a new vertex at 'pos'; its id is the index it gets in the vertex array.
+Vertex * Mesh::addVertex(const Vector3 & pos) {
+    nvDebugCheck(isFinite(pos));
+
+    Vertex * created = new Vertex(m_vertexArray.count());
+    created->pos = pos;
+
+    // The array keeps the pointer; clear() deletes it.
+    m_vertexArray.append(created);
+    return created;
+//    return addVertex(m_vertexArray.count(), pos);
+}
+
+/*Vertex * Mesh::addVertex(uint id, const Vector3 & pos)
+{
+    nvDebugCheck(isFinite(pos));
+
+    Vertex * v = new Vertex(id);
+    v->pos = pos;
+    m_vertexArray.append(v);
+
+    return v;
+}*/
+
+/*void Mesh::addVertices(const Mesh * mesh)
+{
+nvCheck(mesh != NULL);
+
+// Add mesh vertices
+for (uint v = 0; v < vertexCount; v++)
+{
+const Vertex * vertex = mesh->vertexAt(v);
+nvDebugCheck(vertex != NULL);
+
+Vertex * v = addVertex(vertex->pos());
+nvDebugCheck(v != NULL);
+
+v->setNor(vertex->nor());
+v->setTex(vertex->tex());
+}
+}*/
+
+
+/// Link colocal vertices based on geometric location only: a vertex whose position is already known is linked to the first vertex found there.
+void Mesh::linkColocals()
+{
+    nvDebug("--- Linking colocals:\n");
+
+    // Maps each distinct position to the first vertex found at that position.
+    const uint vertexCount = this->vertexCount();
+    HashMap<Vector3, Vertex *> firstAtPosition(vertexCount);
+
+    for (uint v = 0; v != vertexCount; ++v)
+    {
+        Vertex * vertex = vertexAt(v);
+        Vertex * colocal;
+
+        const bool seen = firstAtPosition.get(vertex->pos, &colocal);
+        if (!seen)
+        {
+            firstAtPosition.add(vertex->pos, vertex);
+            continue;
+        }
+        colocal->linkColocal(vertex);
+    }
+
+    // One map entry per distinct position.
+    m_colocalVertexCount = firstAtPosition.count();
+    nvDebug("---   %d vertex positions.\n", m_colocalVertexCount);
+
+    // @@ Remove duplicated vertices? or just leave them as colocals?
+}
+
+// Link colocals using a precomputed map: vertices that share a canonicalMap entry are linked together.
+void Mesh::linkColocalsWithCanonicalMap(const Array<uint> & canonicalMap)
+{
+    nvDebug("--- Linking colocals:\n");
+
+    // The lookup table needs one slot per canonical index, i.e. the largest index plus one.
+    uint slotCount = 0;
+    foreach(i, canonicalMap) {
+        slotCount = max(slotCount, canonicalMap[i] + 1);
+    }
+
+    // First vertex seen for each canonical index; NULL while none has been seen.
+    Array<Vertex *> firstVertex;
+    firstVertex.resize(slotCount, NULL);
+
+    m_colocalVertexCount = 0;
+    const uint vertexCount = this->vertexCount();
+    for (uint v = 0; v != vertexCount; ++v)
+    {
+        Vertex * vertex = vertexAt(v);
+        const uint slot = canonicalMap[v];
+        Vertex * colocal = firstVertex[slot];
+        if (colocal == NULL)
+        {
+            // New canonical index: this vertex becomes its representative.
+            firstVertex[slot] = vertex;
+            m_colocalVertexCount++;
+            continue;
+        }
+        nvDebugCheck(vertex->pos == colocal->pos);
+        colocal->linkColocal(vertex);
+    }
+    nvDebug("---   %d vertex positions.\n", m_colocalVertexCount);
+}
+
+
+// Append a face without edges; its id is the index it gets in the face array.
+Face * Mesh::addFace() {
+    Face * created = new Face(m_faceArray.count());
+    m_faceArray.append(created);
+    return created;
+}
+
+// Add a triangle from three vertex indices; returns NULL when the face cannot be added.
+Face * Mesh::addFace(uint v0, uint v1, uint v2) {
+    Array<uint> triangle(3);
+    triangle << v0 << v1 << v2;
+    return addFace(triangle, 0, 3);
+}
+
+// Add a quad from four vertex indices; returns NULL when the face cannot be added.
+Face * Mesh::addFace(uint v0, uint v1, uint v2, uint v3) {
+    Array<uint> quad(4);
+    quad << v0 << v1 << v2 << v3;
+    return addFace(quad, 0, 4);
+}
+
+// Add a face made of all the indices in 'indexArray'.
+Face * Mesh::addFace(const Array<uint> & indexArray) {
+    return this->addFace(indexArray, 0, indexArray.count());
+}
+
+
+Face * Mesh::addFace(const Array<uint> & indexArray, uint first, uint num)
+{   // Add a face over the 'num' indices starting at indexArray[first]; returns the new face, or NULL when it is rejected.
+    nvDebugCheck(first < indexArray.count());
+    nvDebugCheck(num <= indexArray.count()-first);
+    nvDebugCheck(num > 2);
+    // Faces refused by canAddFace() are counted in errorCount and nothing is created for them.
+    if (!canAddFace(indexArray, first, num)) {
+        errorCount++;
+        return NULL;
+    }
+
+    Face * f = new Face(m_faceArray.count());
+    // Create one edge per side, chaining the edges with setNext() and assigning them the new face.
+    Edge * firstEdge = NULL;
+    Edge * last = NULL;
+    Edge * current = NULL;
+
+    for(uint i = 0; i < num-1; i++)
+    {
+        current = addEdge(indexArray[first+i], indexArray[first+i+1]);
+        nvCheck(current != NULL && current->face == NULL);
+
+        current->face = f;
+
+        if (last != NULL) last->setNext(current);
+        else firstEdge = current;
+
+        last = current;
+    }
+    // Closing edge, from the last index back to the first one.
+    current = addEdge(indexArray[first+num-1], indexArray[first]);
+    nvCheck(current != NULL && current->face == NULL);
+
+    current->face = f;
+    // Close the loop: last -> closing edge -> first edge.
+    last->setNext(current);
+    current->setNext(firstEdge);
+
+    f->edge = firstEdge;
+    m_faceArray.append(f);
+
+    return f;
+}
+
+/*void Mesh::addFaces(const Mesh * mesh)
+{
+nvCheck(mesh != NULL);
+
+Array indexArray;
+// Add faces
+
+}*/
+
+
+// Return true if the face can be added to the manifold mesh. On failure errorIndex0/errorIndex1 hold the offending edge.
+bool Mesh::canAddFace(const Array<uint> & indexArray, uint first, uint num) const
+{
+    for (uint j = num - 1, i = 0; i < num; j = i++) {
+        if (!canAddEdge(indexArray[first+j], indexArray[first+i])) {
+            errorIndex0 = indexArray[first+j];
+            errorIndex1 = indexArray[first+i];
+            return false;
+        }
+    }
+
+    // We also have to make sure the face does not have any duplicate edge!
+    for (uint i = 0; i < num; i++) {
+        const uint i0 = indexArray[first + i + 0];
+        const uint i1 = indexArray[first + (i + 1)%num];
+
+        for (uint j = i + 1; j < num; j++) {
+            const uint j0 = indexArray[first + j + 0];
+            const uint j1 = indexArray[first + (j + 1)%num];
+            if (i0 == j0 && i1 == j1) {
+                // Report the repeated edge as well, rather than leaving the indices of an earlier failure.
+                errorIndex0 = i0;
+                errorIndex1 = i1;
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+// Return true if the edge doesn't exist or doesn't have any adjacent face.
+// Degenerate edges, whose end points are the same vertex or colocal vertices, are refused.
+bool Mesh::canAddEdge(uint i, uint j) const
+{
+    if (i == j) {
+        return false;
+    }
+
+    // Same check, but taking into account colocal vertices.
+    const Vertex * origin = vertexAt(i);
+    const Vertex * target = vertexAt(j);
+
+    Vertex::ConstVertexIterator it(origin->colocals());
+    while (!it.isDone())
+    {
+        if (it.current() == target) return false;
+        it.advance();
+    }
+
+    // An existing edge is acceptable as long as it has no face yet:
+    // the face being added could become that edge's face.
+    const Edge * existing = findEdge(i, j);
+    if (existing == NULL) return true;
+
+    return existing->face == NULL;
+}
+
+Edge * Mesh::addEdge(uint i, uint j)
+{
+    nvCheck(i != j);
+    // Return the edge from vertex i to vertex j, creating it when findEdge() does not find one.
+    Edge * edge = findEdge(i, j);
+
+    if (edge != NULL) {
+        // Edge may already exist, but its face must not be set.
+        nvDebugCheck(edge->face == NULL);
+
+        // Nothing else to do!
+
+    }
+    else {
+        // Add new edge.
+
+        // Lookup pair.
+        Edge * pair = findEdge(j, i);
+
+        if (pair != NULL)
+        {
+            // The second edge of a pair gets the id that follows the id of its pair.
+            edge = new Edge(pair->id + 1);
+
+            // Link edge pairs.
+            edge->pair = pair;
+            pair->pair = edge;
+
+            // @@ I'm not sure this is necessary!
+            pair->vertex->setEdge(pair);
+        }
+        else
+        {
+            // Create edge. Its id is twice the index it is about to get in m_edgeArray.
+            edge = new Edge(2*m_edgeArray.count());
+
+            // Add only unpaired edges.
+            m_edgeArray.append(edge);
+        }
+        // The new edge starts at vertex i and is registered in the edge map under the (i, j) key.
+        edge->vertex = m_vertexArray[i];
+        m_edgeMap.add(Key(i,j), edge);
+    }
+
+    // Face and Next are set by addFace.
+
+    return edge;
+}
+
+
+/// Find the edge going from vertex i to vertex j, trying every combination of their colocals. Returns NULL when there is none.
+Edge * Mesh::findEdge(uint i, uint j) const
+{
+    Edge * edge = NULL;
+
+    const Vertex * v0 = vertexAt(i);
+    const Vertex * v1 = vertexAt(j);
+
+    // Test all colocal pairs.
+    for(Vertex::ConstVertexIterator it0(v0->colocals()); !it0.isDone(); it0.advance())
+    {
+        for(Vertex::ConstVertexIterator it1(v1->colocals()); !it1.isDone(); it1.advance())
+        {
+            Key key(it0.current()->id, it1.current()->id);
+            // Without _DEBUG the first hit is returned at once; with it, the remaining pairs are still visited.
+            if (edge == NULL) {
+                m_edgeMap.get(key, &edge);
+#if !defined(_DEBUG)
+                if (edge != NULL) return edge;
+#endif
+            }
+            else {
+                // Make sure that only one edge is found.
+                nvDebugCheck(!m_edgeMap.get(key));
+            }
+        }
+    }
+
+    return edge;
+}
+
+/// Link boundary edges once the mesh has been created: every edge still missing its pair
+/// gets a new one, and the pairs that have no face are then chained together.
+void Mesh::linkBoundary()
+{
+    nvDebug("--- Linking boundaries:\n");
+
+    int num = 0;
+    const uint edgeCount = this->edgeCount();
+
+    // Create boundary edges.
+    for (uint e = 0; e != edgeCount; ++e)
+    {
+        Edge * edge = edgeAt(e);
+        if (edge == NULL || edge->pair != NULL) continue;
+
+        // The new pair runs the other way: from the start of edge->next back to the start of 'edge'.
+        const uint i = edge->from()->id;
+        const uint j = edge->next->from()->id;
+        Edge * pair = new Edge(edge->id + 1);
+
+        Key key(j,i);
+        nvCheck(!m_edgeMap.get(key));
+
+        pair->vertex = m_vertexArray[j];
+        m_edgeMap.add(key, pair);
+
+        edge->pair = pair;
+        pair->pair = edge;
+        ++num;
+    }
+
+    // Link boundary edges.
+    for (uint e = 0; e != edgeCount; ++e)
+    {
+        Edge * edge = edgeAt(e);
+        if (edge == NULL || edge->pair->face != NULL) continue;
+        linkBoundaryEdge(edge->pair);
+    }
+
+    nvDebug("---   %d boundary edges.\n", num);
+}
+
+/// Link this boundary edge: find the face-less edge that follows it and make that edge its 'next'.
+void Mesh::linkBoundaryEdge(Edge * edge)
+{
+    nvCheck(edge->face == NULL);
+
+    // Make sure next pointer has not been set. @@ We want to be able to relink boundary edges after mesh changes.
+    //nvCheck(edge->next() == NULL);
+    // Hop from face to face around the vertex where 'edge' ends, until a pair without face is reached.
+    Edge * next = edge;
+    while(next->pair->face != NULL) {
+        // Get pair prev, by following the 'next' pointers around the face of the pair.
+        Edge * e = next->pair->next;
+        while (e->next != next->pair) {
+            e = e->next;
+        }
+        next = e;
+    }
+    edge->setNext(next->pair);
+
+    // Adjust vertex edge, so that it's the boundary edge. (required for isBoundary())
+    if (edge->vertex->edge != edge)
+    {
+        // Multiple boundaries in the same edge.
+        //nvCheck( edge->vertex()->edge() == NULL || edge->vertex()->edge()->face() != NULL );
+        edge->vertex->edge = edge;
+    }
+}
+
+
+/// Convert to tri mesh: vertices are copied one to one and every face is split into a fan of triangles. Returns a TriMesh allocated with new.
+TriMesh * Mesh::toTriMesh() const
+{
+    uint triangleCount = 0;
+
+    // Count triangle faces: a face with n edges contributes n - 2 triangles.
+    const uint faceCount = this->faceCount();
+    for(uint f = 0; f < faceCount; f++)
+    {
+        const Face * face = faceAt(f);
+        triangleCount += face->edgeCount() - 2;
+    }
+
+    TriMesh * triMesh = new TriMesh(triangleCount, vertexCount());
+
+    // Add vertices.
+    Array<TriMesh::Vertex> & vertices = triMesh->vertices();
+
+    const uint vertexCount = this->vertexCount();
+    for(uint v = 0; v < vertexCount; v++)
+    {
+        const Vertex * vertex = vertexAt(v);
+
+        TriMesh::Vertex triVertex;
+        triVertex.id = vertices.count();
+        triVertex.pos = vertex->pos;
+        triVertex.nor = vertex->nor;
+        triVertex.tex = vertex->tex;
+
+        vertices.append(triVertex);
+    }
+
+    // Add triangles.
+    Array<TriMesh::Face> & triangles = triMesh->faces();
+
+    for(uint f = 0; f < faceCount; f++)
+    {
+        const Face * face = faceAt(f);
+
+        // @@ Triangulate arbitrary polygons correctly.
+        const uint v0 = face->edge->vertex->id;
+        uint v1 = face->edge->next->vertex->id;
+
+        for(Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            uint v2 = it.current()->vertex->id;
+
+            // Skip the first two vertices.
+            if (v2 == v0 || v2 == v1) continue;
+
+            TriMesh::Face triangle;
+            triangle.id = triangles.count();
+            triangle.v[0] = v0;
+            triangle.v[1] = v1;
+            triangle.v[2] = v2;
+            // The vertex just used becomes the middle corner of the next triangle of the fan.
+            v1 = v2;
+
+            triangles.append(triangle);
+        }
+    }
+
+    return triMesh;
+}
+
+// Convert to a QuadTriMesh through a MeshBuilder: one position/normal/texcoord per vertex
+// and one polygon per face.
+QuadTriMesh * Mesh::toQuadTriMesh() const
+{
+    MeshBuilder builder;
+
+    // Vertex attributes, in vertex order.
+    const uint vertexCount = this->vertexCount();
+    builder.hintVertexCount(vertexCount);
+    for (uint v = 0; v != vertexCount; ++v)
+    {
+        const Vertex * source = vertexAt(v);
+        builder.addPosition(source->pos);
+        builder.addNormal(source->nor);
+        builder.addTexCoord(source->tex);
+    }
+
+    // Polygons. Each corner passes its vertex id three times to addVertex().
+    const uint faceCount = this->faceCount();
+    builder.hintTriangleCount(faceCount);
+    for (uint f = 0; f != faceCount; ++f)
+    {
+        builder.beginPolygon();
+
+        Face::ConstEdgeIterator it(faceAt(f)->edges());
+        while (!it.isDone())
+        {
+            const uint id = it.current()->vertex->id;
+            builder.addVertex(id, id, id);
+            it.advance();
+        }
+
+        builder.endPolygon();
+    }
+
+    builder.done();
+    return builder.buildQuadTriMesh();
+}
+
+
+// Triangulate in place. Nothing is done when every face already has three edges; otherwise
+// the vertices are kept and all the edges and faces are rebuilt as triangle fans.
+void Mesh::triangulate() {
+    bool all_triangles = true;
+
+    const uint faceCount = m_faceArray.count();
+    for (uint f = 0; f < faceCount; f++) {
+        Face * face = m_faceArray[f];
+        if (face->edgeCount() != 3) {
+            all_triangles = false;
+            break;
+        }
+    }
+    if (all_triangles) {
+        return;
+    }
+
+    // Do not touch vertices, but rebuild edges and faces. The old edges are gathered from the edge
+    // map, as in clear(): m_edgeArray only contains half of the edges, so deleting through it
+    // alone would leak the other half once the map has been cleared.
+    Array<Edge *> edgeArray;
+    Array<Face *> faceArray;
+    foreach(i, m_edgeMap) { edgeArray.append(m_edgeMap[i].value); }
+    m_edgeArray.clear();
+    swap(faceArray, m_faceArray);
+    m_edgeMap.clear();
+
+    for (uint f = 0; f < faceCount; f++) {
+        Face * face = faceArray[f];
+
+        // Trivial fan-like triangulation.
+        const uint v0 = face->edge->vertex->id;
+        uint v2, v1 = uint(-1);     // uint(-1) stands for "no previous vertex yet".
+        for (Face::EdgeIterator it(face->edges()); !it.isDone(); it.advance()) {
+            Edge * edge = it.current();
+            v2 = edge->to()->id;
+            if (v2 == v0) break;
+            if (v1 != uint(-1)) addFace(v0, v1, v2);
+            v1 = v2;
+        }
+    }
+
+    nvDebugCheck(m_faceArray.count() > faceCount); // triangle count > face count
+
+    linkBoundary();
+
+    // Release the old edges and faces.
+    deleteAll(edgeArray);
+    deleteAll(faceArray);
+}
+
+
+/*
+Fixing T-junctions.
+
+- Find T-junctions. Find  vertices that are on an edge. 
+    - This test is approximate.
+    - Insert edges on a spatial index to speedup queries.
+    - Consider only open edges, that is edges that have no pairs.
+    - Consider only vertices on boundaries.
+- Close T-junction.
+    - Split edge.
+Returns true when at least one edge was split.
+*/
+bool Mesh::splitBoundaryEdges() {
+    // Gather the candidate vertices first: only boundary vertices are considered.
+    Array<Vertex *> boundaryVertices;
+
+    foreach(i, m_vertexArray) {
+        Vertex * v = m_vertexArray[i];
+        if (v->isBoundary()) {
+            boundaryVertices.append(v);
+        }
+    }
+
+    nvDebug("Fixing T-junctions:\n");
+
+    int splitCount = 0;
+
+    foreach(v, boundaryVertices) {
+        Vertex * vertex = boundaryVertices[v];
+
+        Vector3 x0 = vertex->pos;
+
+        // Find edges that this vertex overlaps with.
+        foreach(e, m_edgeArray) {   // NOTE(review): splitBoundaryEdge() below may change m_edgeArray during this loop - confirm foreach copes.
+        //for (uint e = 0; e < m_edgeArray.count(); e++) {
+            Edge * edge = m_edgeArray[e];
+            if (edge != NULL && edge->isBoundary()) {
+                // The end points of the edge itself are not candidates.
+                if (edge->from() == vertex || edge->to() == vertex) {
+                    continue;
+                }
+
+                Vector3 x1 = edge->from()->pos;
+                Vector3 x2 = edge->to()->pos;
+
+                Vector3 v01 = x0 - x1;
+                Vector3 v21 = x2 - x1;
+                // l is the length of the edge and d the distance from x0 to the line through x1 and x2.
+                float l = length(v21);
+                float d = length(cross(v01, v21)) / l;
+                // NOTE(review): l is zero for a degenerate edge, which makes d a 0/0 - confirm isZero() rejects it.
+                if (isZero(d)) {
+                    float t = dot(v01, v21) / (l * l);
+                    // t is the parameter of the projection of x0 on the edge: 0 at x1 and 1 at x2.
+                    // @@ Snap x0 to x1 or x2, if too close? No, do vertex snapping elsewhere.
+                    /*if (equal(t, 0.0f, 0.01f)) {
+                        //vertex->setPos(x1);
+                    }
+                    else if (equal(t, 1.0f, 0.01f)) {
+                        //vertex->setPos(x2);
+                    }
+                    else*/
+                    if (t > 0.0f + NV_EPSILON && t < 1.0f - NV_EPSILON) {
+                        nvDebugCheck(equal(lerp(x1, x2, t), x0));
+
+                        Vertex * splitVertex = splitBoundaryEdge(edge, t, x0);
+                        vertex->linkColocal(splitVertex);   // @@ Should we do this here?
+                        splitCount++;
+                    }
+                }
+            }
+        }
+    }
+
+    nvDebug(" - %d edges split.\n", splitCount);
+
+    nvDebugCheck(isValid());
+
+    return splitCount != 0;
+}
+
+
+// Walks the boundary loop that contains 'startEdge' and sews every pair of consecutive boundary edges
+// (edge, edge->prev) whose far end points are at exactly the same position. The four half-edges involved
+// are replaced by a single interior pair (newEdge_a, newEdge_b) that takes over the faces and the
+// next/prev links of the old interior half-edges.
+//
+// 'startEdge' must be a boundary half-edge (face == NULL).
+// Returns a half-edge that still belongs to the boundary, or NULL if the boundary was closed completely.
+//
+// For this to be effective, we have to fix the boundary junctions first.
+Edge * Mesh::sewBoundary(Edge * startEdge) {
+    nvDebugCheck(startEdge->face == NULL);
+
+    // @@ We may want to be more conservative linking colocals in order to preserve the input topology. One way of doing that is by linking colocals only 
+    // if the vertices next to them are linked as well. That is, by sewing boundaries after detecting them. If any pair of consecutive edges have their first
+    // and last vertex in the same position, then it can be linked.
+
+    // Loop terminator; moved backwards every time a pair of edges is sewn.
+    Edge * lastBoundarySeen = startEdge;
+
+    nvDebug("Sewing Boundary:\n");
+
+    int count = 0;
+    int sewnCount = 0;
+
+    Edge * edge = startEdge;
+    do {
+        nvDebugCheck(edge->face == NULL);
+
+        // Two consecutive boundary half-edges: edge_b ends where edge_a starts.
+        Edge * edge_a = edge;
+        Edge * edge_b = edge->prev;
+
+        // Their pairs; the code below copies the face of each pair into the new interior edges.
+        Edge * pair_a = edge_a->pair;
+        Edge * pair_b = edge_b->pair;
+
+        Vertex * v0a = edge_a->to();
+        Vertex * v0b = edge_b->from();
+        Vertex * v1a = edge_a->from();
+        Vertex * v1b = edge_b->to();
+
+        nvDebugCheck(v1a->isColocal(v1b));
+
+        /*
+        v0b +      _+ v0a
+             \     /
+            b \   / a
+               \|/
+            v1b + v1a
+        */
+
+        // @@ This should not happen while sewing, but it may be produced somewhere else.
+        nvDebugCheck(edge_a != edge_b);
+
+        // Exact position comparison: only coincident end points are sewn.
+        if (v0a->pos == v0b->pos) {
+
+            // Link vertices.
+            v0a->linkColocal(v0b);
+            
+            // Remove edges to be collapsed.
+            // disconnect() leaves the pointers stored in the disconnected edges untouched, so they can
+            // still be read below until the edges are deleted.
+            disconnect(edge_a);
+            disconnect(edge_b);
+            disconnect(pair_a);
+            disconnect(pair_b);
+
+            // Link new boundary edges.
+            Edge * prevBoundary = edge_b->prev;
+            Edge * nextBoundary = edge_a->next;
+            if (nextBoundary != NULL) {
+                nvDebugCheck(nextBoundary->face == NULL);
+                nvDebugCheck(prevBoundary->face == NULL);
+                nextBoundary->setPrev(prevBoundary);
+            
+                // Make sure boundary vertex points to boundary edge.
+                v0a->setEdge(nextBoundary); // This updates all colocals.
+            }
+            lastBoundarySeen = prevBoundary;
+
+            // Create new edge.
+            Edge * newEdge_a = addEdge(v0a->id, v1a->id);   // pair_a->from()->id, pair_a->to()->id
+            Edge * newEdge_b = addEdge(v1b->id, v0b->id);
+
+            // The two new half-edges are paired explicitly with each other.
+            newEdge_a->pair = newEdge_b;
+            newEdge_b->pair = newEdge_a;
+
+            // The new edges replace pair_a and pair_b inside their faces.
+            newEdge_a->face = pair_a->face;
+            newEdge_b->face = pair_b->face;
+
+            newEdge_a->setNext(pair_a->next);
+            newEdge_a->setPrev(pair_a->prev);
+
+            newEdge_b->setNext(pair_b->next);
+            newEdge_b->setPrev(pair_b->prev);
+
+            delete edge_a;
+            delete edge_b;
+            delete pair_a;
+            delete pair_b;
+
+            edge = nextBoundary;    // If nextBoundary is NULL we have closed the loop.
+            sewnCount++;
+        }
+        else {
+            edge = edge->next;
+        }
+        
+        count++;
+    } while(edge != NULL && edge != lastBoundarySeen);
+
+    nvDebug(" - Sewn %d out of %d.\n", sewnCount, count);
+
+    if (lastBoundarySeen != NULL) {
+        nvDebugCheck(lastBoundarySeen->face == NULL);
+    }
+
+    return lastBoundarySeen;
+}
+
+
+// Unlinks 'edge' from the mesh without deleting it.
+//
+// The edge is removed from the edge array (even ids only) and from the edge map, and its vertex, face,
+// previous and next neighbours stop pointing at it. The pointers stored inside 'edge' itself (vertex,
+// face, pair, prev, next) are deliberately left untouched, so callers can still read them afterwards.
+// The caller owns the edge and is responsible for deleting it (see remove(Edge *)).
+//
+// @@ We must always disconnect edge pairs simultaneously.
+void Mesh::disconnect(Edge * edge) {
+    nvDebugCheck(edge != NULL);
+
+    // Remove from edge list.
+    // Only even half-edges own a slot; the slot index is id / 2.
+    if ((edge->id & 1) == 0) {
+        nvDebugCheck(m_edgeArray[edge->id / 2] == edge);
+        m_edgeArray[edge->id / 2] = NULL;
+    }
+
+    // Remove edge from map. @@ Store map key inside edge?
+    nvDebugCheck(edge->from() != NULL && edge->to() != NULL);
+    bool removed = m_edgeMap.remove(Key(edge->from()->id, edge->to()->id));
+    nvDebugCheck(removed == true);
+
+    // Disconnect from vertex.
+    // If the vertex uses this edge as its representative edge, pick another one or clear it.
+    if (edge->vertex != NULL) {
+        if (edge->vertex->edge == edge) {
+            if (edge->prev && edge->prev->pair) {
+                edge->vertex->edge = edge->prev->pair;
+            }
+            else if (edge->pair && edge->pair->next) {
+                edge->vertex->edge = edge->pair->next;
+            }
+            else {
+                edge->vertex->edge = NULL;
+                // @@ Remove disconnected vertex?
+            }
+        }
+        //edge->setVertex(NULL);
+    }
+
+    // Disconnect from face.
+    // If the face uses this edge as its representative edge, pick a neighbour or clear it.
+    if (edge->face != NULL) {
+        if (edge->face->edge == edge) {
+            if (edge->next != NULL && edge->next != edge) {
+                edge->face->edge = edge->next;
+            }
+            else if (edge->prev != NULL && edge->prev != edge) {
+                edge->face->edge = edge->prev;
+            }
+            else {
+                edge->face->edge = NULL;
+                // @@ Remove disconnected face?
+            }
+        }
+        //edge->setFace(NULL);
+    }
+
+    // @@ Hack, we don't disconnect from pair, because pair needs us to remove itself from the map.
+    // Disconnect from pair.
+    /*if (edge->pair != NULL) {
+        if (edge->pair->pair == edge) {
+            edge->pair->setPair(NULL);
+        }
+        //edge->setPair(NULL);
+    }*/
+
+    // Disconnect from previous.
+    if (edge->prev) {
+        if (edge->prev->next == edge) {
+            edge->prev->setNext(NULL);
+        }
+        //edge->setPrev(NULL);
+    }
+
+    // Disconnect from next.
+    if (edge->next) {
+        if (edge->next->prev == edge) {
+            edge->next->setPrev(NULL);
+        }
+        //edge->setNext(NULL);
+    }
+}
+
+
+// Unlinks the given half-edge from the mesh (see disconnect) and then frees it.
+// The pointer is dangling once this function returns.
+void Mesh::remove(Edge * edge)
+{
+    nvDebugCheck(edge != NULL);
+
+    // Detach first, destroy afterwards.
+    disconnect(edge);
+    delete edge;
+}
+
+// Takes the vertex out of the vertex array and out of its colocal ring, detaches it from the edge it
+// references and frees it. The array slot is left as NULL; compactVertices() removes such holes.
+void Mesh::remove(Vertex * vertex)
+{
+    nvDebugCheck(vertex != NULL);
+
+    // Leave a hole in the vertex list.
+    m_vertexArray[vertex->id] = NULL;
+
+    // Drop out of the colocal ring.
+    vertex->unlinkColocal();
+
+    // Detach from the edge this vertex points to, if any.
+    // @@ Removing a connected vertex is asking for trouble...
+    Edge * const attached = vertex->edge;
+    if (attached != NULL) {
+        // @@ Connect edge to a colocal?
+        if (attached->vertex == vertex) {
+            attached->vertex = NULL;
+        }
+
+        vertex->setEdge(NULL);
+    }
+
+    delete vertex;
+}
+
+// Takes the face out of the face array, clears the link between the face and the edge it references
+// and frees the face. The array slot is left as NULL; compactFaces() removes such holes.
+void Mesh::remove(Face * face)
+{
+    nvDebugCheck(face != NULL);
+
+    // Leave a hole in the face list.
+    m_faceArray[face->id] = NULL;
+
+    // Break the face <-> edge link in both directions.
+    Edge * const firstEdge = face->edge;
+    if (firstEdge != NULL) {
+        nvDebugCheck(firstEdge->face == face);
+
+        firstEdge->face = NULL;
+        face->edge = NULL;
+    }
+
+    delete face;
+}
+
+
+// Squeezes the NULL holes out of the edge array. An edge that moves to slot c gets id 2*c and its pair,
+// when present, gets id 2*c + 1. Edges that stay in place keep their ids.
+void Mesh::compactEdges()
+{
+    const uint total = m_edgeArray.count();
+
+    uint writeIndex = 0;
+    for (uint readIndex = 0; readIndex < total; ++readIndex) {
+        Edge * const live = m_edgeArray[readIndex];
+        if (live == NULL) {
+            continue;   // Hole left behind by disconnect().
+        }
+
+        if (readIndex != writeIndex) {
+            // Slide the edge down and renumber both halves.
+            m_edgeArray[writeIndex] = live;
+            live->id = 2 * writeIndex;
+            if (live->pair != NULL) {
+                live->pair->id = 2 * writeIndex + 1;
+            }
+        }
+        ++writeIndex;
+    }
+
+    m_edgeArray.resize(writeIndex);
+}
+
+
+// Squeezes the NULL holes out of the vertex array. A vertex that moves takes the index of its new slot
+// as id; vertices that stay in place keep their ids.
+void Mesh::compactVertices()
+{
+    const uint total = m_vertexArray.count();
+
+    uint writeIndex = 0;
+    for (uint readIndex = 0; readIndex < total; ++readIndex) {
+        Vertex * const live = m_vertexArray[readIndex];
+        if (live == NULL) {
+            continue;   // Hole left behind by remove().
+        }
+
+        if (readIndex != writeIndex) {
+            m_vertexArray[writeIndex] = live;
+            live->id = writeIndex;
+        }
+        ++writeIndex;
+    }
+
+    m_vertexArray.resize(writeIndex);
+
+    // @@ Generate xref array for external attributes.
+}
+
+
+// Squeezes the NULL holes out of the face array. A face that moves takes the index of its new slot as
+// id; faces that stay in place keep their ids.
+void Mesh::compactFaces()
+{
+    const uint total = m_faceArray.count();
+
+    uint writeIndex = 0;
+    for (uint readIndex = 0; readIndex < total; ++readIndex) {
+        Face * const live = m_faceArray[readIndex];
+        if (live == NULL) {
+            continue;   // Hole left behind by remove().
+        }
+
+        if (readIndex != writeIndex) {
+            m_faceArray[writeIndex] = live;
+            live->id = writeIndex;
+        }
+        ++writeIndex;
+    }
+
+    m_faceArray.resize(writeIndex);
+}
+
+
+// Splits 'edge' and its pair in two by inserting a new vertex.
+//
+//   edge - half-edge to split; its pair must exist and must be a boundary half-edge (face == NULL).
+//   t    - interpolation factor used for the attributes of the new vertex (0 = edge->from(), 1 = edge->to()).
+//   pos  - position of the new vertex.
+//
+// 'edge' and its pair are deleted; four new half-edges (e0, e1, p0, p1) take their place.
+// Returns the newly created vertex.
+Vertex * Mesh::splitBoundaryEdge(Edge * edge, float t, const Vector3 & pos) {
+
+    /*
+      We want to go from this configuration:
+           
+            +   +
+            |   ^
+       edge |<->|  pair
+            v   |
+            +   +
+      
+      To this one:
+
+            +   +
+            |   ^
+         e0 |<->| p0
+            v   |
+     vertex +   + 
+            |   ^
+         e1 |<->| p1
+            v   |
+            +   +
+
+    */
+
+
+    Edge * pair = edge->pair;
+
+    // Make sure boundaries are linked.
+    nvDebugCheck(pair != NULL); 
+
+    // Make sure edge is a boundary edge.
+    nvDebugCheck(pair->face == NULL);
+
+    // Add new vertex.
+    // Normal, texture coordinate and color are interpolated between the end points of the edge.
+    Vertex * vertex = addVertex(pos);
+    vertex->nor = lerp(edge->from()->nor, edge->to()->nor, t);
+    vertex->tex = lerp(edge->from()->tex, edge->to()->tex, t);
+    vertex->col = lerp(edge->from()->col, edge->to()->col, t);
+
+    // disconnect() removes the edges from the mesh but keeps the pointers stored inside them, so
+    // edge->from(), edge->prev, pair->next, ... can still be read below until the edges are deleted.
+    disconnect(edge);
+    disconnect(pair);
+
+    // Add edges.
+    Edge * e0 = addEdge(edge->from()->id, vertex->id);
+    Edge * p0 = addEdge(vertex->id, pair->to()->id);
+
+    Edge * e1 = addEdge(vertex->id, edge->to()->id);
+    Edge * p1 = addEdge(pair->from()->id, vertex->id);
+
+    // Link edges.
+    e0->setNext(e1);
+    p1->setNext(p0);
+
+    e0->setPrev(edge->prev);
+    e1->setNext(edge->next);
+
+    p1->setPrev(pair->prev);
+    p0->setNext(pair->next);
+
+    nvDebugCheck(e0->next == e1);
+    nvDebugCheck(e1->prev == e0);
+
+    nvDebugCheck(p1->next == p0);
+    nvDebugCheck(p0->prev == p1);
+
+    // Presumably addEdge() pairs half-edges that connect the same two vertices in opposite directions;
+    // nothing in this function sets the pair pointers explicitly.
+    nvDebugCheck(p0->pair == e0);
+    nvDebugCheck(e0->pair == p0);
+
+    nvDebugCheck(p1->pair == e1);
+    nvDebugCheck(e1->pair == p1);
+
+    // Link faces.
+    // Only the two halves of 'edge' inherit its face; p0 and p1 are not assigned a face here.
+    e0->face = edge->face;
+    e1->face = edge->face;
+
+    // Link vertices.
+    edge->from()->setEdge(e0);
+    vertex->setEdge(e1);
+
+    delete edge;
+    delete pair;
+
+    return vertex;
+}
+
+#if 0
+// Without introducing new vertices.
+void Mesh::splitBoundaryEdge(Edge * edge, Vertex * vertex) {
+
+    /*
+      We want to go from this configuration:
+
+            |   | pn
+            +   +
+            |   ^
+            |   |
+       edge |<->| pair
+            |   |
+            v   |
+            +   +
+            |   | pp
+      
+      To this one:
+          \       /
+           \     /
+            +   +
+            |   ^
+         e0 |<->| p0
+            v   |
+     vertex +   + 
+            |   ^
+         e1 |<->| p1
+            v   |
+            +   +
+           /     \
+          /       \
+    */
+
+
+    Edge * pair = edge->pair;
+    Edge * pn = pair->next();
+    Edge * pp = pair->prev();
+
+    // Make sure boundaries are linked.
+    nvDebugCheck(pair != NULL);
+
+    // Make sure edge is a boundary edge.
+    nvDebugCheck(pair->face() == NULL);
+
+    nvDebugCheck(edge->isValid());
+    nvDebugCheck(pair->isValid());
+
+    disconnect(edge);
+    disconnect(pair);
+
+    // Add edges.
+    Edge * e0 = addEdge(edge->from()->id(), vertex->id());
+    Edge * e1 = addEdge(vertex->id(), edge->to()->id());
+
+    // Link faces.
+    e0->setFace(edge->face());
+    e1->setFace(edge->face());
+
+    // Link pairs.
+    Edge * p0 = findEdge(vertex->id(), pair->to()->id());
+    if (p0 == NULL) {
+        p0 = addEdge(vertex->id(), pair->to()->id());
+        pn->setPrev(p0);
+    }
+    else {
+        nvDebugCheck(p0->face() != NULL);
+        if (e0->prev() != NULL) {
+            pn->setPrev(e0->prev());
+        }
+        else {
+            nvDebugCheck(pn == e0);
+        }
+    }
+    
+    Edge * p1 = findEdge(pair->from()->id(), vertex->id());
+    if (p1 == NULL) {
+        p1 = addEdge(pair->from()->id(), vertex->id());
+        pp->setNext(p1);
+    }
+    else {
+        nvDebugCheck(p1->face() != NULL);
+        if (e1->next() != NULL) {
+            pp->setPrev(e1->next());
+        }
+        else {
+            nvDebugCheck(pp == e1);
+        }
+    }
+
+    // Link edges.
+    e0->setNext(e1); // e1->setPrev(e0)
+
+    if (p0->face() == p1->face()) { // can be null
+        p1->setNext(p0); // p0->setPrev(p1)
+    }
+    else {
+        //if (p1->face() == NULL) p1->setNext(
+    }
+    
+
+    e0->setPrev(edge->prev());
+    e1->setNext(edge->next());
+
+    nvDebugCheck(e0->pair == p0);
+    nvDebugCheck(e1->pair == p1);
+    nvDebugCheck(p0->pair == e0);
+    nvDebugCheck(p1->pair == e1);
+
+    nvDebugCheck(e0->isValid());
+    nvDebugCheck(e1->isValid());
+    nvDebugCheck(pp->isValid());
+    nvDebugCheck(pn->isValid());
+
+    nvDebugCheck(e0->pair->isValid());
+    nvDebugCheck(e1->pair->isValid());
+    nvDebugCheck(pp->pair->isValid());
+    nvDebugCheck(pn->pair->isValid());
+
+    nvDebugCheck(edge->face->isValid());
+
+    if (pn->pair->face != NULL) {
+        nvDebugCheck(pn->pair->face->isValid());
+    }
+
+    if (pp->pair->face() != NULL) {
+        nvDebugCheck(pn->pair->face->isValid());
+    }
+
+    if (p0->face != NULL) {
+        nvDebugCheck(p0->face->isValid());
+    }
+
+    if (p1->face() != NULL) {
+        nvDebugCheck(p1->face()->isValid());
+    }
+
+    nvDebugCheck(isValid()); // Only for extreme debugging.
+
+    // Link vertices.
+    edge->from()->setEdge(e0);
+    vertex->setEdge(p0);
+
+    delete edge;
+    delete pair;
+}
+#endif
+
+// Sanity check of the edge array: every occupied slot i must hold the half-edge with id 2*i, its pair
+// must have id 2*i + 1, and both halves must pass Edge::isValid(). Empty (NULL) slots are ignored.
+// Faces and vertices are not checked yet.
+bool Mesh::isValid() const
+{
+    const uint slotCount = m_edgeArray.count();
+    for (uint slot = 0; slot < slotCount; ++slot) {
+        Edge * const even = m_edgeArray[slot];
+        if (even == NULL) {
+            continue;
+        }
+
+        // Even half of the pair.
+        if (even->id != 2 * slot || !even->isValid()) {
+            return false;
+        }
+
+        // Odd half of the pair.
+        Edge * const odd = even->pair;
+        if (odd->id != 2 * slot + 1 || !odd->isValid()) {
+            return false;
+        }
+    }
+
+    // @@ Make sure all faces are valid.
+
+    // @@ Make sure all vertices are valid.
+
+    return true;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Mesh.h b/thirdparty/thekla_atlas/nvmesh/halfedge/Mesh.h
new file mode 100644
index 0000000000..c202c2ef9a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Mesh.h
@@ -0,0 +1,274 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_HALFEDGE_MESH_H
+#define NV_MESH_HALFEDGE_MESH_H
+
+#include "nvmesh/nvmesh.h"
+#include "nvcore/Array.h"
+#include "nvcore/HashMap.h"
+
+/*
+If I were to redo this again, there are a number of things that I would do differently.
+- Edge map is only useful when importing a mesh to guarantee the result is two-manifold. However, when manipulating the mesh
+  it's a pain to maintain the map up to date.
+- Edge array only points to the even half-edges. There's no good reason for that. The map becomes required to traverse all edges
+  or you have to make sure edges are properly paired.
+- Linked boundaries. It's cleaner to assume a NULL pair means a boundary edge. Makes easier to seal boundaries. The only reason
+  why we link boundaries is to simplify traversal, but that could be done with two helper functions (nextBoundary, prevBoundary).
+- Minimize the amount of state that needs to be set in a certain way:
+    - boundary vertices point to boundary edge.
+- Remove parenthesis! Make some members public.
+- Remove member functions with side effects:
+    - e->setNext(n) modifies e->next and n->prev, instead use "link(e, n)", or "e->next = n, n->prev = e"
+*/
+
+
+namespace nv
+{
+    class Vector3;
+    class TriMesh;
+    class QuadTriMesh;
+    //template <typename T> struct Hash<Mesh::Key>;
+
+    namespace HalfEdge
+    {
+        class Edge;
+        class Face;
+        class Vertex;
+
+        /// Simple half edge mesh designed for dynamic mesh manipulation.
+        ///
+        /// The mesh owns three arrays of pointers (vertices, edges, faces). Removing an element leaves a
+        /// NULL hole in the corresponding array until the matching compact*() function is called, so code
+        /// that walks the arrays between a removal and a compaction has to expect NULL entries.
+        /// The edge array stores one entry per pair of half-edges (the one with the even id).
+        class Mesh
+        {
+        public:
+
+            Mesh();
+            Mesh(const Mesh * mesh);
+            ~Mesh();
+
+            void clear();
+
+            Vertex * addVertex(const Vector3 & pos);
+            //Vertex * addVertex(uint id, const Vector3 & pos);
+            //void addVertices(const Mesh * mesh);
+
+            void linkColocals();
+            void linkColocalsWithCanonicalMap(const Array<uint> & canonicalMap);
+            void resetColocalLinks();
+
+            Face * addFace();
+            Face * addFace(uint v0, uint v1, uint v2);
+            Face * addFace(uint v0, uint v1, uint v2, uint v3);
+            Face * addFace(const Array<uint> & indexArray);
+            Face * addFace(const Array<uint> & indexArray, uint first, uint num);
+            //void addFaces(const Mesh * mesh);
+
+            // disconnect(Edge *) unlinks the edge from the mesh but does NOT delete it; the caller keeps
+            // ownership of the edge.
+            // NOTE(review): no definition of disconnectPair(), disconnect(Vertex *) or disconnect(Face *)
+            // is visible next to the other members - confirm they are implemented before using them.
+            void disconnect(Edge * edge);
+            void disconnectPair(Edge * edge);
+            void disconnect(Vertex * vertex);
+            void disconnect(Face * face);
+
+            // These functions disconnect the given element from the mesh and delete it.
+            void remove(Edge * edge);
+            void remove(Vertex * vertex);
+            void remove(Face * face);
+
+            // Remove holes from arrays and reassign indices.
+            void compactEdges();
+            void compactVertices();
+            void compactFaces();
+
+            void triangulate();
+
+            void linkBoundary();
+            
+            bool splitBoundaryEdges(); // Returns true if any split was made.
+
+            // Sew the boundary that starts at the given edge, returns one edge that still belongs to boundary, or NULL if boundary closed.
+            HalfEdge::Edge * sewBoundary(Edge * startEdge);
+
+
+            // Vertices
+            uint vertexCount() const { return m_vertexArray.count(); }
+            const Vertex * vertexAt(int i) const { return m_vertexArray[i]; }
+            Vertex * vertexAt(int i) { return m_vertexArray[i]; }
+
+            uint colocalVertexCount() const { return m_colocalVertexCount; }
+
+            // Faces
+            uint faceCount() const { return m_faceArray.count(); }
+            const Face * faceAt(int i) const { return m_faceArray[i]; }
+            Face * faceAt(int i) { return m_faceArray[i]; }
+
+            // Edges (one entry per pair of half-edges).
+            uint edgeCount() const { return m_edgeArray.count();  }
+            const Edge * edgeAt(int i) const { return m_edgeArray[i]; }
+            Edge * edgeAt(int i) { return m_edgeArray[i]; }
+
+            class ConstVertexIterator;
+
+            // Index based iterator over the vertex array:
+            //   for (Mesh::VertexIterator it(mesh->vertices()); !it.isDone(); it.advance()) { ... it.current() ... }
+            class VertexIterator
+            {
+                friend class ConstVertexIterator;
+            public:
+                VertexIterator(Mesh * mesh) : m_mesh(mesh), m_current(0) { }
+
+                virtual void advance() { m_current++; }
+                virtual bool isDone() const { return m_current == m_mesh->vertexCount(); }
+                virtual Vertex * current() const { return m_mesh->vertexAt(m_current); }
+
+            private:
+                HalfEdge::Mesh * m_mesh;
+                uint m_current;
+            };
+            VertexIterator vertices() { return VertexIterator(this); }
+
+            // Read-only counterpart of VertexIterator.
+            class ConstVertexIterator
+            {
+            public:
+                ConstVertexIterator(const Mesh * mesh) : m_mesh(mesh), m_current(0) { }
+                // Takes the source iterator by const reference, like ConstFaceIterator and
+                // ConstEdgeIterator, so that it can also be built from a temporary such as vertices().
+                ConstVertexIterator(const VertexIterator & it) : m_mesh(it.m_mesh), m_current(it.m_current) { }
+
+                virtual void advance() { m_current++; }
+                virtual bool isDone() const { return m_current == m_mesh->vertexCount(); }
+                virtual const Vertex * current() const { return m_mesh->vertexAt(m_current); }
+
+            private:
+                const HalfEdge::Mesh * m_mesh;
+                uint m_current;
+            };
+            ConstVertexIterator vertices() const { return ConstVertexIterator(this); }
+
+            class ConstFaceIterator;
+
+            // Index based iterator over the face array.
+            class FaceIterator
+            {
+                friend class ConstFaceIterator;
+            public:
+                FaceIterator(Mesh * mesh) : m_mesh(mesh), m_current(0) { }
+
+                virtual void advance() { m_current++; }
+                virtual bool isDone() const { return m_current == m_mesh->faceCount(); }
+                virtual Face * current() const { return m_mesh->faceAt(m_current); }
+
+            private:
+                HalfEdge::Mesh * m_mesh;
+                uint m_current;
+            };
+            FaceIterator faces() { return FaceIterator(this); }
+
+            // Read-only counterpart of FaceIterator.
+            class ConstFaceIterator
+            {
+            public:
+                ConstFaceIterator(const Mesh * mesh) : m_mesh(mesh), m_current(0) { }
+                ConstFaceIterator(const FaceIterator & it) : m_mesh(it.m_mesh), m_current(it.m_current) { }
+
+                virtual void advance() { m_current++; }
+                virtual bool isDone() const { return m_current == m_mesh->faceCount(); }
+                virtual const Face * current() const { return m_mesh->faceAt(m_current); }
+
+            private:
+                const HalfEdge::Mesh * m_mesh;
+                uint m_current;
+            };
+            ConstFaceIterator faces() const { return ConstFaceIterator(this); }
+
+            class ConstEdgeIterator;
+
+            // Index based iterator over the edge array.
+            class EdgeIterator
+            {
+                friend class ConstEdgeIterator;
+            public:
+                EdgeIterator(Mesh * mesh) : m_mesh(mesh), m_current(0) { }
+
+                virtual void advance() { m_current++; }
+                virtual bool isDone() const { return m_current == m_mesh->edgeCount(); }
+                virtual Edge * current() const { return m_mesh->edgeAt(m_current); }
+
+            private:
+                HalfEdge::Mesh * m_mesh;
+                uint m_current;
+            };
+            EdgeIterator edges() { return EdgeIterator(this); }
+
+            // Read-only counterpart of EdgeIterator.
+            class ConstEdgeIterator
+            {
+            public:
+                ConstEdgeIterator(const Mesh * mesh) : m_mesh(mesh), m_current(0) { }
+                ConstEdgeIterator(const EdgeIterator & it) : m_mesh(it.m_mesh), m_current(it.m_current) { }
+
+                virtual void advance() { m_current++; }
+                virtual bool isDone() const { return m_current == m_mesh->edgeCount(); }
+                virtual const Edge * current() const { return m_mesh->edgeAt(m_current); }
+
+            private:
+                const HalfEdge::Mesh * m_mesh;
+                uint m_current;
+            };
+            ConstEdgeIterator edges() const { return ConstEdgeIterator(this); }
+
+            // @@ Add half-edge iterator.
+
+
+
+            // Convert to tri mesh.
+            TriMesh * toTriMesh() const;
+            QuadTriMesh * toQuadTriMesh() const;
+
+            // Consistency check of the edge array; intended for debug assertions.
+            bool isValid() const;
+
+        public:
+
+            // Error status:
+            mutable uint errorCount;
+            mutable uint errorIndex0;
+            mutable uint errorIndex1;
+
+        private:
+
+            bool canAddFace(const Array<uint> & indexArray, uint first, uint num) const;
+            bool canAddEdge(uint i, uint j) const;
+            Edge * addEdge(uint i, uint j);
+
+            Edge * findEdge(uint i, uint j) const;
+
+            void linkBoundaryEdge(Edge * edge);
+            // Splits a boundary edge by inserting a new vertex at 'pos'; returns the new vertex.
+            Vertex * splitBoundaryEdge(Edge * edge, float t, const Vector3 & pos);
+            // NOTE(review): the implementation of this overload is disabled with #if 0 in Mesh.cpp.
+            void splitBoundaryEdge(Edge * edge, Vertex * vertex);
+
+        private:
+
+            Array<Vertex *> m_vertexArray;
+            Array<Edge *> m_edgeArray;
+            Array<Face *> m_faceArray;
+
+            // Key of the edge map: ids of the origin and destination vertices of a half-edge.
+            struct Key {
+                Key() {}
+                Key(const Key & k) : p0(k.p0), p1(k.p1) {}
+                Key(uint v0, uint v1) : p0(v0), p1(v1) {}
+                void operator=(const Key & k) { p0 = k.p0; p1 = k.p1; }
+                bool operator==(const Key & k) const { return p0 == k.p0 && p1 == k.p1; }
+
+                uint p0;
+                uint p1;
+            };
+            friend struct Hash<Mesh::Key>;
+
+            HashMap<Key, Edge *> m_edgeMap;
+
+            uint m_colocalVertexCount;
+
+        };
+        /*
+        // This is a much better hash than the default and greatly improves performance!
+        template <> struct hash<Mesh::Key>
+        {
+        uint operator()(const Mesh::Key & k) const { return k.p0 + k.p1; }
+        };
+        */
+
+    } // HalfEdge namespace
+
+} // nv namespace
+
+#endif // NV_MESH_HALFEDGE_MESH_H
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Vertex.cpp b/thirdparty/thekla_atlas/nvmesh/halfedge/Vertex.cpp
new file mode 100644
index 0000000000..66dad69f8a
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Vertex.cpp
@@ -0,0 +1,94 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "nvmesh.h" // pch
+
+#include "Vertex.h"
+
+#include "nvmath/Vector.inl"
+
+using namespace nv;
+using namespace HalfEdge;
+
+
+// Points every vertex of the colocal ring (this vertex included) at the given edge.
+void Vertex::setEdge(Edge * e)
+{
+    VertexIterator ring(colocals());
+    while (!ring.isDone()) {
+        ring.current()->edge = e;
+        ring.advance();
+    }
+}
+
+// Moves every vertex of the colocal ring (this vertex included) to the given position.
+void Vertex::setPos(const Vector3 & p)
+{
+    VertexIterator ring(colocals());
+    while (!ring.isDone()) {
+        ring.current()->pos = p;
+        ring.advance();
+    }
+}
+
+
+// Number of vertices in the colocal ring, this vertex included.
+uint HalfEdge::Vertex::colocalCount() const
+{
+    uint ringSize = 0;
+    ConstVertexIterator ring(colocals());
+    while (!ring.isDone()) {
+        ringSize++;
+        ring.advance();
+    }
+    return ringSize;
+}
+
+// Number of edges visited by the edge iterator that starts at this vertex's edge.
+uint HalfEdge::Vertex::valence() const
+{
+    uint edgeTotal = 0;
+    ConstEdgeIterator around(edges());
+    while (!around.isDone()) {
+        edgeTotal++;
+        around.advance();
+    }
+    return edgeTotal;
+}
+
+// Returns the vertex with the smallest id among this vertex and its colocals.
+const HalfEdge::Vertex * HalfEdge::Vertex::firstColocal() const
+{
+    uint firstId = id;
+    const Vertex * vertex = this;
+
+    for (ConstVertexIterator it(colocals()); !it.isDone(); it.advance())
+    {
+        if (it.current()->id < firstId) {
+            // Update the candidate first, so that firstId always holds the id of the current candidate.
+            vertex = it.current();
+            firstId = vertex->id;
+        }
+    }
+
+    return vertex;
+}
+
+// Returns the vertex with the smallest id among this vertex and its colocals.
+HalfEdge::Vertex * HalfEdge::Vertex::firstColocal()
+{
+    Vertex * vertex = this;
+    uint firstId = id;
+
+    for (VertexIterator it(colocals()); !it.isDone(); it.advance())
+    {
+        if (it.current()->id < firstId) {
+            // Update the candidate first, so that firstId always holds the id of the current candidate.
+            vertex = it.current();
+            firstId = vertex->id;
+        }
+    }
+
+    return vertex;
+}
+
+// True when firstColocal() selects this very vertex as the representative of its colocal ring.
+bool HalfEdge::Vertex::isFirstColocal() const
+{
+    const Vertex * const representative = firstColocal();
+    return representative == this;
+}
+
+// True when 'v' is this vertex, or when 'v' has the same position and is linked into this vertex's
+// colocal ring.
+bool HalfEdge::Vertex::isColocal(const Vertex * v) const
+{
+    // Trivial cases: same vertex, or different positions.
+    if (v == this) {
+        return true;
+    }
+    if (pos != v->pos) {
+        return false;
+    }
+
+    // Same position: look for 'v' in the ring.
+    ConstVertexIterator ring(colocals());
+    while (!ring.isDone()) {
+        if (ring.current() == v) {
+            return true;
+        }
+        ring.advance();
+    }
+
+    return false;
+}
+
+
diff --git a/thirdparty/thekla_atlas/nvmesh/halfedge/Vertex.h b/thirdparty/thekla_atlas/nvmesh/halfedge/Vertex.h
new file mode 100644
index 0000000000..1c5c8d7141
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/halfedge/Vertex.h
@@ -0,0 +1,221 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_HALFEDGE_VERTEX_H
+#define NV_MESH_HALFEDGE_VERTEX_H
+
+#include "nvmesh/halfedge/Edge.h"
+
+namespace nv
+{
+    namespace HalfEdge { class Vertex; class Face; class Edge; }
+
+    // Half edge vertex.
+    //
+    // Besides its attributes, a vertex stores one edge it is attached to and the links of a circular,
+    // doubly linked list of "colocal" vertices. A freshly created vertex is the only member of its ring.
+    class HalfEdge::Vertex
+    {
+        NV_FORBID_COPY(Vertex);
+    public:
+
+        uint id;
+
+        Edge * edge;        // One edge attached to this vertex, or NULL.
+        Vertex * next;      // Next vertex in the colocal ring (this when the vertex has no colocals).
+        Vertex * prev;      // Previous vertex in the colocal ring (this when the vertex has no colocals).
+
+        Vector3 pos;
+        Vector3 nor;
+        Vector2 tex;
+        Vector4 col;
+
+
+        // Creates an unconnected vertex with zeroed attributes that is its own (only) colocal.
+        Vertex(uint id) : id(id), edge(NULL), pos(0.0f), nor(0.0f), tex(0.0f), col(0.0f) {
+            next = this;
+            prev = this;
+        }
+
+
+        // Both setters apply the change to all the vertices of the colocal ring.
+        void setEdge(Edge * e);
+        void setPos(const Vector3 & p);
+
+        uint colocalCount() const;
+        uint valence() const;
+        bool isFirstColocal() const;
+        const Vertex * firstColocal() const;
+        Vertex * firstColocal();
+
+        bool isColocal(const Vertex * v) const;
+
+        
+        // Inserts 'v' in the colocal ring right after this vertex.
+        // The previous next/prev links of 'v' are overwritten, so 'v' should not be part of another
+        // ring with more than one vertex when this is called.
+        void linkColocal(Vertex * v) {
+            next->prev = v;
+            v->next = next; 
+            next = v;
+            v->prev = this;
+        }
+        // Takes this vertex out of its colocal ring and turns it into a ring of one.
+        void unlinkColocal() {
+            next->prev = prev;
+            prev->next = next;
+            next = this;
+            prev = this;
+        }
+
+
+        // True when the vertex has an edge and that edge has no face.
+        // @@ Note: This only works if linkBoundary has been called.
+        bool isBoundary() const {
+            return (edge && !edge->face);
+        }
+
+
+        // Iterator usage:
+        //
+        //	for(EdgeIterator it(edges()); !it.isDone(); it.advance()) { ... }
+        //
+        //	EdgeIterator it(edges());
+        //	while(!it.isDone()) {
+        //		...
+        //		it.advance(); 
+        //	}
+        //
+        // All the iterators below record their starting element on the first call to advance() and are
+        // done when they get back to it. An iterator built from NULL is done immediately.
+
+        // Iterator that visits the edges around this vertex in counterclockwise order.
+        class EdgeIterator //: public Iterator<Edge *>
+        {
+        public:
+            EdgeIterator(Edge * e) : m_end(NULL), m_current(e) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->pair->next;
+                //m_current = m_current->prev->pair;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual Edge * current() const { return m_current; }
+            // NOTE(review): returns m_current->vertex, while ConstEdgeIterator::vertex() returns
+            // m_current->to() - confirm which one is intended.
+            Vertex * vertex() const { return m_current->vertex; }
+
+        private:
+            Edge * m_end;
+            Edge * m_current;
+        };
+
+        EdgeIterator edges() { return EdgeIterator(edge); }
+        EdgeIterator edges(Edge * e) { return EdgeIterator(e); }
+
+        // Iterator that visits the edges around this vertex in counterclockwise order.
+        class ConstEdgeIterator //: public Iterator<Edge *>
+        {
+        public:
+            ConstEdgeIterator(const Edge * e) : m_end(NULL), m_current(e) { }
+            // Starts a new traversal at the current edge of 'it'; the end marker of 'it' is not copied.
+            ConstEdgeIterator(EdgeIterator it) : m_end(NULL), m_current(it.current()) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->pair->next;
+                //m_current = m_current->prev->pair;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual const Edge * current() const { return m_current; }
+            const Vertex * vertex() const { return m_current->to(); }
+
+        private:
+            const Edge * m_end;
+            const Edge * m_current;
+        };
+
+        ConstEdgeIterator edges() const { return ConstEdgeIterator(edge); }
+        ConstEdgeIterator edges(const Edge * e) const { return ConstEdgeIterator(e); }
+
+
+        // Iterator that visits the edges around this vertex in the opposite direction of EdgeIterator:
+        // it steps through prev->pair instead of pair->next.
+        class ReverseEdgeIterator //: public Iterator<Edge *>
+        {
+        public:
+            ReverseEdgeIterator(Edge * e) : m_end(NULL), m_current(e) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->prev->pair;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual Edge * current() const { return m_current; }
+            Vertex * vertex() const { return m_current->vertex; }
+
+        private:
+            Edge * m_end;
+            Edge * m_current;
+        };
+
+        // Read-only iterator that visits the edges around this vertex in the opposite direction of
+        // ConstEdgeIterator: it steps through prev->pair instead of pair->next.
+        class ReverseConstEdgeIterator //: public Iterator<Edge *>
+        {
+        public:
+            ReverseConstEdgeIterator(const Edge * e) : m_end(NULL), m_current(e) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->prev->pair;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual const Edge * current() const { return m_current; }
+            const Vertex * vertex() const { return m_current->to(); }
+
+        private:
+            const Edge * m_end;
+            const Edge * m_current;
+        };
+
+
+
+        // Iterator that visits all the colocal vertices, starting with the given vertex itself.
+        class VertexIterator //: public Iterator<Vertex *>
+        {
+        public:
+            VertexIterator(Vertex * v) : m_end(NULL), m_current(v) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->next;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual Vertex * current() const { return m_current; }
+
+        private:
+            Vertex * m_end;
+            Vertex * m_current;
+        };
+
+        VertexIterator colocals() { return VertexIterator(this); }
+
+        // Read-only iterator that visits all the colocal vertices, starting with the given vertex itself.
+        class ConstVertexIterator //: public Iterator<Vertex *>
+        {
+        public:
+            ConstVertexIterator(const Vertex * v) : m_end(NULL), m_current(v) { }
+
+            virtual void advance()
+            {
+                if (m_end == NULL) m_end = m_current;
+                m_current = m_current->next;
+            }
+
+            virtual bool isDone() const { return m_end == m_current; }
+            virtual const Vertex * current() const { return m_current; }
+
+        private:
+            const Vertex * m_end;
+            const Vertex * m_current;
+        };
+
+        ConstVertexIterator colocals() const { return ConstVertexIterator(this); }
+
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_HALFEDGE_VERTEX_H
diff --git a/thirdparty/thekla_atlas/nvmesh/nvmesh.cpp b/thirdparty/thekla_atlas/nvmesh/nvmesh.cpp
new file mode 100644
index 0000000000..d007eda332
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/nvmesh.cpp
@@ -0,0 +1,2 @@
+#include "nvmesh.h" // pch
+
diff --git a/thirdparty/thekla_atlas/nvmesh/nvmesh.h b/thirdparty/thekla_atlas/nvmesh/nvmesh.h
new file mode 100644
index 0000000000..eb6819675d
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/nvmesh.h
@@ -0,0 +1,34 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_H
+#define NV_MESH_H
+
+#include "nvcore/nvcore.h"
+
+// Function linkage: NVMESH_API / NVMESH_CLASS expand to export or import decorations in shared builds and to nothing otherwise.
+#if NVMESH_SHARED
+#ifdef NVMESH_EXPORTS // Presumably defined only while building the library itself.
+#define NVMESH_API DLL_EXPORT
+#define NVMESH_CLASS DLL_EXPORT_CLASS
+#else // Shared build without NVMESH_EXPORTS: import the symbols.
+#define NVMESH_API DLL_IMPORT
+#define NVMESH_CLASS DLL_IMPORT
+#endif // NVMESH_EXPORTS
+#else // Not a shared build: no decoration.
+#define NVMESH_API
+#define NVMESH_CLASS
+#endif // NVMESH_SHARED
+
+#if 1 //USE_PRECOMPILED_HEADERS // If using precompiled headers:
+//#include <string.h> // strlen, strcmp, etc.
+//#include "nvcore/StrLib.h"
+//#include "nvcore/StdStream.h"
+//#include "nvcore/Memory.h"
+//#include "nvcore/Debug.h"
+//#include "nvmath/Vector.h"
+//#include "nvcore/Array.h"
+//#include "nvcore/HashMap.h"
+#endif
+
+#endif // NV_MESH_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/Atlas.cpp b/thirdparty/thekla_atlas/nvmesh/param/Atlas.cpp
new file mode 100644
index 0000000000..98f92cef96
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/Atlas.cpp
@@ -0,0 +1,1519 @@
+// Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com>
+
+#include "nvmesh.h" // pch
+
+#include "Atlas.h"
+#include "Util.h"
+#include "AtlasBuilder.h"
+#include "AtlasPacker.h"
+#include "SingleFaceMap.h"
+#include "OrthogonalProjectionMap.h"
+#include "LeastSquaresConformalMap.h"
+#include "ParameterizationQuality.h"
+
+//#include "nvmesh/export/MeshExportOBJ.h"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/halfedge/Vertex.h"
+
+#include "nvmesh/MeshBuilder.h"
+#include "nvmesh/MeshTopology.h"
+#include "nvmesh/param/Util.h"
+#include "nvmesh/geometry/Measurements.h"
+
+#include "nvmath/Vector.inl"
+#include "nvmath/Fitting.h"
+#include "nvmath/Box.inl"
+#include "nvmath/ProximityGrid.h"
+#include "nvmath/Morton.h"
+
+#include "nvcore/StrLib.h"
+#include "nvcore/Array.inl"
+#include "nvcore/HashMap.inl"
+
+using namespace nv;
+
+
+/// Ctor. Clears the failure flag.
+Atlas::Atlas()
+{
+    failed=false;
+}
+
+// Dtor. Passes every stored MeshCharts pointer to deleteAll().
+Atlas::~Atlas()
+{
+    deleteAll(m_meshChartsArray);
+}
+
+uint Atlas::chartCount() const
+{
+    // Total number of charts, summed over every MeshCharts stored in the atlas.
+    uint total = 0;
+    const uint meshCount = m_meshChartsArray.count();
+    for (uint m = 0; m < meshCount; m++) total += m_meshChartsArray[m]->chartCount();
+    return total;
+}
+
+const Chart * Atlas::chartAt(uint i) const
+{
+    // 'i' indexes all charts of the atlas; subtract each mesh's count until it falls inside one mesh.
+    const uint meshCount = m_meshChartsArray.count();
+    for (uint m = 0; m < meshCount; m++) {
+        const uint localCount = m_meshChartsArray[m]->chartCount();
+        if (i >= localCount) {
+            i -= localCount;
+            continue;
+        }
+        return m_meshChartsArray[m]->chartAt(i);
+    }
+    return NULL; // Index is past the last chart.
+}
+
+Chart * Atlas::chartAt(uint i)
+{
+    // 'i' indexes all charts of the atlas; subtract each mesh's count until it falls inside one mesh.
+    const uint meshCount = m_meshChartsArray.count();
+    for (uint m = 0; m < meshCount; m++) {
+        const uint localCount = m_meshChartsArray[m]->chartCount();
+        if (i >= localCount) {
+            i -= localCount;
+            continue;
+        }
+        return m_meshChartsArray[m]->chartAt(i);
+    }
+    return NULL; // Index is past the last chart.
+}
+
+// Append an already built MeshCharts to this atlas. The pointer is stored as-is; the destructor passes it to deleteAll().
+void Atlas::addMeshCharts(MeshCharts * meshCharts)
+{
+    m_meshChartsArray.append(meshCharts);
+}
+
+void Atlas::extractCharts(const HalfEdge::Mesh * mesh)
+{
+    MeshCharts * extracted = new MeshCharts(mesh); // Stored in the atlas below.
+    extracted->extractCharts();                    // Charts follow the seams already present in the mesh.
+    m_meshChartsArray.append(extracted);
+}
+
+void Atlas::computeCharts(const HalfEdge::Mesh * mesh, const SegmentationSettings & settings, const Array<uint> & unchartedMaterialArray)
+{
+    failed = false; // Clear the failure flag before segmenting.
+    MeshCharts * segmented = new MeshCharts(mesh);
+    segmented->computeCharts(settings, unchartedMaterialArray);
+    m_meshChartsArray.append(segmented); // Stored in the atlas from here on.
+}
+
+
+
+
+#if 0 // Compiled out: unfinished stub.
+
+/// Compute a seamless texture atlas. Not implemented: the only live statement returns false.
+bool Atlas::computeSeamlessTextureAtlas(bool groupFaces/*= true*/, bool scaleTiles/*= false*/, uint w/*= 1024*/, uint h/* = 1024*/)
+{
+    // Implement seamless texture atlas similar to what ZBrush does. See also:
+    // "Meshed Atlases for Real-Time Procedural Solid Texturing"
+    // http://graphics.cs.uiuc.edu/~jch/papers/rtpst.pdf
+
+    // Other methods that we should experiment with:
+    // 
+    // Seamless Texture Atlases:
+    // http://www.cs.jhu.edu/~bpurnomo/STA/index.html
+    // 
+    // Rectangular Multi-Chart Geometry Images:
+    // http://graphics.cs.uiuc.edu/~jch/papers/rmcgi.pdf
+    // 
+    // Discrete differential geometry also provides a way of constructing
+    // seamless quadrangulations as shown in:
+    // http://www.geometry.caltech.edu/pubs/TACD06.pdf
+    // 
+
+#pragma message(NV_FILE_LINE "TODO: Implement seamless texture atlas.")
+
+    if (groupFaces)
+    {
+        // @@ TODO.
+    }
+    else
+    {
+        // @@ Create one atlas per face.
+    }
+
+    if (scaleTiles)
+    {
+        // @@ TODO
+    }
+
+    /*
+    if (!isQuadMesh(m_mesh)) {
+        // Only handle quads for now.
+        return false;
+    }
+
+    // Each face is a chart.
+    const uint faceCount = m_mesh->faceCount();
+    m_chartArray.resize(faceCount);
+
+    for(uint f = 0; f < faceCount; f++) {
+        m_chartArray[f].faceArray.clear();
+        m_chartArray[f].faceArray.append(f);
+    }
+
+    // Map each face to a separate square.
+
+    // Determine face layout according to width and height.
+    float aspect = float(m_width) / float(m_height);
+
+    uint i = 2;
+    uint total = (m_width / (i+1)) * (m_height / (i+1));
+    while(total > faceCount) {
+        i *= 2;
+        total = (m_width / (i+1)) * (m_height / (i+1));
+    }
+
+    uint tileSize = i / 2;
+
+    int x = 0;
+    int y = 0;
+
+    m_result = new HalfEdge::Mesh();
+
+    // Once you have that it's just matter of traversing the faces.
+    for(uint f = 0; f < faceCount; f++) {
+        // Compute texture coordinates.
+        Vector2 tex[4];
+        tex[0] = Vector2(float(x), float(y));
+        tex[1] = Vector2(float(x+tileSize), float(y));
+        tex[2] = Vector2(float(x+tileSize), float(y+tileSize));
+        tex[3] = Vector2(float(x), float(y+tileSize));
+
+        Array<uint> indexArray(4);
+
+        const HalfEdge::Face * face = m_mesh->faceAt(f);
+
+        int i = 0;
+        for(HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance(), i++) {
+            const HalfEdge::Edge * edge = it.current();
+            const HalfEdge::Vertex * vertex = edge->from();
+
+            HalfEdge::Vertex * newVertex = m_result->addVertex(vertex->id(), vertex->pos());
+
+            newVertex->setTex(Vector3(tex[i], 0));
+            newVertex->setNor(vertex->nor());
+
+            indexArray.append(m_result->vertexCount() + 1);
+        }
+
+        m_result->addFace(indexArray);
+
+        // Move to the next tile.
+        x += tileSize + 1;
+        if (x + tileSize > m_width) {
+            x = 0;
+            y += tileSize + 1;
+        }
+    }
+    */
+
+    return false;
+}
+
+#endif // 0
+
+
+void Atlas::parameterizeCharts()
+{
+    // Forward to every MeshCharts stored in the atlas, in insertion order.
+    const uint meshCount = m_meshChartsArray.count();
+    for (uint m = 0; m < meshCount; m++) m_meshChartsArray[m]->parameterizeCharts();
+}
+
+
+float Atlas::packCharts(int quality, float texelsPerUnit, bool blockAlign, bool conservative)
+{
+    // Packs all charts; returns the atlas utilization, or 0 when the failure flag is set afterwards.
+    AtlasPacker chartPacker(this);
+    chartPacker.packCharts(quality, texelsPerUnit, blockAlign, conservative);
+
+    return hasFailed() ? 0.0f : chartPacker.computeAtlasUtilization();
+}
+
+
+
+
+/// Ctor. Only stores the mesh pointer; the constructor itself builds no charts.
+MeshCharts::MeshCharts(const HalfEdge::Mesh * mesh) : m_mesh(mesh)
+{
+}
+
+// Dtor. Passes every chart in m_chartArray to deleteAll().
+MeshCharts::~MeshCharts()
+{
+    deleteAll(m_chartArray);
+}
+
+
+void MeshCharts::extractCharts()
+{
+    // Flood fill over face adjacency: each set of faces reachable from one another without
+    // crossing a boundary or an existing seam becomes one chart.
+    const uint faceCount = m_mesh->faceCount();
+
+    Array<uint> queue(faceCount);
+    BitArray visited(faceCount);
+    visited.clearAll();
+
+    for (uint seed = 0; seed < faceCount; seed++)
+    {
+        if (visited.bitAt(seed)) continue; // Already collected into an earlier patch.
+
+        // Start a new patch holding only the seed face.
+        queue.clear();
+        queue.append(seed);
+        visited.setBitAt(seed);
+
+        // 'queue' is both the work list and the result; 'head' is the next face to expand.
+        for (uint head = 0; head != queue.count(); head++)
+        {
+            const HalfEdge::Face * face = m_mesh->faceAt(queue[head]);
+
+            // Visit the neighbors of this face, one per edge.
+            for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+            {
+                const HalfEdge::Edge * edge = it.current();
+                nvDebugCheck(edge->pair != NULL);
+
+                if (edge->isBoundary()) continue;
+
+                // Preserve existing seams (not just texture seams): the edge is only crossed
+                // when both half-edges reference the very same vertices.
+                const HalfEdge::Edge * opposite = edge->pair;
+                if (edge->from() != opposite->to() || edge->to() != opposite->from()) continue;
+
+                const HalfEdge::Face * neighborFace = opposite->face;
+                nvDebugCheck(neighborFace != NULL);
+
+                const uint neighborId = neighborFace->id;
+                if (!visited.bitAt(neighborId))
+                {
+                    queue.append(neighborId);
+                    visited.setBitAt(neighborId);
+                }
+            }
+        }
+
+        // Every face of the patch is now in the queue; turn it into a chart.
+        Chart * chart = new Chart();
+        chart->build(m_mesh, queue);
+        m_chartArray.append(chart);
+    }
+}
+
+
+/*
+LSCM:
+- identify sharp features using local dihedral angles.
+- identify seed faces farthest from sharp features.
+- grow charts from these seeds.
+
+MCGIM:
+- phase 1: chart growth
+  - grow all charts simultaneously using dijkstra search on the dual graph of the mesh.
+  - graph edges are weighted based on planarity metric.
+  - metric uses distance to global chart normal.
+  - terminate when all faces have been assigned.
+- phase 2: seed computation:
+  - place new seed of the chart at the most interior face.
+  - most interior is evaluated using distance metric only.
+
+- method repeats the two phases until the location of the seeds does not change.
+  - cycles are detected by recording all the previous seeds and chartification terminates.
+
+D-Charts:
+
+- Uniaxial conic metric:
+  - N_c = axis of the generalized cone that best fits the chart (the cone can be a cylinder or a plane).
+  - omega_c = angle between the face normals and the axis.
+  - Fitting error between chart C and triangle t: F(c,t) = (N_c*n_t - cos(omega_c))^2
+
+- Compactness metrics:
+  - Roundness:
+    - C(c,t) = pi * D(S_c,t)^2 / A_c
+    - S_c = chart seed.
+    - D(S_c,t) = length of the shortest path inside the chart between S_c and t.
+    - A_c = chart area.
+  - Straightness:
+    - P(c,t) = l_out(c,t) / l_in(c,t)
+    - l_out(c,t) = length of the edges not shared between C and t.
+    - l_in(c,t) = length of the edges shared between C and t.
+
+- Combined metric:
+  - Cost(c,t) = F(c,t)^alpha + C(c,t)^beta + P(c,t)^gamma
+  - alpha = 1, beta = 0.7, gamma = 0.5
+
+
+
+
+Our basic approach:
+- Just one iteration of k-means?
+- Avoid dijkstra by greedily growing charts until a threshold is met. Increase threshold and repeat until no faces left.
+- If distortion metric is too high, split chart, add two seeds.
+- If chart size is low, try removing chart.
+
+
+Postprocess:
+- If topology is not disk:
+  - Fill holes, if new faces fit proxy.
+  - Find best cut, otherwise.
+- After parameterization:
+  - If boundary self-intersects: 
+    - cut chart along the closest two diametral boundary vertices, repeat parametrization.
+    - what if the overlap is on an appendix? How do we find that out and cut appropriately?
+      - emphasize roundness metrics to prevent those cases.
+  - If interior self-overlaps: preserve boundary parameterization and use mean-value map.
+
+*/
+
+
+SegmentationSettings::SegmentationSettings()
+{
+    // No limit on chart area or boundary length by default.
+    maxChartArea = maxBoundaryLength = NV_FLOAT_MAX;
+
+    // Default weights of the individual segmentation metrics.
+    textureSeamMetricWeight = 0.1f;
+    normalSeamMetricWeight = 1.0f;
+    straightnessMetricWeight = 0.25f;
+    roundnessMetricWeight = 0.1f;
+    proxyFitMetricWeight = 1.0f;
+}
+
+// Segments m_mesh into charts with an AtlasBuilder, appends them to m_chartArray, and then builds
+// the per-face lookup tables (m_faceChart, m_faceIndex) and the per-chart vertex count prefix sum.
+void MeshCharts::computeCharts(const SegmentationSettings & settings, const Array<uint> & unchartedMaterialArray)
+{
+    Chart * vertexMap = NULL;
+    // Faces whose material is listed in unchartedMaterialArray are gathered into one vertex-mapped chart.
+    if (unchartedMaterialArray.count() != 0) {
+        vertexMap = new Chart();
+        vertexMap->buildVertexMap(m_mesh, unchartedMaterialArray);
+
+        if (vertexMap->faceCount() == 0) {
+            delete vertexMap;
+            vertexMap = NULL;
+        }
+    }
+    
+
+    AtlasBuilder builder(m_mesh);
+
+    if (vertexMap != NULL) {
+        // Mark faces that do not need to be charted.
+        builder.markUnchartedFaces(vertexMap->faceArray());
+
+        m_chartArray.append(vertexMap);
+    }
+
+    if (builder.facesLeft != 0) {
+
+        // Tweak these values:
+        const float maxThreshold = 2;
+        const uint growFaceCount = 32;
+        const uint maxIterations = 4;
+        
+        builder.settings = settings;
+
+        //builder.settings.proxyFitMetricWeight *= 0.75; // relax proxy fit weight during initial seed placement.
+        //builder.settings.roundnessMetricWeight = 0;
+        //builder.settings.straightnessMetricWeight = 0;
+
+        // This seems a reasonable estimate.
+        uint maxSeedCount = max(6U, builder.facesLeft);
+
+        // Create initial charts greedily.
+        nvDebug("### Placing seeds\n");
+        builder.placeSeeds(maxThreshold, maxSeedCount);
+        nvDebug("###   Placed %d seeds (max = %d)\n", builder.chartCount(), maxSeedCount);
+
+        builder.updateProxies();
+
+        builder.mergeCharts();
+
+    #if 1
+        nvDebug("### Relocating seeds\n");
+        builder.relocateSeeds();
+
+        nvDebug("### Reset charts\n");
+        builder.resetCharts();
+
+        if (vertexMap != NULL) {
+            builder.markUnchartedFaces(vertexMap->faceArray());
+        }
+
+        builder.settings = settings;
+
+        nvDebug("### Growing charts\n");
+
+        // Restart process growing charts in parallel.
+        uint iteration = 0;
+        while (true)
+        {
+            if (!builder.growCharts(maxThreshold, growFaceCount))
+            {
+                nvDebug("### Can't grow anymore\n");
+
+                // If charts cannot grow more: fill holes, merge charts, relocate seeds and start new iteration.
+
+                nvDebug("### Filling holes\n");
+                builder.fillHoles(maxThreshold);
+                nvDebug("###   Using %d charts now\n", builder.chartCount());
+
+                builder.updateProxies();
+
+                nvDebug("### Merging charts\n");
+                builder.mergeCharts();
+                nvDebug("###   Using %d charts now\n", builder.chartCount());
+
+                nvDebug("### Reseeding\n");
+                if (!builder.relocateSeeds())
+                {
+                    nvDebug("### Cannot relocate seeds anymore\n");
+
+                    // Done!
+                    break;
+                }
+                // Stop after maxIterations restarts even if the seeds are still moving.
+                if (iteration == maxIterations)
+                {
+                    nvDebug("### Reached iteration limit\n");
+                    break;
+                }
+                iteration++;
+
+                nvDebug("### Reset charts\n");
+                builder.resetCharts();
+
+                if (vertexMap != NULL) {
+                    builder.markUnchartedFaces(vertexMap->faceArray());
+                }
+
+                nvDebug("### Growing charts\n");
+            }
+        };
+    #endif
+
+        // Make sure no holes are left!
+        nvDebugCheck(builder.facesLeft == 0);
+
+        const uint chartCount = builder.chartArray.count();
+        for (uint i = 0; i < chartCount; i++)
+        {
+            Chart * chart = new Chart();
+            m_chartArray.append(chart);
+
+            chart->build(m_mesh, builder.chartFaces(i));
+        }
+    }
+
+    // From here on m_chartArray holds the vertex-mapped chart (if any) followed by the builder's charts.
+    const uint chartCount = m_chartArray.count();
+
+    // Build face indices: for every mesh face, the chart that contains it and its position inside that chart.
+    m_faceChart.resize(m_mesh->faceCount());
+    m_faceIndex.resize(m_mesh->faceCount());
+
+    for (uint i = 0; i < chartCount; i++)
+    {
+        const Chart * chart = m_chartArray[i];
+
+        const uint faceCount = chart->faceCount();
+        for (uint f = 0; f < faceCount; f++)
+        {
+            uint idx = chart->faceAt(f);
+            m_faceChart[idx] = i;
+            m_faceIndex[idx] = f;
+        }
+    }
+
+    // Build an exclusive prefix sum of the chart vertex counts.
+    m_chartVertexCountPrefixSum.resize(chartCount);
+    
+    if (chartCount > 0)
+    {
+        m_chartVertexCountPrefixSum[0] = 0;
+        
+        for (uint i = 1; i < chartCount; i++)
+        {
+            const Chart * chart = m_chartArray[i-1];
+            m_chartVertexCountPrefixSum[i] = m_chartVertexCountPrefixSum[i-1] + chart->vertexCount();
+        }
+        // The total adds the last chart's vertex count to the last prefix entry.
+        m_totalVertexCount = m_chartVertexCountPrefixSum[chartCount - 1] + m_chartArray[chartCount-1]->vertexCount();
+    }
+    else
+    {
+        m_totalVertexCount = 0;
+    }
+}
+
+// Computes a parameterization for every disk chart (vertex-mapped charts are skipped) and logs aggregate quality metrics.
+void MeshCharts::parameterizeCharts()
+{
+    ParameterizationQuality globalParameterizationQuality;
+
+    // Parameterize the charts.
+    uint diskCount = 0;
+    const uint chartCount = m_chartArray.count();
+    for (uint i = 0; i < chartCount; i++)
+    {
+        Chart * chart = m_chartArray[i];
+
+        bool isValid = false;
+
+        if (chart->isVertexMapped()) {
+            continue;
+        }
+
+        if (chart->isDisk())
+        {
+            diskCount++;
+
+            ParameterizationQuality chartParameterizationQuality;
+
+            if (chart->faceCount() == 1) {
+                computeSingleFaceMap(chart->unifiedMesh());
+
+                chartParameterizationQuality = ParameterizationQuality(chart->unifiedMesh());
+            }
+            else {
+                computeOrthogonalProjectionMap(chart->unifiedMesh());
+                ParameterizationQuality orthogonalQuality(chart->unifiedMesh());
+
+                computeLeastSquaresConformalMap(chart->unifiedMesh());
+                ParameterizationQuality lscmQuality(chart->unifiedMesh());
+                
+                // If the orthogonal projection produces better results, just use that.
+                // @@ It may be dangerous to do this, because isValid() does not detect self-overlaps.
+                // @@ Another problem is that with very thin patches with nearly zero parametric area, the results of our metric are not accurate.
+                /*if (orthogonalQuality.isValid() && orthogonalQuality.rmsStretchMetric() < lscmQuality.rmsStretchMetric()) {
+                    computeOrthogonalProjectionMap(chart->unifiedMesh());
+                    chartParameterizationQuality = orthogonalQuality;
+                }
+                else*/ {
+                    chartParameterizationQuality = lscmQuality;
+                }
+
+                // If conformal map failed, there is currently no fallback.
+
+                // @@ Experiment with other parameterization methods.
+                //computeCircularBoundaryMap(chart->unifiedMesh());
+                //computeConformalMap(chart->unifiedMesh());
+                //computeNaturalConformalMap(chart->unifiedMesh());
+                //computeGuidanceGradientMap(chart->unifiedMesh());
+            }
+
+            //ParameterizationQuality chartParameterizationQuality(chart->unifiedMesh());
+
+            isValid = chartParameterizationQuality.isValid();
+
+            if (!isValid)
+            {
+                nvDebug("*** Invalid parameterization.\n");
+#if 0
+                // Dump mesh to inspect problem:
+                static int pieceCount = 0;
+            
+                StringBuilder fileName;
+                fileName.format("invalid_chart_%d.obj", pieceCount++);
+                exportMesh(chart->unifiedMesh(), fileName.str()); 
+#endif
+            }
+
+            // @@ Check that parameterization quality is above a certain threshold.
+
+            // @@ Detect boundary self-intersections.
+
+            globalParameterizationQuality += chartParameterizationQuality;
+        }
+
+        if (!isValid)
+        {
+            //nvDebugBreak();
+            // @@ Run the builder again, but only on this chart.
+            //AtlasBuilder builder(chart->chartMesh());
+        }
+
+        // Transfer parameterization from unified mesh to chart mesh.
+        chart->transferParameterization();
+        // Note: the transfer above also runs for charts that are not disks and for invalid results.
+    }
+
+    nvDebug("  Parameterized %d/%d charts.\n", diskCount, chartCount);
+    nvDebug("  RMS stretch metric: %f\n", globalParameterizationQuality.rmsStretchMetric());
+    nvDebug("  MAX stretch metric: %f\n", globalParameterizationQuality.maxStretchMetric());
+    nvDebug("  RMS conformal metric: %f\n", globalParameterizationQuality.rmsConformalMetric());
+    nvDebug("  MAX authalic metric: %f\n", globalParameterizationQuality.maxAuthalicMetric()); // Label matches the max accessor that is printed.
+}
+
+
+// Ctor. Starts with no chart mesh and no unified mesh, flagged neither as a disk nor as vertex-mapped.
+Chart::Chart() : m_chartMesh(NULL), m_unifiedMesh(NULL), m_isDisk(false), m_isVertexMapped(false)
+{
+}
+// Builds the chart mesh and the unified mesh (colocal vertices merged via firstColocal()) for the given faces, then records whether the unified mesh is a disk.
+void Chart::build(const HalfEdge::Mesh * originalMesh, const Array<uint> & faceArray)
+{
+    // Copy face indices.
+    m_faceArray = faceArray;
+
+    const uint meshVertexCount = originalMesh->vertexCount();
+
+    m_chartMesh = new HalfEdge::Mesh();
+    m_unifiedMesh = new HalfEdge::Mesh();
+    // Original vertex id -> index in the new mesh; ~0 marks a vertex that has not been added yet.
+    Array<uint> chartMeshIndices;
+    chartMeshIndices.resize(meshVertexCount, ~0);
+
+    Array<uint> unifiedMeshIndices;
+    unifiedMeshIndices.resize(meshVertexCount, ~0);
+
+    // Add vertices.
+    const uint faceCount = faceArray.count();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = originalMesh->faceAt(faceArray[f]);
+        nvDebugCheck(face != NULL);
+
+        for(HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Vertex * vertex = it.current()->vertex;
+            const HalfEdge::Vertex * unifiedVertex = vertex->firstColocal();
+            // The unified mesh gets one vertex per firstColocal() representative.
+            if (unifiedMeshIndices[unifiedVertex->id] == ~0)
+            {
+                unifiedMeshIndices[unifiedVertex->id] = m_unifiedMesh->vertexCount();
+
+                nvDebugCheck(vertex->pos == unifiedVertex->pos);
+                m_unifiedMesh->addVertex(vertex->pos);
+            }
+            // The chart mesh gets one vertex per original vertex, plus the maps back to the original and unified indices.
+            if (chartMeshIndices[vertex->id] == ~0)
+            {
+                chartMeshIndices[vertex->id] = m_chartMesh->vertexCount();
+                m_chartToOriginalMap.append(vertex->id);
+                m_chartToUnifiedMap.append(unifiedMeshIndices[unifiedVertex->id]);
+
+                HalfEdge::Vertex * v = m_chartMesh->addVertex(vertex->pos);
+                v->nor = vertex->nor;
+                v->tex = vertex->tex;
+            }
+        }
+    }
+
+    // This is ignoring the canonical map:
+    // - Is it really necessary to link colocals?
+
+    m_chartMesh->linkColocals();    
+    //m_unifiedMesh->linkColocals();  // Not strictly necessary, no colocals in the unified mesh. # Wrong.
+
+    // This check is not valid anymore, if the original mesh vertices were linked with a canonical map, then it might have
+    // some colocal vertices that were unlinked. So, the unified mesh might have some duplicate vertices, because firstColocal()
+    // is not guaranteed to return the same vertex for two colocal vertices.
+    //nvCheck(m_chartMesh->colocalVertexCount() == m_unifiedMesh->vertexCount());
+
+    // Is that OK? What happens in meshes where that happens? Does anything break? Apparently not...
+    
+
+    // Scratch index list, cleared and refilled for every face added below.
+    Array<uint> faceIndices(7);
+
+    // Add faces.
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = originalMesh->faceAt(faceArray[f]);
+        nvDebugCheck(face != NULL);
+
+        faceIndices.clear();
+        // First the chart mesh face, using per-vertex indices.
+        for(HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Vertex * vertex = it.current()->vertex;
+            nvDebugCheck(vertex != NULL);
+
+            faceIndices.append(chartMeshIndices[vertex->id]);
+        }
+
+        m_chartMesh->addFace(faceIndices);
+
+        faceIndices.clear();
+        // Then the same face in the unified mesh, using the indices of the colocal representatives.
+        for(HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Vertex * vertex = it.current()->vertex;
+            nvDebugCheck(vertex != NULL);
+
+            vertex = vertex->firstColocal();
+
+            faceIndices.append(unifiedMeshIndices[vertex->id]);
+        }
+
+        m_unifiedMesh->addFace(faceIndices);
+    }
+
+    m_chartMesh->linkBoundary();
+    m_unifiedMesh->linkBoundary();
+
+    //exportMesh(m_unifiedMesh.ptr(), "debug_input.obj");
+    // If boundary edges were split, the unified mesh is rebuilt through unifyVertices().
+    if (m_unifiedMesh->splitBoundaryEdges()) {
+        m_unifiedMesh = unifyVertices(m_unifiedMesh.ptr());
+    }
+
+    //exportMesh(m_unifiedMesh.ptr(), "debug_split.obj");
+
+    // Closing the holes is not always the best solution and does not fix all the problems.
+    // We need to do some analysis of the holes and the genus to:
+    // - Find cuts that reduce genus.
+    // - Find cuts to connect holes.
+    // - Use minimal spanning trees or seamster.
+    if (!closeHoles()) {
+        /*static int pieceCount = 0;
+        StringBuilder fileName;
+        fileName.format("debug_hole_%d.obj", pieceCount++);
+        exportMesh(m_unifiedMesh.ptr(), fileName.str());*/
+    }
+
+    m_unifiedMesh = triangulate(m_unifiedMesh.ptr());
+    
+    //exportMesh(m_unifiedMesh.ptr(), "debug_triangulated.obj");
+
+
+    // Analyze chart topology.
+    MeshTopology topology(m_unifiedMesh.ptr());
+    m_isDisk = topology.isDisk();
+
+    // This is sometimes failing, when triangulate fails to add a triangle, it generates a hole in the mesh.
+    //nvDebugCheck(m_isDisk);
+
+    /*if (!m_isDisk) {
+        static int pieceCount = 0;
+        StringBuilder fileName;
+        fileName.format("debug_hole_%d.obj", pieceCount++);
+        exportMesh(m_unifiedMesh.ptr(), fileName.str());
+    }*/
+
+
+#if 0
+    if (!m_isDisk) {
+        nvDebugBreak();
+
+        static int pieceCount = 0;
+        
+        StringBuilder fileName;
+        fileName.format("debug_nodisk_%d.obj", pieceCount++);
+        exportMesh(m_chartMesh.ptr(), fileName.str()); 
+    }
+#endif
+
+}
+
+
+void Chart::buildVertexMap(const HalfEdge::Mesh * originalMesh, const Array<uint> & unchartedMaterialArray)
+{
+    nvCheck(m_chartMesh == NULL && m_unifiedMesh == NULL);
+
+    m_isVertexMapped = true;
+
+    // Build face indices.
+    m_faceArray.clear();
+
+    const uint meshFaceCount = originalMesh->faceCount();
+    for (uint f = 0; f < meshFaceCount; f++) {
+        const HalfEdge::Face * face = originalMesh->faceAt(f);
+
+        if (unchartedMaterialArray.contains(face->material)) {
+            m_faceArray.append(f);
+        }
+    }
+
+    const uint faceCount = m_faceArray.count();
+
+    if (faceCount == 0) {
+        return;
+    }
+
+
+    // @@ The chartMesh construction is basically the same as with regular charts, don't duplicate!
+
+    const uint meshVertexCount = originalMesh->vertexCount();
+
+    m_chartMesh = new HalfEdge::Mesh();
+
+    Array<uint> chartMeshIndices;
+    chartMeshIndices.resize(meshVertexCount, ~0);
+
+    // Vertex map mesh only has disconnected vertices.
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = originalMesh->faceAt(m_faceArray[f]);
+        nvDebugCheck(face != NULL);
+
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Vertex * vertex = it.current()->vertex;
+
+            if (chartMeshIndices[vertex->id] == ~0)
+            {
+                chartMeshIndices[vertex->id] = m_chartMesh->vertexCount();
+                m_chartToOriginalMap.append(vertex->id);
+
+                HalfEdge::Vertex * v = m_chartMesh->addVertex(vertex->pos);
+                v->nor = vertex->nor;
+                v->tex = vertex->tex; // @@ Not necessary.
+            }
+        }
+    }
+
+    // @@ Link colocals using the original mesh canonical map? Build canonical map on the fly? Do we need to link colocals at all for this?
+    //m_chartMesh->linkColocals();
+
+    Array<uint> faceIndices(7);
+
+    // Add faces.
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = originalMesh->faceAt(m_faceArray[f]);
+        nvDebugCheck(face != NULL);
+
+        faceIndices.clear();
+
+        for(HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Vertex * vertex = it.current()->vertex;
+            nvDebugCheck(vertex != NULL);
+            nvDebugCheck(chartMeshIndices[vertex->id] != ~0);
+
+            faceIndices.append(chartMeshIndices[vertex->id]);
+        }
+
+        HalfEdge::Face * new_face = m_chartMesh->addFace(faceIndices);
+        nvDebugCheck(new_face != NULL);
+    }
+
+    m_chartMesh->linkBoundary();
+
+
+    const uint chartVertexCount = m_chartMesh->vertexCount();
+
+    Box bounds;
+    bounds.clearBounds();
+
+    for (uint i = 0; i < chartVertexCount; i++) {
+        HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(i);
+        bounds.addPointToBounds(vertex->pos);
+    }
+
+    ProximityGrid grid;
+    grid.init(bounds, chartVertexCount);
+
+    for (uint i = 0; i < chartVertexCount; i++) {
+        HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(i);
+        grid.add(vertex->pos, i);
+    }
+
+
+#if 0
+    // Arrange vertices in a rectangle.
+    vertexMapWidth = ftoi_ceil(sqrtf(float(chartVertexCount)));
+    vertexMapHeight = (chartVertexCount + vertexMapWidth - 1) / vertexMapWidth;
+    nvDebugCheck(vertexMapWidth >= vertexMapHeight);
+
+    int x = 0, y = 0;
+    for (uint i = 0; i < chartVertexCount; i++) {
+        HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(i);
+
+        vertex->tex.x = float(x);
+        vertex->tex.y = float(y);
+
+        x++;
+        if (x == vertexMapWidth) {
+            x = 0;
+            y++;
+            nvCheck(y < vertexMapHeight);
+        }
+    }
+
+#elif 0
+    // Arrange vertices in a rectangle, traversing grid in 3D morton order and laying them down in 2D morton order.
+    vertexMapWidth = ftoi_ceil(sqrtf(float(chartVertexCount)));
+    vertexMapHeight = (chartVertexCount + vertexMapWidth - 1) / vertexMapWidth;
+    nvDebugCheck(vertexMapWidth >= vertexMapHeight);
+
+    int n = 0;
+    uint32 texelCode = 0;
+
+    uint cellsVisited = 0;
+
+    const uint32 cellCodeCount = grid.mortonCount();
+    for (uint32 cellCode = 0; cellCode < cellCodeCount; cellCode++) {
+        int cell = grid.mortonIndex(cellCode);
+        if (cell < 0) continue;
+
+        cellsVisited++;
+
+        const Array<uint> & indexArray = grid.cellArray[cell].indexArray;
+
+        foreach(i, indexArray) {
+            uint idx = indexArray[i];
+            HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(idx);
+
+            //vertex->tex.x = float(n % rectangleWidth) + 0.5f;
+            //vertex->tex.y = float(n / rectangleWidth) + 0.5f;
+
+            // Lay down the points in z order too.
+            uint x, y;
+            do {
+                x = decodeMorton2X(texelCode);
+                y = decodeMorton2Y(texelCode);
+                texelCode++;
+            } while (x >= U32(vertexMapWidth) || y >= U32(vertexMapHeight));
+            
+            vertex->tex.x = float(x);
+            vertex->tex.y = float(y);
+
+            n++;
+        }
+    }
+
+    nvDebugCheck(cellsVisited == grid.cellArray.count());
+    nvDebugCheck(n == chartVertexCount);
+
+#else
+
+    uint texelCount = 0;
+
+    const float positionThreshold = 0.01f;
+    const float normalThreshold = 0.01f;
+
+    uint verticesVisited = 0;
+    uint cellsVisited = 0;
+
+    Array<int> vertexIndexArray;
+    vertexIndexArray.resize(chartVertexCount, -1); // Init all indices to -1.
+
+    // Traverse vertices in morton order. @@ It may be more interesting to sort them based on orientation.
+    const uint cellCodeCount = grid.mortonCount();
+    for (uint cellCode = 0; cellCode < cellCodeCount; cellCode++) {
+        int cell = grid.mortonIndex(cellCode);
+        if (cell < 0) continue;
+
+        cellsVisited++;
+
+        const Array<uint> & indexArray = grid.cellArray[cell].indexArray;
+
+        foreach(i, indexArray) {
+            uint idx = indexArray[i];
+            HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(idx);
+
+            nvDebugCheck(vertexIndexArray[idx] == -1);
+
+            Array<uint> neighbors;
+            grid.gather(vertex->pos, positionThreshold, /*ref*/neighbors);
+
+            // Compare against all nearby vertices, cluster greedily.
+            foreach(j, neighbors) {
+                uint otherIdx = neighbors[j];
+
+                if (vertexIndexArray[otherIdx] != -1) {
+                    HalfEdge::Vertex * otherVertex = m_chartMesh->vertexAt(otherIdx);
+
+                    if (distance(vertex->pos, otherVertex->pos) < positionThreshold &&
+                        distance(vertex->nor, otherVertex->nor) < normalThreshold) 
+                    {
+                        vertexIndexArray[idx] = vertexIndexArray[otherIdx];
+                        break;
+                    }
+                }
+            }
+
+            // If index not assigned, assign new one.
+            if (vertexIndexArray[idx] == -1) {
+                vertexIndexArray[idx] = texelCount++;
+            }
+
+            verticesVisited++;
+        }
+    }
+
+    nvDebugCheck(cellsVisited == grid.cellArray.count());
+    nvDebugCheck(verticesVisited == chartVertexCount);
+
+    vertexMapWidth = ftoi_ceil(sqrtf(float(texelCount)));
+    vertexMapWidth = (vertexMapWidth + 3) & ~3;                             // Width aligned to 4.
+    vertexMapHeight = vertexMapWidth == 0 ? 0 : (texelCount + vertexMapWidth - 1) / vertexMapWidth;
+    //vertexMapHeight = (vertexMapHeight + 3) & ~3;                           // Height aligned to 4.
+    nvDebugCheck(vertexMapWidth >= vertexMapHeight);
+
+    nvDebug("Reduced vertex count from %d to %d.\n", chartVertexCount, texelCount);
+
+#if 0
+    // This lays down the clustered vertices linearly.
+    for (uint i = 0; i < chartVertexCount; i++) {
+        HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(i);
+
+        int idx = vertexIndexArray[i];
+
+        vertex->tex.x = float(idx % vertexMapWidth);
+        vertex->tex.y = float(idx / vertexMapWidth);
+    }
+#else
+    // Lay down the clustered vertices in morton order.
+
+    Array<uint> texelCodes;
+    texelCodes.resize(texelCount);
+
+    // For each texel, assign one morton code.
+    uint texelCode = 0;
+    for (uint i = 0; i < texelCount; i++) {
+        uint x, y;
+        do {
+            x = decodeMorton2X(texelCode);
+            y = decodeMorton2Y(texelCode);
+            texelCode++;
+        } while (x >= U32(vertexMapWidth) || y >= U32(vertexMapHeight));
+
+        texelCodes[i] = texelCode - 1;
+    }
+
+    for (uint i = 0; i < chartVertexCount; i++) {
+        HalfEdge::Vertex * vertex = m_chartMesh->vertexAt(i);
+
+        int idx = vertexIndexArray[i];
+        if (idx != -1) {
+            uint texelCode = texelCodes[idx];
+            uint x = decodeMorton2X(texelCode);
+            uint y = decodeMorton2Y(texelCode);
+
+            vertex->tex.x = float(x);
+            vertex->tex.y = float(y);
+        }
+    }
+
+#endif
+   
+#endif
+
+}
+
+
+
+// Collect one half-edge per boundary loop of `mesh` into `boundaryEdges`.
+//
+// The output array is emptied first. For every boundary loop the face-less half-edge
+// (the pair of a boundary edge) is appended exactly once; the edges of a loop are
+// flagged while it is walked so that the same loop is not reported again.
+static void getBoundaryEdges(HalfEdge::Mesh * mesh, Array<HalfEdge::Edge *> & boundaryEdges)
+{
+    nvDebugCheck(mesh != NULL);
+
+    const uint edgeCount = mesh->edgeCount();
+
+    // One flag per edge slot, all initially clear.
+    BitArray visited(edgeCount);
+    visited.clearAll();
+
+    boundaryEdges.clear();
+
+    for (uint slot = 0; slot < edgeCount; slot++)
+    {
+        HalfEdge::Edge * inner = mesh->edgeAt(slot);
+
+        // Ignore empty slots, interior edges and loops that were already walked.
+        if (inner == NULL) continue;
+        if (!inner->isBoundary()) continue;
+        if (visited.bitAt(slot)) continue;
+
+        nvDebugCheck(inner->face != NULL);
+        nvDebugCheck(inner->pair->face == NULL);
+
+        // Walk the loop along the outer (face-less) half-edges, flagging each edge.
+        HalfEdge::Edge * const outer = inner->pair;
+
+        const HalfEdge::Edge * cursor = outer;
+        do {
+            nvDebugCheck(cursor->face == NULL);
+            nvDebugCheck(visited.bitAt(cursor->id / 2) == false);
+
+            visited.setBitAt(cursor->id / 2);
+            cursor = cursor->next;
+        } while (cursor != outer);
+
+        boundaryEdges.append(outer);
+    }
+}
+
+
+// Fill the hole bounded by the edges loop[start .. loop.count()-1].
+//
+// Returns false (after a debug check) when that range holds fewer than three edges,
+// and true otherwise.
+//  - If the hole is planar, a single face is added that reuses the loop edges; it is
+//    expected to be triangulated properly later.
+//  - If the hole is not planar, a triangle fan is added around a new vertex placed at
+//    the average of the loop's vertex positions.
+// This is still a bit of a hack. There surely are better hole filling algorithms out there.
+bool Chart::closeLoop(uint start, const Array<HalfEdge::Edge *> & loop)
+{
+    const uint vertexCount = loop.count() - start;
+
+    nvDebugCheck(vertexCount >= 3);
+    if (vertexCount < 3) {
+        return false;
+    }
+
+    // The last edge of the range must lead back to the first vertex of the range.
+    nvDebugCheck(loop[start]->vertex->isColocal(loop[start+vertexCount-1]->to()));
+
+    // Gather the positions of the loop vertices.
+    Array<Vector3> positions;
+    positions.resize(vertexCount);
+    for (uint k = 0; k < vertexCount; k++) {
+        positions[k] = loop[start + k]->vertex->pos;
+    }
+
+    if (Fit::isPlanar(vertexCount, positions.buffer())) {
+        // Planar: add one face and chain the loop edges around it.
+        HalfEdge::Face * face = m_unifiedMesh->addFace();
+        for (uint k = 0; k < vertexCount; k++) {
+            const uint following = (k + 1 == vertexCount) ? 0 : k + 1;
+
+            HalfEdge::Edge * edge = loop[start + k];
+            edge->face = face;
+            edge->setNext(loop[start + following]);
+        }
+        face->edge = loop[start];
+
+        nvDebugCheck(face->isValid());
+
+        return true;
+    }
+
+    // Not planar: we just cross our fingers, and hope a fan around the centroid works.
+    Vector3 centroidPos(0);
+    for (uint k = 0; k < vertexCount; k++) {
+        centroidPos += positions[k];
+    }
+    centroidPos *= (1.0f / vertexCount);
+
+    HalfEdge::Vertex * centroid = m_unifiedMesh->addVertex(centroidPos);
+
+    // One triangle (centroid, previous vertex, current vertex) per boundary vertex.
+    for (uint k = 0; k < vertexCount; k++) {
+        const uint previous = (k == 0) ? vertexCount - 1 : k - 1;
+
+        HalfEdge::Face * face = m_unifiedMesh->addFace(centroid->id, loop[start + previous]->vertex->id, loop[start + k]->vertex->id);
+        nvDebugCheck(face != NULL);
+    }
+
+    return true;
+}
+
+// Fills every boundary loop of the unified mesh except the longest one (treated as the disk boundary) using closeLoop(). Returns true if there was at most one boundary on entry, or if exactly one boundary remains afterwards.
+bool Chart::closeHoles()
+{
+    nvDebugCheck(!m_isVertexMapped);
+
+    Array<HalfEdge::Edge *> boundaryEdges;
+    getBoundaryEdges(m_unifiedMesh.ptr(), boundaryEdges);
+
+    uint boundaryCount = boundaryEdges.count();
+    if (boundaryCount <= 1)
+    {
+        // Nothing to close.
+        return true;
+    }
+
+    // Compute the total edge length of each boundary loop (the centroid computation is commented out).
+    Array<float> boundaryLengths;
+    //Array<Vector3> boundaryCentroids;
+
+    for (uint i = 0; i < boundaryCount; i++)
+    {
+        const HalfEdge::Edge * startEdge = boundaryEdges[i];
+        nvCheck(startEdge->face == NULL);
+
+        //float boundaryEdgeCount = 0;
+        float boundaryLength = 0.0f;
+        //Vector3 boundaryCentroid(zero);
+
+        const HalfEdge::Edge * edge = startEdge;
+        do {
+            Vector3 t0 = edge->from()->pos;
+            Vector3 t1 = edge->to()->pos;
+
+            //boundaryEdgeCount++;
+            boundaryLength += length(t1 - t0);
+            //boundaryCentroid += edge->vertex()->pos;
+
+            edge = edge->next;
+        } while(edge != startEdge);
+
+        boundaryLengths.append(boundaryLength);
+        //boundaryCentroids.append(boundaryCentroid / boundaryEdgeCount);
+    }
+
+
+    // Find disk boundary: the loop with the greatest total length (the first one wins ties).
+    uint diskBoundary = 0;
+    float maxLength = boundaryLengths[0];
+
+    for (uint i = 1; i < boundaryCount; i++)
+    {
+        if (boundaryLengths[i] > maxLength)
+        {
+            maxLength = boundaryLengths[i];
+            diskBoundary = i;
+        }
+    }
+
+
+    // Sew holes.
+    /*for (uint i = 0; i < boundaryCount; i++)
+    {
+        if (diskBoundary == i)
+        {
+            // Skip disk boundary.
+            continue;
+        }
+
+        HalfEdge::Edge * startEdge = boundaryEdges[i];
+        nvCheck(startEdge->face() == NULL);
+
+        boundaryEdges[i] = m_unifiedMesh->sewBoundary(startEdge);
+    }
+
+    exportMesh(m_unifiedMesh.ptr(), "debug_sewn.obj");*/
+
+    //bool hasNewHoles = false;
+
+    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    // @@ Close loop is wrong, after closing a loop, we do not only have to add the face, but make sure that every edge in the loop is pointing to the right place.
+
+    // Close holes.
+    for (uint i = 0; i < boundaryCount; i++)
+    {
+        if (diskBoundary == i)
+        {
+            // Skip disk boundary.
+            continue;
+        }
+
+        HalfEdge::Edge * startEdge = boundaryEdges[i];
+        nvDebugCheck(startEdge != NULL);
+        nvDebugCheck(startEdge->face == NULL);
+
+#if 1
+        Array<HalfEdge::Vertex *> vertexLoop; // Destination vertices visited so far along this boundary.
+        Array<HalfEdge::Edge *> edgeLoop;     // Edges visited so far; edgeLoop[k] ends at vertexLoop[k].
+
+        HalfEdge::Edge * edge = startEdge;
+        do {
+            HalfEdge::Vertex * vertex = edge->next->vertex; // edge->to()
+
+            uint i; // Index of the first visited vertex colocal with `vertex` (shadows the outer boundary index).
+            for (i = 0; i < vertexLoop.count(); i++) {
+                if (vertex->isColocal(vertexLoop[i])) {
+                    break;
+                }
+            }
+            
+            bool isCrossing = (i != vertexLoop.count()); // The boundary passes through a position it already visited.
+
+            if (isCrossing) {
+
+                HalfEdge::Edge * prev = edgeLoop[i];    // Previous edge before the loop.
+                HalfEdge::Edge * next = edge->next;   // Next edge after the loop.
+
+                nvDebugCheck(prev->to()->isColocal(next->from()));
+
+                // Close loop.
+                edgeLoop.append(edge);
+                closeLoop(i+1, edgeLoop); // Fills the sub-loop edgeLoop[i+1 ..]; the return value is not checked.
+
+                // Link boundary loop.
+                prev->setNext(next);
+                vertex->setEdge(next);
+
+                // Start over again.
+                vertexLoop.clear();
+                edgeLoop.clear();
+                
+                edge = startEdge;
+                vertex = edge->to();
+            }
+
+            vertexLoop.append(vertex);
+            edgeLoop.append(edge);
+
+            edge = edge->next;
+        } while(edge != startEdge);
+
+        closeLoop(0, edgeLoop); // Fill what remains of this boundary; the return value is not checked.
+#endif
+
+        /*
+
+        // Add face and connect boundary edges.
+        HalfEdge::Face * face = m_unifiedMesh->addFace();
+        face->setEdge(startEdge);
+
+        HalfEdge::Edge * edge = startEdge;
+        do {
+            edge->setFace(face);
+
+            edge = edge->next();
+        } while(edge != startEdge);
+
+        */
+
+
+        /*
+        uint edgeCount = 0;
+        HalfEdge::Edge * edge = startEdge;
+        do {
+            edgeCount++;
+            edge = edge->next();
+        } while(edge != startEdge);
+
+
+
+        // Count edges in this boundary.
+        uint edgeCount = 0;
+        HalfEdge::Edge * edge = startEdge;
+        do {
+            edgeCount++;
+            edge = edge->next();
+        } while(edge != startEdge);
+
+        // Trivial hole, fill with one triangle. This actually works for all convex boundaries with non colinear vertices.
+        if (edgeCount == 3) {
+            // Add face and connect boundary edges.
+            HalfEdge::Face * face = m_unifiedMesh->addFace();
+            face->setEdge(startEdge);
+
+            edge = startEdge;
+            do {
+                edge->setFace(face);
+
+                edge = edge->next();
+            } while(edge != startEdge);
+
+            // @@ Implement the above using addFace, it should now work with existing edges, as long as their face pointers is zero.
+
+        }
+        else {
+            // Ideally we should:
+            // - compute best fit plane of boundary vertices.
+            // - project boundary polygon onto plane.
+            // - triangulate boundary polygon.
+            // - add faces of the resulting triangulation.
+
+            // I don't have a good triangulator available. A more simple solution that works in more (but not all) cases:
+            // - compute boundary centroid.
+            // - add vertex centroid.
+            // - connect centroid vertex with boundary vertices.
+            // - connect radial edges with boundary edges.
+
+            // This should work for non-convex boundaries with colinear vertices as long as the kernel of the polygon is not empty.
+
+            // Compute boundary centroid:
+            Vector3 centroid_pos(0);
+            Vector2 centroid_tex(0);
+
+            HalfEdge::Edge * edge = startEdge;
+            do {
+                centroid_pos += edge->vertex()->pos;
+                centroid_tex += edge->vertex()->tex;
+                edge = edge->next();
+            } while(edge != startEdge);
+
+            centroid_pos *= (1.0f / edgeCount);
+            centroid_tex *= (1.0f / edgeCount);
+
+            HalfEdge::Vertex * centroid = m_unifiedMesh->addVertex(centroid_pos);
+            centroid->tex = centroid_tex;
+
+            // Add one pair of edges for each boundary vertex.
+            edge = startEdge;
+            do {
+                HalfEdge::Edge * next = edge->next();
+
+                nvCheck(edge->face() == NULL);
+                HalfEdge::Face * face = m_unifiedMesh->addFace(centroid->id(), edge->from()->id(), edge->to()->id());
+                
+                if (face != NULL) {
+                    nvCheck(edge->face() == face);
+                }
+                else {
+                    hasNewHoles = true;
+                }
+
+                edge = next;
+            } while(edge != startEdge);
+        }
+        */
+    }
+
+    /*nvDebugCheck(!hasNewHoles);
+
+    if (hasNewHoles) {
+        // Link boundary again, in case closeHoles created new holes!
+        m_unifiedMesh->linkBoundary();
+    }*/
+
+    // Because some algorithms do not expect sparse edge buffers.
+    //m_unifiedMesh->compactEdges();
+
+    // In case we messed up:
+    //m_unifiedMesh->linkBoundary();
+
+    getBoundaryEdges(m_unifiedMesh.ptr(), boundaryEdges); // Re-collect the boundaries to check the result.
+
+    boundaryCount = boundaryEdges.count();
+    nvDebugCheck(boundaryCount == 1);
+
+    //exportMesh(m_unifiedMesh.ptr(), "debug_hole_filled.obj");
+
+    return boundaryCount == 1;
+}
+
+
+// Copy the texture coordinates of the unified mesh onto the chart mesh: every chart
+// vertex receives the `tex` of the unified vertex it maps to.
+void Chart::transferParameterization() {
+    nvDebugCheck(!m_isVertexMapped);
+
+    const uint chartVertexCount = m_chartMesh->vertexCount();
+    for (uint chartIdx = 0; chartIdx < chartVertexCount; chartIdx++) {
+        HalfEdge::Vertex * target = m_chartMesh->vertexAt(chartIdx);
+        const uint unifiedIdx = mapChartVertexToUnifiedVertex(chartIdx);
+        target->tex = m_unifiedMesh->vertexAt(unifiedIdx)->tex;
+    }
+}
+
+// Surface area of the chart mesh, as returned by nv::computeSurfaceArea, multiplied by `scale`.
+float Chart::computeSurfaceArea() const {
+    return scale * nv::computeSurfaceArea(m_chartMesh.ptr());
+}
+
+// Parametric area of the chart mesh, as returned by nv::computeParametricArea.
+float Chart::computeParametricArea() const {
+    // Only meaningful for parameterized meshes: a disk chart that is not vertex mapped.
+    nvDebugCheck(!m_isVertexMapped);
+    nvDebugCheck(m_isDisk);
+
+    const HalfEdge::Mesh * chart = m_chartMesh.ptr();
+    return nv::computeParametricArea(chart);
+}
+
+// Extents (xy) of the bounding box enclosing the `tex` coordinates of all chart vertices.
+Vector2 Chart::computeParametricBounds() const {
+    // Only meaningful for parameterized meshes: a disk chart that is not vertex mapped.
+    nvDebugCheck(m_isDisk);
+    nvDebugCheck(!m_isVertexMapped);
+
+    Box texBounds;
+    texBounds.clearBounds();
+
+    // Grow the box with every texture coordinate, lifted to 3D with z = 0.
+    const uint total = m_chartMesh->vertexCount();
+    for (uint idx = 0; idx != total; idx++) {
+        texBounds.addPointToBounds(Vector3(m_chartMesh->vertexAt(idx)->tex, 0));
+    }
+
+    return texBounds.extents().xy();
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/param/Atlas.h b/thirdparty/thekla_atlas/nvmesh/param/Atlas.h
new file mode 100644
index 0000000000..41cfaea9cb
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/Atlas.h
@@ -0,0 +1,186 @@
+// Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com>
+
+#pragma once
+#ifndef NV_MESH_ATLAS_H
+#define NV_MESH_ATLAS_H
+
+#include "nvcore/Array.h"
+#include "nvcore/Ptr.h"
+#include "nvmath/Vector.h"
+#include "nvmesh/nvmesh.h"
+#include "nvmesh/halfedge/Mesh.h"
+
+
+namespace nv
+{
+    namespace HalfEdge { class Mesh; }
+
+    class Chart;
+    class MeshCharts;
+    class VertexMap;
+    // Settings passed to Atlas::computeCharts() and MeshCharts::computeCharts(). The constructor is defined elsewhere.
+    struct SegmentationSettings
+    {
+        SegmentationSettings();
+
+        float maxChartArea;         // NOTE(review): presumably an upper bound on the area of a chart -- confirm in the chart builder.
+        float maxBoundaryLength;    // NOTE(review): presumably an upper bound on a chart's boundary length -- confirm.
+        // NOTE(review): the weights below presumably scale the matching terms of the chart-growing metric -- confirm in the chart builder.
+        float proxyFitMetricWeight;
+        float roundnessMetricWeight;
+        float straightnessMetricWeight;
+        float normalSeamMetricWeight;
+        float textureSeamMetricWeight;
+    };
+
+
+    /// An atlas is a set of charts, stored here as an array of MeshCharts pointers.
+    class Atlas
+    {
+    public:
+
+        Atlas();
+        ~Atlas();
+        // Access to the MeshCharts objects held in m_meshChartsArray.
+        uint meshCount() const { return m_meshChartsArray.count(); }
+        const MeshCharts * meshAt(uint i) const { return m_meshChartsArray[i]; }
+        MeshCharts * meshAt(uint i) { return m_meshChartsArray[i]; }
+        // Chart access, defined out of line. NOTE(review): presumably indexes charts across all meshes -- confirm.
+        uint chartCount() const;
+        const Chart * chartAt(uint i) const;
+        Chart * chartAt(uint i);
+
+        // Add mesh charts and takes ownership.
+        void addMeshCharts(MeshCharts * meshCharts);
+        // Chart creation for a mesh, defined out of line; computeCharts() takes the segmentation settings.
+        void extractCharts(const HalfEdge::Mesh * mesh);
+        void computeCharts(const HalfEdge::Mesh * mesh, const SegmentationSettings & settings, const Array<uint> & unchartedMaterialArray);
+
+
+        // Compute a trivial seamless texture similar to ZBrush.
+        //bool computeSeamlessTextureAtlas(bool groupFaces = true, bool scaleTiles = false, uint w = 1024, uint h = 1024);
+
+        void parameterizeCharts();
+
+        // Pack charts in the smallest possible rectangle.
+        float packCharts(int quality, float texelArea, bool blockAlign, bool conservative);
+        void setFailed() { failed = true; }         // Raises the failure flag; nothing in this class clears it.
+        bool hasFailed() const { return failed; }   // Reads the failure flag.
+
+    private:
+
+        bool failed;    // NOTE(review): the initial value depends on the constructor, which is not visible here.
+        Array<MeshCharts *> m_meshChartsArray;
+
+    };
+
+
+    // Set of charts corresponding to a single mesh, plus per-face lookup tables into those charts.
+    class MeshCharts
+    {
+    public:
+        MeshCharts(const HalfEdge::Mesh * mesh);
+        ~MeshCharts();
+
+        uint chartCount() const { return m_chartArray.count(); }
+        uint vertexCount () const { return m_totalVertexCount; }
+        // Chart access by index into m_chartArray.
+        const Chart * chartAt(uint i) const { return m_chartArray[i]; }
+        Chart * chartAt(uint i) { return m_chartArray[i]; }
+
+        void computeVertexMap(const Array<uint> & unchartedMaterialArray);
+
+        // Extract the charts of the input mesh.
+        void extractCharts();
+
+        // Compute charts using a simple segmentation algorithm.
+        void computeCharts(const SegmentationSettings & settings, const Array<uint> & unchartedMaterialArray);
+
+        void parameterizeCharts();
+        // Per-face lookups: i is a face index of the input mesh.
+        uint faceChartAt(uint i) const { return m_faceChart[i]; }
+        uint faceIndexWithinChartAt(uint i) const { return m_faceIndex[i]; }
+        // Entry i of the vertex-count prefix sum. NOTE(review): presumably the total vertex count of the charts before chart i -- confirm.
+        uint vertexCountBeforeChartAt(uint i) const { return m_chartVertexCountPrefixSum[i]; }
+
+    private:
+
+        const HalfEdge::Mesh * m_mesh;
+
+        Array<Chart *> m_chartArray;
+        
+        Array<uint> m_chartVertexCountPrefixSum;
+        uint m_totalVertexCount;    // Value returned by vertexCount().
+
+        Array<uint> m_faceChart; // the chart of every face of the input mesh.
+        Array<uint> m_faceIndex; // the index within the chart for every face of the input mesh.
+    };
+
+
+    /// A chart is a connected set of faces with a certain topology (usually a disk). It holds a chart mesh and a unified mesh.
+    class Chart
+    {
+    public:
+
+        Chart();
+        // Chart construction from the original mesh; both are defined out of line.
+        void build(const HalfEdge::Mesh * originalMesh, const Array<uint> & faceArray);
+        void buildVertexMap(const HalfEdge::Mesh * originalMesh, const Array<uint> & unchartedMaterialArray);
+        // Fills all boundary loops of the unified mesh except the longest one; returns true when at most one boundary remains.
+        bool closeHoles();
+
+        bool isDisk() const { return m_isDisk; }
+        bool isVertexMapped() const { return m_isVertexMapped; }
+
+        uint vertexCount() const { return m_chartMesh->vertexCount(); }             // Vertex count of the chart mesh.
+        uint colocalVertexCount() const { return m_unifiedMesh->vertexCount(); }    // Vertex count of the unified mesh.
+
+        uint faceCount() const { return m_faceArray.count(); }
+        uint faceAt(uint i) const { return m_faceArray[i]; }
+
+        const HalfEdge::Mesh * chartMesh() const { return m_chartMesh.ptr(); }
+        HalfEdge::Mesh * chartMesh() { return m_chartMesh.ptr(); }
+        const HalfEdge::Mesh * unifiedMesh() const { return m_unifiedMesh.ptr(); }
+        HalfEdge::Mesh * unifiedMesh() { return m_unifiedMesh.ptr(); }
+
+        //uint vertexIndex(uint i) const { return m_vertexIndexArray[i]; }
+        // Index translation for chart-mesh vertex i.
+        uint mapChartVertexToOriginalVertex(uint i) const { return m_chartToOriginalMap[i]; }
+        uint mapChartVertexToUnifiedVertex(uint i) const { return m_chartToUnifiedMap[i]; }
+
+        const Array<uint> & faceArray() const { return m_faceArray; }
+        // Copies `tex` from each unified-mesh vertex to the chart-mesh vertices that map to it.
+        void transferParameterization();
+
+        float computeSurfaceArea() const;           // nv::computeSurfaceArea of the chart mesh, multiplied by `scale`.
+        float computeParametricArea() const;        // nv::computeParametricArea of the chart mesh; debug-checks isDisk and !isVertexMapped.
+        Vector2 computeParametricBounds() const;    // xy extents of the box around all chart-vertex `tex` values.
+
+
+        float scale = 1.0f;     // Factor applied by computeSurfaceArea().
+        uint vertexMapWidth;    // Vertex-map dimensions; assigned by the vertex-map layout code in the implementation file.
+        uint vertexMapHeight;
+
+    private:
+        // Fills the hole bounded by loop[start ..]; returns false when that range holds fewer than three edges.
+        bool closeLoop(uint start, const Array<HalfEdge::Edge *> & loop);
+
+        // Chart mesh.
+        AutoPtr<HalfEdge::Mesh> m_chartMesh;
+        AutoPtr<HalfEdge::Mesh> m_unifiedMesh;
+
+        bool m_isDisk;
+        bool m_isVertexMapped;
+
+        // List of faces of the original mesh that belong to this chart.
+        Array<uint> m_faceArray;
+
+        // Map vertices of the chart mesh to vertices of the original mesh.
+        Array<uint> m_chartToOriginalMap;
+        // Map vertices of the chart mesh to vertices of the unified mesh.
+        Array<uint> m_chartToUnifiedMap;
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_ATLAS_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/AtlasBuilder.cpp b/thirdparty/thekla_atlas/nvmesh/param/AtlasBuilder.cpp
new file mode 100644
index 0000000000..bd2140c2f3
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/AtlasBuilder.cpp
@@ -0,0 +1,1320 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "nvmesh.h" // pch
+
+#include "AtlasBuilder.h"
+#include "Util.h"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/halfedge/Vertex.h"
+
+#include "nvmath/Matrix.inl"
+#include "nvmath/Vector.inl"
+
+//#include "nvcore/IntroSort.h"
+#include "nvcore/Array.inl"
+
+#include <algorithm> // std::sort
+
+#include <float.h> // FLT_MAX
+#include <limits.h> // UINT_MAX
+
+using namespace nv;
+
+namespace
+{
+
+    // Dummy implementation of a priority queue using sort at insertion.
+    // - Entries are kept in descending priority order: the smallest element goes at the end, so that popping it is O(1).
+    // - Sorted insertion is O(n)
+    // - Resorting is O(n*log(n))
+    // @@ Number of elements in the queue is usually small, and we'd have to rebalance often. I'm not sure it's worth implementing a heap.
+    // @@ Searching at removal would remove the need for sorting when priorities change.
+    struct PriorityQueue
+    {
+        // size: maximum number of entries kept by the sorted push(); unlimited by default.
+        PriorityQueue(uint size = UINT_MAX) : maxSize(size) {}
+
+        // Insert `face` with the given priority, keeping the array sorted.
+        // When the queue grows beyond maxSize, the entry with the largest priority is dropped.
+        void push(float priority, uint face) {
+            // Find the first entry with a smaller priority, so that the array stays in
+            // descending order. This matches sort() (see Pair::operator<) and the
+            // "smallest at the back" contract that pop() and firstPriority() rely on.
+            // The comparison used to be '>', which produced ascending order instead.
+            uint i = 0;
+            const uint count = pairs.count();
+            for (; i < count; i++) {
+                if (pairs[i].priority < priority) break;
+            }
+
+            Pair p = { priority, face };
+            pairs.insertAt(i, p);
+
+            // Over capacity: the front entry holds the largest priority, discard it.
+            if (pairs.count() > maxSize) {
+                pairs.removeAt(0);
+            }
+        }
+
+        // push face out of order, to be sorted later.
+        void push(uint face) {
+            Pair p = { 0.0f, face };
+            pairs.append(p);
+        }
+
+        // Remove and return the face at the back (smallest priority when sorted).
+        // The queue must not be empty.
+        uint pop() {
+            uint f = pairs.back().face;
+            pairs.pop_back();
+            return f;
+        }
+
+        // Restore descending priority order after out-of-order pushes or priority changes.
+        void sort() {
+            //nv::sort(pairs); // @@ My intro sort appears to be much slower than it should!
+            std::sort(pairs.buffer(), pairs.buffer() + pairs.count());
+        }
+
+        void clear() {
+            pairs.clear();
+        }
+
+        uint count() const { return pairs.count(); }
+
+        // Priority of the entry that pop() would return. The queue must not be empty.
+        float firstPriority() const { return pairs.back().priority; }
+
+
+        const uint maxSize;
+        
+        struct Pair {
+            bool operator <(const Pair & p) const { return priority > p.priority; } // !! Sort in inverse priority order!
+            float priority;
+            uint face;
+        };
+        
+
+        Array<Pair> pairs;
+    };
+
+    // True when the normals on the two sides of the edge differ at either endpoint,
+    // i.e. when a vertex of this half-edge and its counterpart on the pair disagree.
+    static bool isNormalSeam(const HalfEdge::Edge * edge) {
+        if (edge->vertex->nor != edge->pair->next->vertex->nor) {
+            return true;
+        }
+        return edge->next->vertex->nor != edge->pair->vertex->nor;
+    }
+
+    // True when the texture coordinates on the two sides of the edge differ at either
+    // endpoint, i.e. when a vertex of this half-edge and its counterpart on the pair disagree.
+    static bool isTextureSeam(const HalfEdge::Edge * edge) {
+        if (edge->vertex->tex != edge->pair->next->vertex->tex) {
+            return true;
+        }
+        return edge->next->vertex->tex != edge->pair->vertex->tex;
+    }
+
+} // namespace
+
+
+// State kept for one chart while the AtlasBuilder is working on it.
+struct nv::ChartBuildData
+{
+    // All proxy data and accumulators start at zero; the lists and the queue start empty.
+    ChartBuildData(int id) :
+        id(id),
+        planeNormal(0),
+        centroid(0),
+        coneAxis(0),
+        coneAngle(0),
+        area(0),
+        boundaryLength(0),
+        normalSum(0),
+        centroidSum(0)
+    {
+    }
+
+    int id;
+
+    // Proxy info:
+    Vector3 planeNormal;
+    Vector3 centroid;
+    Vector3 coneAxis;
+    float coneAngle;
+
+    // Running totals, refreshed each time a face is added to the chart.
+    float area;
+    float boundaryLength;
+    Vector3 normalSum;
+    Vector3 centroidSum;
+
+    Array<uint> seeds;  // @@ These could be a pointers to the HalfEdge faces directly.
+    Array<uint> faces;
+    PriorityQueue candidates;
+};
+
+
+
+// Prepare the builder for mesh `m`: every face starts without a chart and without a
+// candidate (-1), and edge lengths and face areas are cached up front.
+AtlasBuilder::AtlasBuilder(const HalfEdge::Mesh * m) : mesh(m), facesLeft(m->faceCount())
+{
+    const uint faceCount = m->faceCount();
+    const uint edgeCount = m->edgeCount();
+
+    faceChartArray.resize(faceCount, -1);
+    faceCandidateArray.resize(faceCount, -1);
+
+    // @@ Floyd for the whole mesh is too slow. We could compute floyd progressively per patch as the patch grows. We need a better solution to compute most central faces.
+    //computeShortestPaths();
+
+    // Cache the length of every edge.
+    edgeLengths.resize(edgeCount);
+    for (uint e = 0; e < edgeCount; e++) {
+        const HalfEdge::Edge * edge = m->edgeAt(e);
+        nvDebugCheck(edge->id / 2 == e);
+
+        edgeLengths[e] = edge->length();
+    }
+
+    // Cache the area of every face.
+    faceAreas.resize(faceCount);
+    for (uint f = 0; f < faceCount; f++) {
+        faceAreas[f] = m->faceAt(f)->area();
+    }
+}
+
+// Frees every ChartBuildData held in chartArray.
+AtlasBuilder::~AtlasBuilder()
+{
+    for (uint c = 0, total = chartArray.count(); c != total; c++) {
+        delete chartArray[c];
+    }
+}
+
+
+// Flag the given faces as uncharted (-2 in faceChartArray), withdraw them from the
+// candidate list and deduct them from the number of faces left to assign.
+void AtlasBuilder::markUnchartedFaces(const Array<uint> & unchartedFaces)
+{
+    const uint unchartedFaceCount = unchartedFaces.count();
+
+    uint k = 0;
+    while (k < unchartedFaceCount) {
+        const uint face = unchartedFaces[k];
+        k++;
+
+        faceChartArray[face] = -2;
+        //faceCandidateArray[face] = -2; // @@ ?
+
+        removeCandidate(face);
+    }
+
+    nvDebugCheck(facesLeft >= unchartedFaceCount);
+    facesLeft -= unchartedFaceCount;
+}
+
+
+// Fill `shortestPaths` (a faceCount x faceCount matrix stored row by row) with the
+// length of the shortest path between every pair of faces, where adjacent faces are
+// linked by the distance between their centroids.
+void AtlasBuilder::computeShortestPaths()
+{
+    const uint n = mesh->faceCount();
+    shortestPaths.resize(n*n, FLT_MAX);
+
+    // Seed the matrix: zero on the diagonal, centroid distance between edge-adjacent faces.
+    for (uint i = 0; i < n; i++)
+    {
+        shortestPaths[i*n + i] = 0.0f;
+
+        const HalfEdge::Face * faceI = mesh->faceAt(i);
+        Vector3 centroidI = faceI->centroid();
+
+        for (HalfEdge::Face::ConstEdgeIterator it(faceI->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Edge * edge = it.current();
+
+            // Boundary edges have no face on the other side.
+            if (edge->isBoundary()) continue;
+
+            const HalfEdge::Face * faceJ = edge->pair->face;
+
+            const uint j = faceJ->id;
+            Vector3 centroidJ = faceJ->centroid();
+
+            const float hop = length(centroidI - centroidJ);
+            shortestPaths[j*n + i] = hop;
+            shortestPaths[i*n + j] = hop;
+        }
+    }
+
+    // Use Floyd-Warshall algorithm to compute all paths:
+    for (uint k = 0; k < n; k++)
+    {
+        for (uint i = 0; i < n; i++)
+        {
+            for (uint j = 0; j < n; j++)
+            {
+                const uint ij = i*n + j;
+                shortestPaths[ij] = min(shortestPaths[ij], shortestPaths[i*n + k]+shortestPaths[k*n + j]);
+            }
+        }
+    }
+}
+
+
+// Create up to `maxSeedCount` random charts, each grown by createRandomChart(threshold).
+// Stops early once no faces are left.
+void AtlasBuilder::placeSeeds(float threshold, uint maxSeedCount)
+{
+    // Instead of using a predefined number of seeds:
+    // - Add seeds one by one, growing chart until a certain threshold.
+    // - Undo charts and restart growing process.
+
+    // @@ How can we give preference to faces far from sharp features as in the LSCM paper?
+    //   - those points can be found using a simple flood filling algorithm.
+    //   - how do we weight the probabilities?
+
+    // No faces left means there is nothing to seed, so stop creating seeds.
+    for (uint seed = 0; seed < maxSeedCount && facesLeft != 0; seed++)
+    {
+        createRandomChart(threshold);
+    }
+}
+
+
+// Start a new chart on a randomly chosen face that has no chart yet (faceChartArray == -1)
+// and grow it with half of the given threshold.
+void AtlasBuilder::createRandomChart(float threshold)
+{
+    ChartBuildData * chart = new ChartBuildData(chartArray.count());
+    chartArray.append(chart);
+
+    // Draw a rank with getRange(facesLeft - 1), then scan for the free face of that rank.
+    const uint targetRank = rand.getRange(facesLeft - 1);
+
+    uint face = 0;
+    uint rank = 0;
+    for (;;)
+    {
+        // Advance to the next face that is not used by any chart.
+        while (faceChartArray[face] != -1) face++;
+
+        if (rank == targetRank) break;
+
+        rank++;
+        face++;
+    }
+
+    chart->seeds.append(face);
+
+    addFaceToChart(chart, face, true);
+
+    // Grow the chart as much as possible within the given threshold.
+    growChart(chart, threshold * 0.5f, facesLeft);
+    //growCharts(threshold - threshold * 0.75f / chartCount(), facesLeft);
+}
+// Assigns face f to chart: records ownership, decrements facesLeft, refreshes the chart's accumulated measures, then updates the candidate bookkeeping.
+void AtlasBuilder::addFaceToChart(ChartBuildData * chart, uint f, bool recomputeProxy)
+{
+    // Add face to chart.
+    chart->faces.append(f);
+    // The face must not belong to any chart yet.
+    nvDebugCheck(faceChartArray[f] == -1);
+    faceChartArray[f] = chart->id;
+
+    facesLeft--;
+
+    // Update area and boundary length, plus the normal and centroid sums, with the values evaluated for f.
+    chart->area = evaluateChartArea(chart, f);
+    chart->boundaryLength = evaluateBoundaryLength(chart, f);
+    chart->normalSum = evaluateChartNormalSum(chart, f);
+    chart->centroidSum = evaluateChartCentroidSum(chart, f);
+
+    if (recomputeProxy) {
+        // Update proxy and candidate's priorities.
+        updateProxy(chart);
+    }
+
+    // Update candidates: f itself stops being a candidate before the chart's candidates and priorities are refreshed.
+    removeCandidate(f);
+    updateCandidates(chart, f);
+    updatePriorities(chart);
+}
+
+// Return the entry of candidateArray with the lowest metric (the first one on ties).
+// The candidate array is expected to be non-empty.
+// @@ Get N best candidates in one pass.
+const AtlasBuilder::Candidate & AtlasBuilder::getBestCandidate() const
+{
+    const uint candidateCount = candidateArray.count();
+    nvCheck(candidateCount > 0);
+
+    uint bestIndex = 0;
+    float lowestMetric = FLT_MAX;
+
+    for (uint c = 0; c < candidateCount; c++)
+    {
+        const Candidate & current = candidateArray[c];
+
+        // Only a strictly lower metric replaces the current best.
+        if (current.metric < lowestMetric) {
+            bestIndex = c;
+            lowestMetric = current.metric;
+        }
+    }
+
+    return candidateArray[bestIndex];
+}
+
+
+// Add up to `faceCount` faces, always taking the globally best candidate, as long as
+// its metric does not exceed `threshold`.
+// Returns true if any of the charts can grow more.
+bool AtlasBuilder::growCharts(float threshold, uint faceCount)
+{
+#if 1 // Using one global list.
+
+    // Never try to add more faces than are left.
+    faceCount = min(faceCount, facesLeft);
+
+    uint added = 0;
+    while (added < faceCount)
+    {
+        const Candidate & best = getBestCandidate();
+
+        if (best.metric > threshold) {
+            return false; // Can't grow more.
+        }
+
+        addFaceToChart(best.chart, best.face);
+        added++;
+    }
+
+    return facesLeft != 0; // Can continue growing.
+
+#else // Using one list per chart.
+    bool canGrowMore = false;
+
+    const uint chartCount = chartArray.count();
+    for (uint c = 0; c < chartCount; c++)
+    {
+        if (growChart(chartArray[c], threshold, faceCount))
+        {
+            canGrowMore = true;
+        }
+    }
+
+    return canGrowMore;
+#endif
+}
+
+// Tries to add `faceCount` faces to `chart`, taking them from the chart's own
+// candidate queue while their priority stays within `threshold`.
+// Returns true when, after adding them, the queue still holds a candidate within
+// the threshold; false when the queue ran dry or its front exceeds the threshold.
+bool AtlasBuilder::growChart(ChartBuildData * chart, float threshold, uint faceCount)
+{
+    uint added = 0;
+    while (added < faceCount)
+    {
+        const bool exhausted = chart->candidates.count() == 0 || chart->candidates.firstPriority() > threshold;
+        if (exhausted)
+        {
+            return false;
+        }
+
+        const uint f = chart->candidates.pop();
+        if (faceChartArray[f] != -1)
+        {
+            // Stale entry: the face was claimed in the meantime. It does not count.
+            continue;
+        }
+
+        addFaceToChart(chart, f);
+        added++;
+    }
+
+    return chart->candidates.count() != 0 && !(chart->candidates.firstPriority() > threshold);
+}
+
+
+// Discards every face assignment and candidate, then restarts each chart from the
+// last entry of its seed list.
+void AtlasBuilder::resetCharts()
+{
+    const uint totalFaces = mesh->faceCount();
+
+    // Global state: nothing is assigned, nothing is a candidate.
+    facesLeft = totalFaces;
+    candidateArray.clear();
+
+    for (uint f = 0; f < totalFaces; ++f)
+    {
+        faceCandidateArray[f] = -1;
+        faceChartArray[f] = -1;
+    }
+
+    // Per-chart state: empty the chart and re-add its current seed.
+    const uint chartCount = chartArray.count();
+    for (uint c = 0; c < chartCount; ++c)
+    {
+        ChartBuildData * chart = chartArray[c];
+        const uint seed = chart->seeds.back();
+
+        chart->faces.clear();
+        chart->candidates.clear();
+
+        chart->centroidSum = Vector3(0);
+        chart->normalSum = Vector3(0);
+        chart->boundaryLength = 0.0f;
+        chart->area = 0.0f;
+
+        addFaceToChart(chart, seed);
+    }
+}
+
+
+// Pushes onto the chart's candidate queue every face adjacent to `f` (across a
+// non-boundary edge) that is not assigned to any chart yet.
+void AtlasBuilder::updateCandidates(ChartBuildData * chart, uint f)
+{
+    const HalfEdge::Face * face = mesh->faceAt(f);
+
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * opposite = it.current()->pair;
+        if (opposite->isBoundary())
+        {
+            continue; // No face on the other side.
+        }
+
+        const uint neighbor = opposite->face->id;
+        if (faceChartArray[neighbor] != -1)
+        {
+            continue; // Already taken.
+        }
+
+        chart->candidates.push(neighbor);
+    }
+}
+
+
+// Recomputes the proxy of every chart, in chart-array order.
+void AtlasBuilder::updateProxies()
+{
+    for (uint c = 0, n = chartArray.count(); c < n; ++c)
+    {
+        ChartBuildData * chart = chartArray[c];
+        updateProxy(chart);
+    }
+}
+
+
+namespace {
+
+    // Sum of the absolute values of the four components of v (its L1 norm).
+    float absoluteSum(Vector4::Arg v)
+    {
+        return fabs(v.x) + fabs(v.y) + fabs(v.z) + fabs(v.w);
+    }
+
+    //#pragma message(NV_FILE_LINE "FIXME: Using the c=cos(teta) substitution, the equation system becomes linear and we can avoid the newton solver.")
+
+    // Iterative, Newton-style fit of a cone to the face normals of a chart.
+    // The unknown X packs the cone axis in xyz and the cone angle in w; every face
+    // contributes the squared residual (dot(X.xyz, N) - cos(X.w))^2 to F.
+    // NOTE(review): only referenced from the commented-out block in updateProxy.
+    struct ConeFitting
+    {
+        // m:  mesh the chart's faces are looked up in.
+        // g:  scale applied to every step (X -= g * dX).
+        // tf: solve() keeps iterating while absoluteSum(D) > tf ...
+        // tx: ... or while absoluteSum(dX) > tx.
+        ConeFitting(const HalfEdge::Mesh * m, float g, float tf, float tx) : mesh(m), gamma(g), tolf(tf), tolx(tx), F(0), D(0), H(0) {
+        }
+
+        // Accumulates the contribution of one face normal N, evaluated at the
+        // current X, into F (objective), D (first derivatives) and H (second derivatives).
+        // The derivative expressions follow the derivation written out in updateProxy.
+        // A (the face area) is currently unused.
+        void addTerm(Vector3 N, float A)
+        {
+            const float c = cosf(X.w);
+            const float s = sinf(X.w);
+            const float tmp = dot(X.xyz(), N) - c;
+
+            F += tmp * tmp;
+
+            D.x += 2 * X.x * tmp;
+            D.y += 2 * X.y * tmp;
+            D.z += 2 * X.z * tmp;
+            D.w += 2 * s * tmp;
+
+            // Accumulate like F and D do: the system solved in solve() must describe
+            // all faces of the chart, not just the last one that was added.
+            H(0,0) += 2 * X.x * N.x + 2 * tmp;
+            H(0,1) += 2 * X.x * N.y;
+            H(0,2) += 2 * X.x * N.z;
+            H(0,3) += 2 * X.x * s;
+
+            H(1,0) += 2 * X.y * N.x;
+            H(1,1) += 2 * X.y * N.y + 2 * tmp;
+            H(1,2) += 2 * X.y * N.z;
+            H(1,3) += 2 * X.y * s;
+
+            H(2,0) += 2 * X.z * N.x;
+            H(2,1) += 2 * X.z * N.y;
+            H(2,2) += 2 * X.z * N.z + 2 * tmp;
+            H(2,3) += 2 * X.z * s;
+
+            H(3,0) += 2 * s * N.x;
+            H(3,1) += 2 * s * N.y;
+            H(3,2) += 2 * s * N.z;
+            H(3,3) += 2 * s * s + 2 * c * tmp;
+        }
+
+        // Starting from `start`, repeatedly rebuilds F/D/H over all faces of `chart`,
+        // solves H * dX = D and steps X by -gamma * dX (re-normalizing the axis),
+        // until both absoluteSum(D) <= tolf and absoluteSum(dX) <= tolx.
+        // Returns the final X. There is no iteration cap.
+        Vector4 solve(ChartBuildData * chart, Vector4 start)
+        {
+            const uint faceCount = chart->faces.count();
+
+            X = start;
+
+            // Declared once, outside the loop: the loop condition below must see the
+            // step computed by the iteration that just finished.
+            Vector4 dX;
+
+            do {
+                // The accumulators describe the current X only; start from zero.
+                F = 0;
+                D = Vector4(0);
+                H = Matrix(0);
+
+                for (uint i = 0; i < faceCount; i++)
+                {
+                    const HalfEdge::Face * face = mesh->faceAt(chart->faces[i]);
+
+                    addTerm(face->normal(), face->area());
+                }
+
+                //solveKramer(H, D, &dX);
+                solveLU(H, D, &dX);
+
+                // @@ Do a full newton step and reduce by half if F doesn't decrease.
+                X -= gamma * dX;
+
+                // Constrain normal to be normalized.
+                X = Vector4(normalize(X.xyz()), X.w);
+
+            } while(absoluteSum(D) > tolf || absoluteSum(dX) > tolx);
+
+            return X;
+        }
+
+        HalfEdge::Mesh const * const mesh;
+        const float gamma;
+        const float tolf;
+        const float tolx;
+
+        // Current estimate: cone axis in xyz, cone angle in w.
+        Vector4 X;
+
+        // Objective value, first derivatives and second derivatives at X.
+        float F;
+        Vector4 D;
+        Matrix H;
+    };
+
+    // Normalized face normal, assuming the face is a triangle.
+    // Goes through normalizeSafe with a Vector3(0) fallback, so a degenerate
+    // triangle presumably yields the zero vector rather than NaNs.
+    static Vector3 triangleNormal(const HalfEdge::Face * face)
+    {
+        const HalfEdge::Edge * first = face->edge;
+
+        const Vector3 origin = first->vertex->pos;
+        const Vector3 toSecond = first->next->vertex->pos - origin;
+        const Vector3 toThird = first->next->next->vertex->pos - origin;
+
+        return normalizeSafe(cross(toThird, toSecond), Vector3(0), 0.0f);
+    }
+
+    // Face normal of a triangle, left unnormalized: the cross product of two edge
+    // vectors, whose length is twice the triangle's area.
+    static Vector3 triangleNormalAreaScaled(const HalfEdge::Face * face)
+    {
+        const HalfEdge::Edge * first = face->edge;
+
+        const Vector3 origin = first->vertex->pos;
+        const Vector3 toSecond = first->next->vertex->pos - origin;
+        const Vector3 toThird = first->next->next->vertex->pos - origin;
+
+        return cross(toThird, toSecond);
+    }
+
+    // Average of the edge midpoints weighted by the edge length.
+    // I want a point inside the triangle, but closer to the circumcenter.
+    // A triangle whose three vertices coincide returns that vertex.
+    static Vector3 triangleCenter(const HalfEdge::Face * face)
+    {
+        Vector3 p0 = face->edge->vertex->pos;
+        Vector3 p1 = face->edge->next->vertex->pos;
+        Vector3 p2 = face->edge->next->next->vertex->pos;
+
+        float l0 = length(p1 - p0);
+        float l1 = length(p2 - p1);
+        float l2 = length(p0 - p2);
+
+        const float perimeter = l0 + l1 + l2;
+        if (perimeter == 0.0f)
+        {
+            // All edges have zero length; avoid dividing by zero.
+            return p0;
+        }
+
+        // The midpoint of an edge is half the sum of its end points. Without the
+        // 0.5 the weights add up to 2 and the result lands at twice the intended
+        // position (measured from the origin), i.e. outside the triangle.
+        Vector3 m0 = (p0 + p1) * (0.5f * l0 / perimeter);
+        Vector3 m1 = (p1 + p2) * (0.5f * l1 / perimeter);
+        Vector3 m2 = (p2 + p0) * (0.5f * l2 / perimeter);
+
+        return m0 + m1 + m2;
+    }
+
+} // namespace
+
+// Recomputes the chart's proxy from its accumulators:
+//  - planeNormal: normalSum passed through normalizeSafe (fallback Vector3(0)).
+//  - centroid: centroidSum divided by the number of faces in the chart.
+// The cone-fitting alternative at the bottom is disabled.
+void AtlasBuilder::updateProxy(ChartBuildData * chart)
+{
+    //#pragma message(NV_FILE_LINE "TODO: Use best fit plane instead of average normal.")
+
+    chart->planeNormal = normalizeSafe(chart->normalSum, Vector3(0), 0.0f);
+    // NOTE(review): divides by the face count; a chart with no faces divides by zero.
+    chart->centroid = chart->centroidSum / float(chart->faces.count());
+
+    //#pragma message(NV_FILE_LINE "TODO: Experiment with conic fitting.")
+
+    // Derivation used by ConeFitting::addTerm (x,y,z = cone axis, w = cone angle,
+    // Nt = normal of a face):
+
+    // F = (Nc*Nt - cos Oc)^2 = (x*Nt_x + y*Nt_y + z*Nt_z - cos w)^2
+    // dF/dx = 2 * x * (x*Nt_x + y*Nt_y + z*Nt_z - cos w)
+    // dF/dy = 2 * y * (x*Nt_x + y*Nt_y + z*Nt_z - cos w)
+    // dF/dz = 2 * z * (x*Nt_x + y*Nt_y + z*Nt_z - cos w)
+    // dF/dw = 2 * sin w * (x*Nt_x + y*Nt_y + z*Nt_z - cos w)
+
+    // JacobianMatrix({
+    // 2 * x * (x*Nt_x + y*Nt_y + z*Nt_z - Cos(w)),
+    // 2 * y * (x*Nt_x + y*Nt_y + z*Nt_z - Cos(w)),
+    // 2 * z * (x*Nt_x + y*Nt_y + z*Nt_z - Cos(w)),
+    // 2 * Sin(w) * (x*Nt_x + y*Nt_y + z*Nt_z - Cos(w))}, {x,y,z,w})
+
+    // H[0,0] = 2 * x * Nt_x + 2 * (x*Nt_x + y*Nt_y + z*Nt_z - cos(w));
+    // H[0,1] = 2 * x * Nt_y;
+    // H[0,2] = 2 * x * Nt_z;
+    // H[0,3] = 2 * x * sin(w);
+
+    // H[1,0] = 2 * y * Nt_x;
+    // H[1,1] = 2 * y * Nt_y + 2 * (x*Nt_x + y*Nt_y + z*Nt_z - cos(w));
+    // H[1,2] = 2 * y * Nt_z;
+    // H[1,3] = 2 * y * sin(w);
+
+    // H[2,0] = 2 * z * Nt_x;
+    // H[2,1] = 2 * z * Nt_y;
+    // H[2,2] = 2 * z * Nt_z + 2 * (x*Nt_x + y*Nt_y + z*Nt_z - cos(w));
+    // H[2,3] = 2 * z * sin(w);
+
+    // H[3,0] = 2 * sin(w) * Nt_x;
+    // H[3,1] = 2 * sin(w) * Nt_y;
+    // H[3,2] = 2 * sin(w) * Nt_z;
+    // H[3,3] = 2 * sin(w) * sin(w) + 2 * cos(w) * (x*Nt_x + y*Nt_y + z*Nt_z - cos(w));
+
+    // @@ Cone fitting might be quite slow.
+
+    /*ConeFitting coneFitting(mesh, 0.1f, 0.001f, 0.001f);
+
+    Vector4 start = Vector4(chart->coneAxis, chart->coneAngle);
+    Vector4 solution = coneFitting.solve(chart, start);
+
+    chart->coneAxis = solution.xyz();
+    chart->coneAngle = solution.w;*/
+}
+
+
+
+// Relocates the seed of every chart. Returns true if at least one chart picked a
+// seed it had not used before. Every chart is visited; there is no early-out.
+bool AtlasBuilder::relocateSeeds()
+{
+    bool changed = false;
+
+    for (uint c = 0, n = chartArray.count(); c < n; ++c)
+    {
+        const bool moved = relocateSeed(chartArray[c]);
+        changed = changed || moved;
+    }
+
+    return changed;
+}
+
+
+// Picks a new seed for `chart`: among the faces that fit the proxy best (the queue
+// is created with a size of N), the one whose center is closest to the chart
+// centroid. The chosen face ends up last in chart->seeds.
+// Returns true if that face had never been a seed of this chart, false if it had
+// (or if the chart has no faces).
+bool AtlasBuilder::relocateSeed(ChartBuildData * chart)
+{
+    const uint faceCount = chart->faces.count();
+    if (faceCount == 0)
+    {
+        // Nothing to choose from, and the centroid would be a division by zero.
+        return false;
+    }
+
+    Vector3 centroid = computeChartCentroid(chart);
+
+    const uint N = 10;  // @@ Hardcoded to 10?
+    PriorityQueue bestTriangles(N);
+
+    // Find the first N triangles that fit the proxy best.
+    for (uint i = 0; i < faceCount; i++)
+    {
+        float priority = evaluateProxyFitMetric(chart, chart->faces[i]);
+        bestTriangles.push(priority, chart->faces[i]);
+    }
+
+    // Of those, choose the most central triangle: the one closest to the centroid.
+    // (The disabled metric below measures distance to the boundary, where larger
+    // means more central; with distance to the centroid it is the smallest value.)
+    uint mostCentral = chart->faces[0];
+    float minDistance = FLT_MAX;
+
+    const uint bestCount = bestTriangles.count();
+    for (uint i = 0; i < bestCount; i++)
+    {
+        const HalfEdge::Face * face = mesh->faceAt(bestTriangles.pairs[i].face);
+        Vector3 faceCentroid = triangleCenter(face);
+
+        float distance = length(centroid - faceCentroid);
+
+        /*#pragma message(NV_FILE_LINE "TODO: Implement evaluateDistanceToBoundary.")
+        float distance = evaluateDistanceToBoundary(chart, bestTriangles.pairs[i].face);*/
+
+        if (distance < minDistance)
+        {
+            minDistance = distance;
+            mostCentral = bestTriangles.pairs[i].face;
+        }
+    }
+    nvDebugCheck(minDistance < FLT_MAX);
+
+    // In order to prevent k-means cycles we record all the previously chosen seeds.
+    uint index;
+    if (chart->seeds.find(mostCentral, &index))
+    {
+        // Move new seed to the end of the seed array.
+        uint last = chart->seeds.count() - 1;
+        swap(chart->seeds[index], chart->seeds[last]);
+        return false;
+    }
+    else
+    {
+        // Append new seed.
+        chart->seeds.append(mostCentral);
+        return true;
+    }
+}
+
+// Removes face `f` from the global candidate list, if it is in it.
+// The freed slot is filled with the last entry, whose index is then patched in
+// faceCandidateArray.
+void AtlasBuilder::removeCandidate(uint f)
+{
+    const int slot = faceCandidateArray[f];
+    if (slot == -1) {
+        return; // Not a candidate.
+    }
+
+    faceCandidateArray[f] = -1;
+
+    const uint lastSlot = candidateArray.count() - 1;
+    if (uint(slot) == lastSlot) {
+        candidateArray.popBack();
+        return;
+    }
+
+    candidateArray.replaceWithLast(slot);
+    faceCandidateArray[candidateArray[slot].face] = slot;
+}
+
+// Records that `chart` can take face `f` at cost `metric` in the global list.
+// A face not listed yet gets a new entry. An existing entry is overwritten when
+// it already belongs to `chart` or when the new metric is lower than the stored one.
+void AtlasBuilder::updateCandidate(ChartBuildData * chart, uint f, float metric)
+{
+    const int slot = faceCandidateArray[f];
+
+    if (slot != -1) {
+        Candidate & existing = candidateArray[slot];
+        nvDebugCheck(existing.face == f);
+
+        if (chart == existing.chart || metric < existing.metric) {
+            existing.chart = chart;
+            existing.metric = metric;
+        }
+        return;
+    }
+
+    // First time this face is offered: append a new entry.
+    const uint index = candidateArray.count();
+    faceCandidateArray[f] = index;
+    candidateArray.resize(index + 1);
+
+    Candidate & fresh = candidateArray[index];
+    fresh.face = f;
+    fresh.chart = chart;
+    fresh.metric = metric;
+}
+
+
+// Re-scores every entry of the chart's candidate queue, mirrors the new score into
+// the global candidate list for faces that are still unassigned, and re-sorts the queue.
+void AtlasBuilder::updatePriorities(ChartBuildData * chart)
+{
+    const uint candidateCount = chart->candidates.count();
+    for (uint i = 0; i < candidateCount; i++)
+    {
+        const uint face = chart->candidates.pairs[i].face;
+        const float priority = evaluatePriority(chart, face);
+
+        chart->candidates.pairs[i].priority = priority;
+
+        if (faceChartArray[face] == -1)
+        {
+            updateCandidate(chart, face, priority);
+        }
+    }
+
+    // Priorities changed in place, so the order has to be restored.
+    chart->candidates.sort();
+}
+
+
+// Evaluate combined metric.
+// Returns the cost of adding `face` to `chart`: a weighted sum of the proxy-fit,
+// roundness, straightness, normal-seam and texture-seam metrics, with the weights
+// taken from `settings`. Lower is better. The cost is forced to FLT_MAX when a hard
+// limit (maximum chart area, maximum boundary length, strict normal seams) is violated.
+float AtlasBuilder::evaluatePriority(ChartBuildData * chart, uint face)
+{
+    // Estimate boundary length and area:
+    // (the values the chart would have after adding the face)
+    float newBoundaryLength = evaluateBoundaryLength(chart, face);
+    float newChartArea = evaluateChartArea(chart, face);
+
+    float F = evaluateProxyFitMetric(chart, face);
+    float C = evaluateRoundnessMetric(chart, face, newBoundaryLength, newChartArea);
+    float P = evaluateStraightnessMetric(chart, face);
+
+    // Penalize faces that cross seams, reward faces that close seams or reach boundaries.
+    float N = evaluateNormalSeamMetric(chart, face);
+    float T = evaluateTextureSeamMetric(chart, face);
+
+    //float R = evaluateCompletenessMetric(chart, face);
+
+    //float D = evaluateDihedralAngleMetric(chart, face);
+    // @@ Add a metric based on local dihedral angle.
+
+    // @@ Tweaking the normal and texture seam metrics.
+    // - Cause more impedance. Never cross 90 degree edges.
+    // - 
+
+    float cost = float(
+        settings.proxyFitMetricWeight * F + 
+        settings.roundnessMetricWeight * C + 
+        settings.straightnessMetricWeight * P +
+        settings.normalSeamMetricWeight * N +
+        settings.textureSeamMetricWeight * T);
+
+    /*cost = settings.proxyFitMetricWeight * powf(F, settings.proxyFitMetricExponent);
+    cost = max(cost, settings.roundnessMetricWeight * powf(C, settings.roundnessMetricExponent));
+    cost = max(cost, settings.straightnessMetricWeight * pow(P, settings.straightnessMetricExponent));
+    cost = max(cost, settings.normalSeamMetricWeight * N);
+    cost = max(cost, settings.textureSeamMetricWeight * T);*/
+
+    // Enforce limits strictly:
+    if (newChartArea > settings.maxChartArea) cost = FLT_MAX;
+    if (newBoundaryLength > settings.maxBoundaryLength) cost = FLT_MAX;
+
+    // Make sure normal seams are fully respected:
+    // (a weight of 1000 or more turns any normal seam into a hard barrier)
+    if (settings.normalSeamMetricWeight >= 1000 && N != 0) cost = FLT_MAX;
+
+    // Catches a NaN or infinite cost coming out of the metrics above.
+    nvCheck(isFinite(cost));
+    return cost;
+}
+
+
+// Plane-fit cost of face `f` against the chart proxy: 1 - dot(face normal, plane normal).
+// 0 when the normals agree, 1 when they are perpendicular (or one of them is the
+// zero fallback), up to 2 when they are opposite -- assuming both come out of
+// normalizeSafe as unit vectors.
+float AtlasBuilder::evaluateProxyFitMetric(ChartBuildData * chart, uint f)
+{
+    const Vector3 faceNormal = triangleNormal(mesh->faceAt(f));
+    //return square(dot(chart->coneAxis, faceNormal) - cosf(chart->coneAngle));
+
+    // Use plane fitting metric for now:
+    //return square(1 - dot(faceNormal, chart->planeNormal)); // @@ normal deviations should be weighted by face area
+    const float alignment = dot(faceNormal, chart->planeNormal);
+    return 1 - alignment; // @@ normal deviations should be weighted by face area
+
+    // Find distance to chart.
+    /*Vector3 faceCentroid = face->centroid();
+
+    float dist = 0;
+    int count = 0;
+
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * edge = it.current();
+
+        if (!edge->isBoundary()) {
+            const HalfEdge::Face * neighborFace = edge->pair()->face();
+            if (faceChartArray[neighborFace->id()] == chart->id) {
+                dist += length(neighborFace->centroid() - faceCentroid);
+                count++;
+            }
+        }
+    }
+
+    dist /= (count * count);
+
+    return (1 - dot(faceNormal, chart->planeNormal)) * dist;*/
+
+    //return (1 - dot(faceNormal, chart->planeNormal));
+}
+
+// Placeholder: meant to measure how far `face` is from the boundary of `chart`.
+// Not implemented; ignores both arguments and always returns 0.
+float AtlasBuilder::evaluateDistanceToBoundary(ChartBuildData * chart, uint face)
+{
+//#pragma message(NV_FILE_LINE "TODO: Evaluate distance to boundary metric.")
+
+    // @@ This is needed for the seed relocation code.
+    // @@ This could provide a better roundness metric.
+    
+    return 0.0f;
+}
+
+// Straight-line distance between the center of face `f` and the center of the
+// chart's current seed (the last entry of chart->seeds).
+float AtlasBuilder::evaluateDistanceToSeed(ChartBuildData * chart, uint f)
+{
+    //const uint seed = chart->seeds.back();
+    //const uint faceCount = mesh->faceCount();
+    //return shortestPaths[seed * faceCount + f];
+
+    const Vector3 seedCenter = triangleCenter(mesh->faceAt(chart->seeds.back()));
+    const Vector3 faceCenter = triangleCenter(mesh->faceAt(f));
+
+    return length(seedCenter - faceCenter);
+}
+
+
+// Roundness cost of growing the chart to the given boundary length and area.
+// Compares boundary^2 / area before and after: if the ratio does not get worse the
+// cost is 0, otherwise it is newBoundaryLength^2 / (4 * PI * newChartArea).
+// `face` itself is not consulted; the caller passes the resulting totals.
+float AtlasBuilder::evaluateRoundnessMetric(ChartBuildData * chart, uint face, float newBoundaryLength, float newChartArea)
+{
+    // @@ D-charts use distance to seed.
+    // C(c,t) = pi * D(S_c,t)^2 / A_c
+    //return PI * square(evaluateDistanceToSeed(chart, face)) / chart->area;
+    //return PI * square(evaluateDistanceToSeed(chart, face)) / chart->area;
+    //return 2 * PI * evaluateDistanceToSeed(chart, face) / chart->boundaryLength;
+
+    // Garland's Hierarchical Face Clustering paper uses ratio between boundary and area, which is easier to compute and might work as well:
+    // roundness = D^2/4*pi*A -> circle = 1, non circle greater than 1
+
+    //return square(newBoundaryLength) / (newChartArea * 4 * PI);
+    const float currentRatio = square(chart->boundaryLength) / chart->area;
+    const float candidateRatio = square(newBoundaryLength) / newChartArea;
+
+    if (!(candidateRatio > currentRatio)) {
+        // Offer no impedance to faces that improve roundness.
+        return 0;
+    }
+
+    return square(newBoundaryLength) / (newChartArea * 4 * PI);
+
+    //return square(newBoundaryLength) / (4 * PI * newChartArea);
+    //return clamp(1 - (4 * PI * newChartArea) / square(newBoundaryLength), 0.0f, 1.0f);
+
+    // Use the ratio between the new roundness vs. the previous roundness.
+    // - If we use the absolute metric, when the initial face is very long, then it's hard to make any progress.
+    //return (square(newBoundaryLength) * chart->area) / (square(chart->boundaryLength) * newChartArea);
+    //return (4 * PI * newChartArea) / square(newBoundaryLength) - (4 * PI * chart->area) / square(chart->boundaryLength);
+
+    //if (square(newBoundaryLength) * chart->area) / (square(chart->boundaryLength) * newChartArea);
+
+}
+
+// Straightness cost of adding face `f`: splits the face's edge length into the part
+// shared with the chart (inner) and the rest (outer, including mesh boundary edges),
+// and returns (outer - inner) / (outer + inner) clamped to be at most 0. Only faces
+// that close gaps (more inner than outer length) get a reward; others get 0.
+float AtlasBuilder::evaluateStraightnessMetric(ChartBuildData * chart, uint f)
+{
+    float outer = 0.0f;
+    float inner = 0.0f;
+
+    const HalfEdge::Face * face = mesh->faceAt(f);
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * edge = it.current();
+
+        //float l = edge->length();
+        const float edgeLength = edgeLengths[edge->id/2];
+
+        bool sharedWithChart = false;
+        if (!edge->isBoundary())
+        {
+            const uint neighborFaceId = edge->pair->face->id;
+            sharedWithChart = (faceChartArray[neighborFaceId] == chart->id);
+        }
+
+        if (sharedWithChart) {
+            inner += edgeLength;
+        }
+        else {
+            outer += edgeLength;
+        }
+    }
+    nvDebugCheck(inner != 0.0f); // Candidate face must be adjacent to chart. @@ This is not true if the input mesh has zero-length edges.
+
+    //return l_out / l_in;
+    const float ratio = (outer - inner) / (outer + inner);
+    //if (ratio < 0) ratio *= 10; // Encourage closing gaps.
+    return min(ratio, 0.0f); // Only use the straightness metric to close gaps.
+    //return ratio;
+}
+
+
+// Normal-seam cost of adding face `f`. Only the edges that `f` shares with the chart
+// are considered. Each such edge that is a normal seam contributes its length scaled
+// by how much the vertex normals on both sides disagree (1 - average clamped dot
+// product). The result is that sum divided by the total shared length, or 0 when
+// no seam contributes.
+float AtlasBuilder::evaluateNormalSeamMetric(ChartBuildData * chart, uint f)
+{
+    float weightedSeamLength = 0.0f;
+    float sharedLength = 0.0f;
+
+    const HalfEdge::Face * face = mesh->faceAt(f);
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * edge = it.current();
+
+        // Only edges whose other side already belongs to this chart matter.
+        if (!edge->isBoundary() && faceChartArray[edge->pair->face->id] == chart->id)
+        {
+            //float l = edge->length();
+            const float edgeLength = edgeLengths[edge->id/2];
+
+            sharedLength += edgeLength;
+
+            // Make sure it's a normal seam.
+            if (edge->isSeam() && isNormalSeam(edge))
+            {
+                const float d0 = clamp(dot(edge->vertex->nor, edge->pair->next->vertex->nor), 0.0f, 1.0f);
+                const float d1 = clamp(dot(edge->next->vertex->nor, edge->pair->vertex->nor), 0.0f, 1.0f);
+                //float a0 = clamp(acosf(d0) / (PI/2), 0.0f, 1.0f);
+                //float a1 = clamp(acosf(d1) / (PI/2), 0.0f, 1.0f);
+                //l *= (a0 + a1) * 0.5f;
+
+                weightedSeamLength += edgeLength * (1 - (d0 + d1) * 0.5f);
+            }
+        }
+    }
+
+    if (weightedSeamLength == 0) {
+        return 0.0f; // Also avoids dividing by a zero shared length.
+    }
+
+    return weightedSeamLength / sharedLength;
+}
+
+
+// Texture-seam cost of adding face `f`: among the edges that `f` shares with the
+// chart, the fraction of their length that lies on texture seams. 0 when there is
+// no such seam.
+float AtlasBuilder::evaluateTextureSeamMetric(ChartBuildData * chart, uint f)
+{
+    float textureSeamLength = 0.0f;
+    //float newSeamLength = 0.0f;
+    //float oldSeamLength = 0.0f;
+    float sharedLength = 0.0f;
+
+    const HalfEdge::Face * face = mesh->faceAt(f);
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * edge = it.current();
+
+        /*float l = edge->length();
+        totalLength += l;
+
+        if (edge->isBoundary() || !edge->isSeam()) {
+            continue;
+        }
+
+        // Make sure it's a texture seam.
+        if (isTextureSeam(edge))
+        {
+            uint neighborFaceId = edge->pair()->face()->id();
+            if (faceChartArray[neighborFaceId] != chart->id) {
+                newSeamLength += l;
+            }
+            else {
+                oldSeamLength += l;
+            }
+        }*/
+
+        // Only edges whose other side already belongs to this chart matter.
+        if (!edge->isBoundary() && faceChartArray[edge->pair->face->id] == chart->id)
+        {
+            //float l = edge->length();
+            const float edgeLength = edgeLengths[edge->id/2];
+            sharedLength += edgeLength;
+
+            // Make sure it's a texture seam.
+            if (edge->isSeam() && isTextureSeam(edge))
+            {
+                textureSeamLength += edgeLength;
+            }
+        }
+    }
+
+    if (textureSeamLength == 0.0f) {
+        return 0.0f; // Avoid division by zero.
+    }
+
+    return textureSeamLength / sharedLength;
+}
+
+
+// Generic seam cost of adding face `f`: (created - closed) / perimeter, where
+//  - created: length of mesh boundary edges plus seam edges whose other side is not in the chart,
+//  - closed:  length of seam edges whose other side is already in the chart,
+//  - perimeter: total length of the face's edges.
+float AtlasBuilder::evaluateSeamMetric(ChartBuildData * chart, uint f)
+{
+    float created = 0.0f;
+    float closed = 0.0f;
+    float perimeter = 0.0f;
+
+    const HalfEdge::Face * face = mesh->faceAt(f);
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * edge = it.current();
+
+        //float l = edge->length();
+        const float edgeLength = edgeLengths[edge->id/2];
+
+        if (edge->isBoundary())
+        {
+            created += edgeLength;
+        }
+        else if (edge->isSeam())
+        {
+            const uint neighborFaceId = edge->pair->face->id;
+            if (faceChartArray[neighborFaceId] == chart->id) {
+                closed += edgeLength;
+            }
+            else {
+                created += edgeLength;
+            }
+        }
+
+        perimeter += edgeLength;
+    }
+
+    return (created - closed) / perimeter;
+}
+
+
+// Area the chart would have with face `f` added: current chart area plus the
+// cached area of the face. Does not modify the chart.
+float AtlasBuilder::evaluateChartArea(ChartBuildData * chart, uint f)
+{
+    //return chart->area + face->area();
+    return chart->area + faceAreas[mesh->faceAt(f)->id];
+}
+
+
+// Boundary length the chart would have with face `f` added. Edges shared with the
+// chart stop being boundary (subtracted); all other edges of the face become
+// boundary (added). The result is clamped to be non-negative. Does not modify the chart.
+float AtlasBuilder::evaluateBoundaryLength(ChartBuildData * chart, uint f)
+{
+    float total = chart->boundaryLength;
+
+    const HalfEdge::Face * face = mesh->faceAt(f);
+    for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Edge * edge = it.current();
+        //float edgeLength = edge->length();
+        const float len = edgeLengths[edge->id/2];
+
+        const bool sharedWithChart = !edge->isBoundary() && faceChartArray[edge->pair->face->id] == chart->id;
+
+        if (sharedWithChart) {
+            total -= len;
+        }
+        else {
+            total += len;
+        }
+    }
+    //nvDebugCheck(boundaryLength >= 0);
+
+    return max(0.0f, total);  // @@ Hack!
+}
+
+// Normal sum the chart would have with face `f` added: current sum plus the
+// unnormalized (area-scaled) triangle normal of the face. Does not modify the chart.
+Vector3 AtlasBuilder::evaluateChartNormalSum(ChartBuildData * chart, uint f)
+{
+    const Vector3 faceTerm = triangleNormalAreaScaled(mesh->faceAt(f));
+    return chart->normalSum + faceTerm;
+}
+
+// Centroid sum the chart would have with face `f` added: current sum plus the
+// centroid of the face. Does not modify the chart.
+Vector3 AtlasBuilder::evaluateChartCentroidSum(ChartBuildData * chart, uint f)
+{
+    const Vector3 faceTerm = mesh->faceAt(f)->centroid();
+    return chart->centroidSum + faceTerm;
+}
+
+
+// Unweighted mean of triangleCenter() over all faces of the chart.
+// The chart is expected to contain at least one face (the sum is divided by the face count).
+Vector3 AtlasBuilder::computeChartCentroid(const ChartBuildData * chart)
+{
+    const uint n = chart->faces.count();
+
+    Vector3 sum(0);
+    for (uint k = 0; k < n; ++k)
+    {
+        sum += triangleCenter(mesh->faceAt(chart->faces[k]));
+    }
+
+    return sum / float(n);
+}
+
+
+// Keeps creating charts (createRandomChart with the given threshold) until no
+// unassigned face is left. Terminates because every created chart takes at least
+// its seed face through addFaceToChart, which decrements facesLeft.
+void AtlasBuilder::fillHoles(float threshold)
+{
+    while (facesLeft > 0)
+    {
+        createRandomChart(threshold);
+    }
+}
+
+
+// Folds `chart` into `owner`: reassigns all of its faces, adds up the accumulators
+// and refreshes the owner's proxy. `sharedBoundaryLength` is the length of the
+// border between the two charts; it is subtracted from the combined boundary length.
+// `chart` itself is neither emptied nor deleted here.
+void AtlasBuilder::mergeChart(ChartBuildData * owner, ChartBuildData * chart, float sharedBoundaryLength)
+{
+    for (uint i = 0, n = chart->faces.count(); i < n; ++i)
+    {
+        const uint face = chart->faces[i];
+
+        nvDebugCheck(faceChartArray[face] == chart->id);
+
+        owner->faces.append(face);
+        faceChartArray[face] = owner->id;
+    }
+
+    // Update adjacencies?
+
+    owner->centroidSum += chart->centroidSum;
+    owner->normalSum += chart->normalSum;
+
+    owner->boundaryLength += chart->boundaryLength - sharedBoundaryLength;
+    owner->area += chart->area;
+
+    updateProxy(owner);
+}
+
+// Merges charts into neighbors they share a large part of their border with.
+// Pass 1 walks the charts from last to first. For each chart it measures the border
+// length shared with every other chart, then merges the chart into the first
+// neighbor (again scanning last to first) that passes one of the two tests below,
+// deleting the chart and leaving a NULL in its slot.
+// Pass 2 removes the NULL slots and renumbers chart ids and faceChartArray.
+// NOTE(review): face chart ids are used as indices into chartArray and into
+// sharedBoundaryLengths, so this assumes chart->id == index on entry.
+void AtlasBuilder::mergeCharts()
+{
+    // Indexed by chart id: border length shared between the current chart and that chart.
+    Array<float> sharedBoundaryLengths;
+
+    const uint chartCount = chartArray.count();
+    for (int c = chartCount-1; c >= 0; c--)
+    {
+        sharedBoundaryLengths.clear();
+        sharedBoundaryLengths.resize(chartCount, 0.0f);
+
+        ChartBuildData * chart = chartArray[c];
+
+        // Border length that can never be merged away: mesh boundary, normal or
+        // texture seams, and edges towards faces whose chart id is -2.
+        float externalBoundary = 0.0f;
+
+        const uint faceCount = chart->faces.count();
+        for (uint i = 0; i < faceCount; i++)
+        {
+            uint f = chart->faces[i];
+            const HalfEdge::Face * face = mesh->faceAt(f);
+
+            for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+            {
+                const HalfEdge::Edge * edge = it.current();
+
+                //float l = edge->length();
+                float l = edgeLengths[edge->id/2];
+
+                if (edge->isBoundary()) {
+                    externalBoundary += l;
+                }
+                else {
+                    uint neighborFace = edge->pair->face->id;
+                    // NOTE(review): stored in an unsigned variable, so the comparisons with
+                    // `c` and with -2 below rely on integer conversions. -2 is presumably the
+                    // marker written by markUnchartedFaces -- confirm.
+                    uint neighborChart = faceChartArray[neighborFace];
+
+                    if (neighborChart != c) {
+                        if ((edge->isSeam() && (isNormalSeam(edge) || isTextureSeam(edge))) || neighborChart == -2) {
+                            externalBoundary += l;
+                        }
+                        else {
+                            sharedBoundaryLengths[neighborChart] += l;
+                        }
+                    }
+                }
+            }
+        }
+
+        for (int cc = chartCount-1; cc >= 0; cc--)
+        {
+            if (cc == c) 
+                continue;
+
+            ChartBuildData * chart2 = chartArray[cc];
+            // Slots of charts that were merged earlier in this pass are NULL.
+            if (chart2 == NULL) 
+                continue;
+
+            // Test 1: more than 80% of the mergeable border is shared with chart2.
+            if (sharedBoundaryLengths[cc] > 0.8 * max(0.0f, chart->boundaryLength - externalBoundary)) {
+
+                // Try to avoid degenerate configurations.
+                if (chart2->boundaryLength > sharedBoundaryLengths[cc])
+                {
+                    if (dot(chart2->planeNormal, chart->planeNormal) > -0.25) {
+                        mergeChart(chart2, chart, sharedBoundaryLengths[cc]);
+                        delete chart;
+                        chartArray[c] = NULL;
+                        break;
+                    }
+                }
+            }
+
+            // Test 2: more than 20% of the mergeable border is shared and the proxies
+            // point to the same side.
+            // NOTE(review): also reached when test 1 matched but its inner checks failed.
+            if (sharedBoundaryLengths[cc] > 0.20 * max(0.0f, chart->boundaryLength - externalBoundary)) {
+
+                // Compare proxies.
+                if (dot(chart2->planeNormal, chart->planeNormal) > 0) {
+                    mergeChart(chart2, chart, sharedBoundaryLengths[cc]);
+                    delete chart;
+                    chartArray[c] = NULL;
+                    break;
+                }
+            }
+        }
+    }
+
+    // Remove deleted charts.
+    for (int c = 0; c < I32(chartArray.count()); /*do not increment if removed*/)
+    {
+        if (chartArray[c] == NULL) {
+            chartArray.removeAt(c);
+
+            // Update faceChartArray.
+            // Every chart after the removed slot moved down by one; shift the ids stored per face.
+            const uint faceCount = faceChartArray.count();
+            for (uint i = 0; i < faceCount; i++) {
+                nvDebugCheck (faceChartArray[i] != -1);
+                nvDebugCheck (faceChartArray[i] != c);
+                nvDebugCheck (faceChartArray[i] <= I32(chartArray.count()));
+
+                if (faceChartArray[i] > c) {
+                    faceChartArray[i]--;
+                }
+            }
+        }
+        else {
+            // Surviving chart: its id becomes its (possibly shifted) index.
+            chartArray[c]->id = c;
+            c++;
+        }
+    }
+}
+
+
+
+// Returns the list of face indices of the i-th chart, by reference.
+// No range check is done here on `i`.
+const Array<uint> & AtlasBuilder::chartFaces(uint i) const
+{
+    return chartArray[i]->faces;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/param/AtlasBuilder.h b/thirdparty/thekla_atlas/nvmesh/param/AtlasBuilder.h
new file mode 100644
index 0000000000..f25c724f7e
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/AtlasBuilder.h
@@ -0,0 +1,111 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_MESH_ATLASBUILDER_H
+#define NV_MESH_ATLASBUILDER_H
+
+#include "Atlas.h"
+
+#include "nvmath/Vector.h"
+#include "nvmath/Random.h"
+#include "nvmesh/nvmesh.h"
+
+#include "nvcore/Array.h"
+#include "nvcore/BitArray.h"
+
+
+
+namespace nv
+{
+    namespace HalfEdge { class Mesh; }
+
+    struct ChartBuildData;
+    // Chart-building state for one half-edge mesh. NOTE(review): member functions are only declared here; group notes below are inferred from names unless a comment says otherwise.
+    struct AtlasBuilder
+    {
+        AtlasBuilder(const HalfEdge::Mesh * m);
+        ~AtlasBuilder();
+
+        void markUnchartedFaces(const Array<uint> & unchartedFaces);
+
+        void computeShortestPaths();
+
+        void placeSeeds(float threshold, uint maxSeedCount);
+        void createRandomChart(float threshold);
+
+        void addFaceToChart(ChartBuildData * chart, uint f, bool recomputeProxy=false);
+
+        bool growCharts(float threshold, uint faceCount);
+        bool growChart(ChartBuildData * chart, float threshold, uint faceCount);
+
+        void resetCharts();
+
+        void updateCandidates(ChartBuildData * chart, uint face);
+
+        void updateProxies();
+        void updateProxy(ChartBuildData * chart);
+
+        bool relocateSeeds();
+        bool relocateSeed(ChartBuildData * chart);
+
+        void updatePriorities(ChartBuildData * chart);
+
+        // Metrics evaluated for a (chart, face) pair. NOTE(review): how they are weighted and combined is not visible in this header.
+        float evaluatePriority(ChartBuildData * chart, uint face);
+        float evaluateProxyFitMetric(ChartBuildData * chart, uint face);
+        float evaluateDistanceToBoundary(ChartBuildData * chart, uint face);
+        float evaluateDistanceToSeed(ChartBuildData * chart, uint face);
+        float evaluateRoundnessMetric(ChartBuildData * chart, uint face, float newBoundaryLength, float newChartArea);
+        float evaluateStraightnessMetric(ChartBuildData * chart, uint face);
+
+        float evaluateNormalSeamMetric(ChartBuildData * chart, uint f);
+        float evaluateTextureSeamMetric(ChartBuildData * chart, uint f);
+        float evaluateSeamMetric(ChartBuildData * chart, uint f);
+
+        // Per-chart quantities for a candidate face f (presumably the value the chart would have with f added -- TODO confirm).
+        float evaluateChartArea(ChartBuildData * chart, uint f);
+        float evaluateBoundaryLength(ChartBuildData * chart, uint f);
+        Vector3 evaluateChartNormalSum(ChartBuildData * chart, uint f);
+        Vector3 evaluateChartCentroidSum(ChartBuildData * chart, uint f);
+
+        Vector3 computeChartCentroid(const ChartBuildData * chart);
+
+
+        void fillHoles(float threshold);
+        void mergeCharts(); // Merges charts into neighbours via mergeChart(), then drops the deleted entries from chartArray and renumbers faceChartArray and chart ids.
+
+        // @@ Cleanup.
+        struct Candidate {
+            uint face;
+            ChartBuildData * chart;
+            float metric;
+        };
+
+        const Candidate & getBestCandidate() const;
+        void removeCandidate(uint f);
+        void updateCandidate(ChartBuildData * chart, uint f, float metric);
+
+        void mergeChart(ChartBuildData * owner, ChartBuildData * chart, float sharedBoundaryLength);
+
+        // Read-only access to the charts: their count, and the face list of chart i.
+        uint chartCount() const { return chartArray.count(); }
+        const Array<uint> & chartFaces(uint i) const;
+
+        const HalfEdge::Mesh * mesh;
+        uint facesLeft;
+        Array<int> faceChartArray; // Chart index per face; mergeCharts() debug-checks that no entry is -1 while renumbering.
+        Array<ChartBuildData *> chartArray; // Raw chart pointers; mergeCharts() deletes merged charts and compacts the array so chart->id equals its index.
+        Array<float> shortestPaths;
+
+        Array<float> edgeLengths;
+        Array<float> faceAreas;
+
+        Array<Candidate> candidateArray; // Candidate storage; looked up per face through faceCandidateArray.
+        Array<uint> faceCandidateArray; // Map face index to candidate index.
+
+        MTRand rand;
+
+        SegmentationSettings settings;
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_ATLASBUILDER_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/AtlasPacker.cpp b/thirdparty/thekla_atlas/nvmesh/param/AtlasPacker.cpp
new file mode 100644
index 0000000000..5ce452cb9e
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/AtlasPacker.cpp
@@ -0,0 +1,1387 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "nvmesh.h" // pch
+
+#include "AtlasPacker.h"
+#include "nvmesh/halfedge/Vertex.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/param/Atlas.h"
+#include "nvmesh/param/Util.h"
+#include "nvmesh/raster/Raster.h"
+
+#include "nvmath/Vector.inl"
+#include "nvmath/ConvexHull.h"
+#include "nvmath/Color.h"
+#include "nvmath/ftoi.h"
+
+#include "nvcore/StrLib.h" // debug
+#include "nvcore/StdStream.h" // fileOpen
+
+#include <float.h> // FLT_MAX
+#include <limits.h> // UINT_MAX
+
+using namespace nv;
+
+#define DEBUG_OUTPUT 0
+
+#if DEBUG_OUTPUT
+
+#include "nvimage/ImageIO.h"
+
+namespace
+{
+    const uint TGA_TYPE_GREY = 3;       // Stored in TgaHeader::image_type by outputDebugBitmap (8-bit pixels).
+    const uint TGA_TYPE_RGB = 2;        // Stored in TgaHeader::image_type by outputDebugImage (24-bit pixels).
+    const uint TGA_ORIGIN_UPPER = 0x20; // Stored in TgaHeader::flags by both debug writers.
+
+#pragma pack(push, 1)
+    struct TgaHeader { // TGA file header written verbatim with fwrite(); the surrounding #pragma pack(push, 1) removes padding between fields.
+	    uint8	id_length;
+	    uint8	colormap_type;
+	    uint8	image_type;
+	    uint16	colormap_index;
+	    uint16	colormap_length;
+	    uint8	colormap_size;
+	    uint16	x_origin;
+	    uint16	y_origin;
+	    uint16	width;
+	    uint16	height;
+	    uint8	pixel_size;
+	    uint8	flags;
+	    // Expected sizeof(TgaHeader) in bytes; the debug writers verify it with nvStaticCheck before writing.
+	    enum { Size = 18 };		//const static int SIZE = 18;
+    };
+#pragma pack(pop)
+
+    static void outputDebugBitmap(const char * fileName, const BitMap & bitmap, int w, int h)
+    {
+        // Debug helper: dumps the top-left w x h bits of `bitmap` to `fileName` as an 8-bit grey TGA (set bit -> 0xFF, clear bit -> 0x0).
+        FILE * file = fileOpen(fileName, "wb");
+        if (!file) return;
+
+        // The header is written as raw bytes, so its in-memory size must match the 18-byte file layout.
+        nvStaticCheck(sizeof(TgaHeader) == TgaHeader::Size);
+        TgaHeader header;
+        header.id_length = 0;
+        header.colormap_type = 0;
+        header.image_type = TGA_TYPE_GREY;
+        header.colormap_index = 0;
+        header.colormap_length = 0;
+        header.colormap_size = 0;
+        header.x_origin = 0;
+        header.y_origin = 0;
+        header.width = w;
+        header.height = h;
+        header.pixel_size = 8;
+        header.flags = TGA_ORIGIN_UPPER;
+        fwrite(&header, sizeof(TgaHeader), 1, file);
+
+        // One byte per texel, rows in increasing y order.
+        for (int y = 0; y < h; y++) {
+            for (int x = 0; x < w; x++) {
+                uint8 texel = 0x0;
+                if (bitmap.bitAt(x, y)) texel = 0xFF;
+                fwrite(&texel, 1, 1, file);
+            }
+        }
+        fclose(file);
+    }
+
+    static void outputDebugImage(const char * fileName, const Image & bitmap, int w, int h)
+    {
+        // Debug helper: writes the top-left w x h pixels of `bitmap` to `fileName` as an uncompressed 24-bit TGA.
+        FILE * fp = fileOpen(fileName, "wb");
+        if (fp == NULL) return; // Nothing is written when the file cannot be opened.
+
+        nvStaticCheck(sizeof(TgaHeader) == TgaHeader::Size);
+        TgaHeader tga;
+        tga.id_length = 0;
+        tga.colormap_type = 0;
+        tga.image_type = TGA_TYPE_RGB;
+        tga.colormap_index = 0;
+        tga.colormap_length = 0;
+        tga.colormap_size = 0;
+        tga.x_origin = 0;
+        tga.y_origin = 0;
+        tga.width = w;
+        tga.height = h;
+        tga.pixel_size = 24;
+        tga.flags = TGA_ORIGIN_UPPER;
+
+        fwrite(&tga, sizeof(TgaHeader), 1, fp);
+
+        // TGA true-color pixels are stored blue first (B, G, R); emitting R, G, B would swap the red and blue channels in viewers.
+        for (int j = 0; j < h; j++) {
+            for (int i = 0; i < w; i++) {
+                Color32 color = bitmap.pixel(i, j);
+                fwrite(&color.b, 1, 1, fp);
+                fwrite(&color.g, 1, 1, fp);
+                fwrite(&color.r, 1, 1, fp);
+            }
+        }
+
+        fclose(fp);
+    }
+}
+
+#endif // DEBUG_OUTPUT
+
+inline int align(int x, int a) {
+    // Rounds x up to a multiple of a using a bit mask; the trick is only valid when a is a power of two.
+    const int mask = a - 1;
+    return (x + mask) & ~mask;
+}
+
+inline bool isAligned(int x, int a) { // True when the low bits of x selected by (a - 1) are all zero; a is assumed to be a power of two.
+    return !(x & (a - 1));
+}
+
+
+
+AtlasPacker::AtlasPacker(Atlas * atlas) : m_atlas(atlas), m_bitmap(256, 256)
+{
+    // Nothing has been packed yet, so the atlas extents start at zero.
+    m_width = m_height = 0;
+
+    // The debug image starts at the same 256x256 size as m_bitmap and is cleared to zero.
+    m_debug_bitmap.allocate(256, 256); m_debug_bitmap.fill(Color32(0,0,0,0));
+}
+
+AtlasPacker::~AtlasPacker() // Empty body: no explicit cleanup is performed here (m_atlas is not deleted).
+{
+}
+
+// Computes an oriented bounding box of the chart's texture coordinates by brute force: each convex hull edge of the boundary vertices is tried as the major axis and the smallest-area box is kept (rotating calipers would be the better algorithm). Returns false when the chart has no boundary vertices; otherwise returns true with *majorAxis / *minorAxis set to the box axes and *minCorner / *maxCorner set to the box bounds expressed in that axis frame.
+static bool computeBoundingBox(Chart * chart, Vector2 * majorAxis, Vector2 * minorAxis, Vector2 * minCorner, Vector2 * maxCorner)
+{
+    // Compute list of boundary points.
+    Array<Vector2> points(16);
+
+    HalfEdge::Mesh * mesh = chart->chartMesh();
+    const uint vertexCount = mesh->vertexCount();
+
+    for (uint i = 0; i < vertexCount; i++) {
+        HalfEdge::Vertex * vertex = mesh->vertexAt(i);
+        if (vertex->isBoundary()) {
+            points.append(vertex->tex);
+        }
+    }
+
+    // This is not valid anymore. The chart mesh may have multiple boundaries!
+    /*const HalfEdge::Vertex * vertex = findBoundaryVertex(chart->chartMesh());
+
+    // Traverse boundary.
+    const HalfEdge::Edge * const firstEdge = vertex->edge();
+    const HalfEdge::Edge * edge = firstEdge;
+    do {
+        vertex = edge->vertex();
+
+        nvDebugCheck (vertex->isBoundary());
+        points.append(vertex->tex);
+
+        edge = edge->next();
+    } while (edge != firstEdge);*/
+
+#if 1
+    Array<Vector2> hull;
+    if (points.size()==0) {
+        return false;    
+    }
+    
+    convexHull(points, hull, 0.00001f);
+
+    // @@ Ideally I should use rotating calipers to find the best box. Using brute force for now.
+    // Defined defaults for the case where no hull edge is usable (empty hull, or all hull points coincident): the vertex loop below then yields the axis-aligned box.
+    float best_area = FLT_MAX;
+    Vector2 best_min(FLT_MAX, FLT_MAX);
+    Vector2 best_max(-FLT_MAX, -FLT_MAX);
+    Vector2 best_axis(1.0f, 0.0f);
+
+    const uint hullCount = hull.count();
+    for (uint i = 0, j = hullCount-1; i < hullCount; j = i, i++) {
+
+        if (equal(hull[i], hull[j])) {
+            continue;
+        }
+
+        Vector2 axis = normalize(hull[i] - hull[j], 0.0f);
+        nvDebugCheck(isFinite(axis));
+
+        // Compute bounding box.
+        Vector2 box_min(FLT_MAX, FLT_MAX);
+        Vector2 box_max(-FLT_MAX, -FLT_MAX);
+
+        for (uint v = 0; v < hullCount; v++) {
+
+           Vector2 point = hull[v];
+
+           float x = dot(axis, point);
+           if (x < box_min.x) box_min.x = x;
+           if (x > box_max.x) box_max.x = x;
+
+           float y = dot(Vector2(-axis.y, axis.x), point);
+           if (y < box_min.y) box_min.y = y;
+           if (y > box_max.y) box_max.y = y;
+        }
+    
+        // Compute box area.
+        float area = (box_max.x - box_min.x) * (box_max.y - box_min.y);
+
+        if (area < best_area) {
+            best_area = area;
+            best_min = box_min;
+            best_max = box_max;
+            best_axis = axis;
+        }
+    }
+
+    // Make sure the box contains all the input points since the convex hull is not 100% accurate.
+    /*const uint pointCount = points.count();
+    for (uint v = 0; v < pointCount; v++) {
+
+        Vector2 point = points[v];
+
+        float x = dot(best_axis, point);
+        if (x < best_min.x) best_min.x = x;
+
+        float y = dot(Vector2(-best_axis.y, best_axis.x), point);
+        if (y < best_min.y) best_min.y = y;
+    }*/
+
+    // Consider all points, not only boundary points, in case the input chart is malformed.
+    for (uint i = 0; i < vertexCount; i++) {
+        HalfEdge::Vertex * vertex = mesh->vertexAt(i);
+        Vector2 point = vertex->tex;
+
+        float x = dot(best_axis, point);
+        if (x < best_min.x) best_min.x = x;
+        if (x > best_max.x) best_max.x = x;
+
+        float y = dot(Vector2(-best_axis.y, best_axis.x), point);
+        if (y < best_min.y) best_min.y = y;
+        if (y > best_max.y) best_max.y = y;
+    }
+
+    *majorAxis = best_axis;
+    *minorAxis = Vector2(-best_axis.y, best_axis.x);
+    *minCorner = best_min;
+    *maxCorner = best_max;
+
+#else
+    // Approximate implementation: try 16 different directions and keep the best.
+
+    const uint N = 16;
+    Vector2 axis[N];
+
+    float minAngle = 0;
+    float maxAngle = PI / 2;
+
+    int best;
+    Vector2 mins[N];
+    Vector2 maxs[N];
+
+    const int iterationCount = 1;
+    for (int j = 0; j < iterationCount; j++)
+    {
+        // Init predefined directions.
+        for (int i = 0; i < N; i++)
+        {
+            float angle = lerp(minAngle, maxAngle, float(i)/N);
+            axis[i].set(cosf(angle), sinf(angle));
+        }
+
+        // Compute box for each direction.
+        for (int i = 0; i < N; i++)
+        {
+            mins[i].set(FLT_MAX, FLT_MAX);
+            maxs[i].set(-FLT_MAX, -FLT_MAX);
+        }
+
+        for (uint p = 0; p < points.count(); p++)
+        {
+            Vector2 point = points[p];
+
+            for (int i = 0; i < N; i++)
+            {
+               float x = dot(axis[i], point);
+               if (x < mins[i].x) mins[i].x = x;
+               if (x > maxs[i].x) maxs[i].x = x;
+
+               float y = dot(Vector2(-axis[i].y, axis[i].x), point);
+               if (y < mins[i].y) mins[i].y = y;
+               if (y > maxs[i].y) maxs[i].y = y;
+            }
+        }
+
+        // Find box with minimum area.
+        best = -1;
+        int second_best = -1;
+        float best_area = FLT_MAX;
+        float second_best_area = FLT_MAX;
+        
+        for (int i = 0; i < N; i++)
+        {
+            float area = (maxs[i].x - mins[i].x) * (maxs[i].y - mins[i].y);
+
+            if (area < best_area)
+            {
+                second_best_area = best_area;
+                second_best = best;
+
+                best_area = area;
+                best = i;
+            }
+            else if (area < second_best_area)
+            {
+                second_best_area = area;
+                second_best = i;
+            }
+        }
+        nvDebugCheck(best != -1);
+        nvDebugCheck(second_best != -1);
+        nvDebugCheck(best != second_best);
+
+        if (j != iterationCount-1)
+        {
+            // Handle wrap-around during the first iteration.
+            if (j == 0) {
+                if (best == 0 && second_best == N-1) best = N;
+                if (best == N-1 && second_best == 0) second_best = N;
+            }
+
+            if (best < second_best) swap(best, second_best);
+
+            // Update angles.
+            float deltaAngle = (maxAngle - minAngle) / N;
+            maxAngle = minAngle + (best - 0.5f)  * deltaAngle;
+            minAngle = minAngle + (second_best + 0.5f) * deltaAngle;
+        }
+    }
+
+    // Compute major and minor axis, and origin.
+    *majorAxis = axis[best];
+    *minorAxis = Vector2(-axis[best].y, axis[best].x);
+    *origin = mins[best];
+
+    // @@ If the parameterization is invalid, we could have an interior vertex outside the boundary.
+    // @@ In that case the returned bounding box would be incorrect. Compute updated bounds here.
+    /*for (uint p = 0; p < points.count(); p++)
+    {
+        Vector2 point = points[p];
+
+        for (int i = 0; i < N; i++)
+        {
+           float x = dot(*majorAxis, point);
+           float y = dot(*minorAxis, point);
+        }
+    }*/
+#endif
+
+    return true;
+}
+// Packs every chart of m_atlas that is vertex-mapped or a disk into one atlas: rescales each chart's texture coordinates, sorts charts by extents.x + extents.y, places them largest first with findChartLocation() and stores the final 4-aligned size in m_width / m_height. On a computeBoundingBox() failure the atlas is flagged with setFailed() and the function returns early.
+// quality is forwarded to findChartLocation(); texelsPerUnit scales chart coordinates and the initial atlas size estimate; blockAligned rounds chart extents so that extents plus padding are multiples of 4; conservative selects a 1-texel border with drawChartBitmapDilate() instead of the half-texel offset with drawChartBitmap().
+void AtlasPacker::packCharts(int quality, float texelsPerUnit, bool blockAligned, bool conservative)
+{
+    const uint chartCount = m_atlas->chartCount();
+    if (chartCount == 0) return;
+    // chartOrderArray[c] is the sort key of chart c; chartExtents[c] holds the extents later used to size that chart's bitmap.
+    Array<float> chartOrderArray;
+    chartOrderArray.resize(chartCount);
+
+    Array<Vector2> chartExtents;
+    chartExtents.resize(chartCount);
+    // meshArea accumulates the surface area of the non-vertex-mapped charts; it feeds the atlas size estimate below.
+    float meshArea = 0;
+    for (uint c = 0; c < chartCount; c++)
+    {
+        Chart * chart = m_atlas->chartAt(c);
+        
+        if (!chart->isVertexMapped() && !chart->isDisk()) {
+            chartOrderArray[c] = 0;
+
+            // Skip non-disks.
+            continue;
+        }
+
+        Vector2 extents(0.0f);
+
+        if (chart->isVertexMapped()) {
+            // Let's assume vertex maps are arranged in a rectangle.
+            //HalfEdge::Mesh * mesh = chart->chartMesh();
+
+            // Arrange vertices in a rectangle.
+            extents.x = float(chart->vertexMapWidth);
+            extents.y = float(chart->vertexMapHeight);
+        }
+        else {
+            // Compute surface area to sort charts.
+            float chartArea = chart->computeSurfaceArea();
+            meshArea += chartArea;
+            //chartOrderArray[c] = chartArea;
+
+            // Compute chart scale
+            float parametricArea = fabs(chart->computeParametricArea());    // @@ There doesn't seem to be anything preventing parametric area to be negative.
+            if (parametricArea < NV_EPSILON) {
+                // When the parametric area is too small we use a rough approximation to prevent divisions by very small numbers.
+                Vector2 bounds = chart->computeParametricBounds();
+                parametricArea = bounds.x * bounds.y;
+            }
+            float scale = (chartArea / parametricArea) * texelsPerUnit;
+            if (parametricArea == 0) // < NV_EPSILON)
+            {
+                scale = 0;
+            }
+            nvCheck(isFinite(scale));
+
+            // Compute bounding box of chart.
+            Vector2 majorAxis, minorAxis, origin, end;
+            if (!computeBoundingBox(chart, &majorAxis, &minorAxis, &origin, &end)) {
+                m_atlas->setFailed();
+                return;
+            }
+
+            nvCheck(isFinite(majorAxis) && isFinite(minorAxis) && isFinite(origin));
+            
+            // Sort charts by perimeter. @@ This is sometimes producing somewhat unexpected results. Is this right?
+            //chartOrderArray[c] = ((end.x - origin.x) + (end.y - origin.y)) * scale;
+
+            // Translate, rotate and scale vertices. Compute extents.
+            HalfEdge::Mesh * mesh = chart->chartMesh();
+            const uint vertexCount = mesh->vertexCount();
+            for (uint i = 0; i < vertexCount; i++)
+            {
+                HalfEdge::Vertex * vertex = mesh->vertexAt(i);
+
+                //Vector2 t = vertex->tex - origin;
+                Vector2 tmp;
+                tmp.x = dot(vertex->tex, majorAxis);
+                tmp.y = dot(vertex->tex, minorAxis);
+                tmp -= origin;
+                tmp *= scale;
+                if (tmp.x < 0 || tmp.y < 0) {
+                    nvDebug("tmp: %f %f\n", tmp.x, tmp.y);
+                    nvDebug("scale: %f\n", scale);
+                    nvDebug("origin: %f %f\n", origin.x, origin.y);
+                    nvDebug("majorAxis: %f %f\n", majorAxis.x, majorAxis.y);
+                    nvDebug("minorAxis: %f %f\n", minorAxis.x, minorAxis.y);
+                    nvDebugBreak();
+                }
+                //nvCheck(tmp.x >= 0 && tmp.y >= 0);
+
+                vertex->tex = tmp;
+
+				nvCheck(isFinite(vertex->tex.x) && isFinite(vertex->tex.y));
+
+                extents = max(extents, tmp);
+            }
+            nvDebugCheck(extents.x >= 0 && extents.y >= 0);
+
+            // Limit chart size.
+            if (extents.x > 1024 || extents.y > 1024) {
+                float limit = max(extents.x, extents.y);
+
+                scale = 1024 / (limit + 1);
+
+                for (uint i = 0; i < vertexCount; i++)
+                {
+                    HalfEdge::Vertex * vertex = mesh->vertexAt(i);
+                    vertex->tex *= scale;
+                }
+
+                extents *= scale;
+
+                nvDebugCheck(extents.x <= 1024 && extents.y <= 1024);
+            }
+
+
+            // Scale the charts to use the entire texel area available. So, if the width is 0.1 we could scale it to 1 without increasing the lightmap usage and making a better 
+            // use of it. In many cases this also improves the look of the seams, since vertices on the chart boundaries have more chances of being aligned with the texel centers.
+
+            float scale_x = 1.0f;
+            float scale_y = 1.0f;
+
+            float divide_x = 1.0f;
+            float divide_y = 1.0f;
+
+            if (extents.x > 0) {
+                int cw = ftoi_ceil(extents.x);
+
+                if (blockAligned) {
+                    // Align all chart extents to 4x4 blocks, but taking padding into account.
+                    if (conservative) {
+                        cw = align(cw + 2, 4) - 2;
+                    }
+                    else {
+                        cw = align(cw + 1, 4) - 1;
+                    }
+                }
+
+                scale_x = (float(cw) - NV_EPSILON);
+                divide_x = extents.x;
+                extents.x = float(cw);
+            }
+
+            if (extents.y > 0) {
+                int ch = ftoi_ceil(extents.y);
+
+                if (blockAligned) {
+                    // Align all chart extents to 4x4 blocks, but taking padding into account.
+                    if (conservative) {
+                        ch = align(ch + 2, 4) - 2;
+                    }
+                    else {
+                        ch = align(ch + 1, 4) - 1;
+                    }
+                }
+
+                scale_y = (float(ch) - NV_EPSILON);
+                divide_y = extents.y;
+                extents.y = float(ch);
+            }
+
+            for (uint v = 0; v < vertexCount; v++) {
+                HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+
+                vertex->tex.x /= divide_x;
+                vertex->tex.y /= divide_y;
+                vertex->tex.x *= scale_x;
+                vertex->tex.y *= scale_y;
+
+				nvCheck(isFinite(vertex->tex.x) && isFinite(vertex->tex.y));
+            }
+        }
+
+        chartExtents[c] = extents;
+
+        // Sort charts by perimeter.
+        chartOrderArray[c] = extents.x + extents.y;
+    }
+
+    // @@ We can try to improve compression of small charts by sorting them by proximity like we do with vertex samples.
+    // @@ How to do that? One idea: compute chart centroid, insert into grid, compute morton index of the cell, sort based on morton index.
+    // @@ We would sort by morton index, first, then quantize the chart sizes, so that all small charts have the same size, and sort by size preserving the morton order.
+
+    //nvDebug("Sorting charts.\n");
+
+    // Sort charts by the key stored in chartOrderArray above (extents.x + extents.y, not area).
+    m_radix.sort(chartOrderArray);
+    const uint32 * ranks = m_radix.ranks();
+
+    // Estimate size of the map based on the mesh surface area and given texel scale.
+    float texelCount = meshArea * square(texelsPerUnit) / 0.75f; // Assume 75% utilization.
+    if (texelCount < 1) texelCount = 1;
+    uint approximateExtent = nextPowerOfTwo(uint(sqrtf(texelCount)));
+
+    //nvDebug("Init bitmap.\n");
+
+    // @@ Pack all charts smaller than a texel into a compact rectangle.
+    // @@ Start considering only 1x1 charts. Extend to 1xn charts later.
+
+    /*for (uint i = 0; i < chartCount; i++)
+    {
+        uint c = ranks[chartCount - i - 1]; // largest chart first
+
+        Chart * chart = m_atlas->chartAt(c);
+
+        if (!chart->isDisk()) continue;
+
+        if (iceil(chartExtents[c].x) == 1 && iceil(chartExtents[c].x) == 1) {
+            // @@ Add to 
+        }
+    }*/
+
+
+
+    // Init bit map.
+    m_bitmap.clearAll();
+    if (approximateExtent > m_bitmap.width()) {
+        m_bitmap.resize(approximateExtent, approximateExtent, false);
+        m_debug_bitmap.resize(approximateExtent, approximateExtent);
+        m_debug_bitmap.fill(Color32(0,0,0,0));
+    }
+
+    // w, h: atlas extents covered so far; they only grow as charts are placed.
+    int w = 0;
+    int h = 0;
+
+#if 1
+    // Add sorted charts to bitmap.
+    for (uint i = 0; i < chartCount; i++)
+    {
+        uint c = ranks[chartCount - i - 1]; // largest chart first
+
+        Chart * chart = m_atlas->chartAt(c);
+
+        if (!chart->isVertexMapped() && !chart->isDisk()) continue;
+
+        //float scale_x = 1;
+        //float scale_y = 1;
+
+        BitMap chart_bitmap;
+
+        if (chart->isVertexMapped()) {
+            // Init all bits to 1.
+            chart_bitmap.resize(ftoi_ceil(chartExtents[c].x), ftoi_ceil(chartExtents[c].y), /*initValue=*/true);
+
+            // @@ Another alternative would be to try to map each vertex to a different texel trying to fill all the available unused texels.
+        }
+        else {
+            // @@ Add special cases for dot and line charts. @@ Lightmap rasterizer also needs to handle these special cases.
+            // @@ We could also have a special case for chart quads. If the quad surface <= 4 texels, align vertices with texel centers and do not add padding. May be very useful for foliage.
+
+            // @@ In general we could reduce the padding of all charts by one texel by using a rasterizer that takes into account the 2-texel footprint of the tent bilinear filter. For example,
+            // if we have a chart that is less than 1 texel wide currently we add one texel to the left and one texel to the right creating a 3-texel-wide bitmap. However, if we know that the 
+            // chart is only 1 texel wide we could align it so that it only touches the footprint of two texels:
+
+            //      |   |      <- Touches texels 0, 1 and 2.
+            //    |   |        <- Only touches texels 0 and 1.
+            // \   \ / \ /   /
+            //  \   X   X   /
+            //   \ / \ / \ /
+            //    V   V   V
+            //    0   1   2
+
+            if (conservative) {
+                // Init all bits to 0.
+                chart_bitmap.resize(ftoi_ceil(chartExtents[c].x) + 2, ftoi_ceil(chartExtents[c].y) + 2, /*initValue=*/false);  // + 2 to add padding on both sides.
+
+                // Rasterize chart and dilate.
+                drawChartBitmapDilate(chart, &chart_bitmap, /*padding=*/1);
+            }
+            else {
+                // Init all bits to 0.
+                chart_bitmap.resize(ftoi_ceil(chartExtents[c].x) + 1, ftoi_ceil(chartExtents[c].y) + 1, /*initValue=*/false);  // Add half a texels on each side.
+
+                // Rasterize chart with unit scale and a half-texel offset (this path calls drawChartBitmap, not the dilating variant).
+                drawChartBitmap(chart, &chart_bitmap, Vector2(1), Vector2(0.5));
+            }
+        }
+
+        int best_x, best_y;
+        int best_cw, best_ch;   // Includes padding now.
+        int best_r;
+        findChartLocation(quality, &chart_bitmap, chartExtents[c], w, h, &best_x, &best_y, &best_cw, &best_ch, &best_r);
+        
+        /*if (w < best_x + best_cw || h < best_y + best_ch)
+        {
+            nvDebug("Resize extents to (%d, %d).\n", best_x + best_cw, best_y + best_ch);
+        }*/
+
+        // Update parametric extents.
+        w = max(w, best_x + best_cw);
+        h = max(h, best_y + best_ch);
+        // Keep the atlas extents multiples of 4; the nvCheck(isAligned(...)) calls at the end rely on this.
+        w = align(w, 4);
+        h = align(h, 4);
+
+        // Resize bitmap if necessary.
+        if (uint(w) > m_bitmap.width() || uint(h) > m_bitmap.height())
+        {
+            //nvDebug("Resize bitmap (%d, %d).\n", nextPowerOfTwo(w), nextPowerOfTwo(h));
+            m_bitmap.resize(nextPowerOfTwo(U32(w)), nextPowerOfTwo(U32(h)), false);
+            m_debug_bitmap.resize(nextPowerOfTwo(U32(w)), nextPowerOfTwo(U32(h)));
+        }
+
+        //nvDebug("Add chart at (%d, %d).\n", best_x, best_y);
+
+        addChart(&chart_bitmap, w, h, best_x, best_y, best_r, /*debugOutput=*/NULL);
+
+        // IC: Output chart again to debug bitmap.
+        if (chart->isVertexMapped()) {
+            addChart(&chart_bitmap, w, h, best_x, best_y, best_r, &m_debug_bitmap);
+        }
+        else {
+            addChart(chart, w, h, best_x, best_y, best_r, &m_debug_bitmap);
+        }
+
+        //float best_angle = 2 * PI * best_r;
+
+        // Translate and rotate chart texture coordinates.
+        HalfEdge::Mesh * mesh = chart->chartMesh();
+        const uint vertexCount = mesh->vertexCount();
+        for (uint v = 0; v < vertexCount; v++)
+        {
+            HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+
+            Vector2 t = vertex->tex;
+            if (best_r) swap(t.x, t.y);
+            //vertex->tex.x = best_x + t.x * cosf(best_angle) - t.y * sinf(best_angle);
+            //vertex->tex.y = best_y + t.x * sinf(best_angle) + t.y * cosf(best_angle);
+
+            vertex->tex.x = best_x + t.x + 0.5f;
+            vertex->tex.y = best_y + t.y + 0.5f;
+
+            nvCheck(vertex->tex.x >= 0 && vertex->tex.y >= 0);
+			nvCheck(isFinite(vertex->tex.x) && isFinite(vertex->tex.y));
+        }
+
+#if DEBUG_OUTPUT && 0
+        StringBuilder fileName;
+        fileName.format("debug_packer_%d.tga", i);
+        //outputDebugBitmap(fileName.str(), m_bitmap, w, h);
+        outputDebugImage(fileName.str(), m_debug_bitmap, w, h);
+#endif
+    }
+
+#else // 0
+
+    // Add sorted charts to bitmap.
+    for (uint i = 0; i < chartCount; i++)
+    {
+        uint c = ranks[chartCount - i - 1]; // largest chart first
+
+        Chart * chart = m_atlas->chartAt(c);
+
+        if (!chart->isDisk()) continue;
+
+        Vector2 scale(1, 1);
+
+#if 0 // old method.
+        //m_padding_x = 2*padding;
+        //m_padding_y = 2*padding;
+#else
+        //m_padding_x = 0; //padding;
+        //m_padding_y = 0; //padding;
+#endif
+
+        int bw = ftoi_ceil(chartExtents[c].x + 1);
+        int bh = ftoi_ceil(chartExtents[c].y + 1);
+
+        if (chartExtents[c].x < 1.0f) {
+            scale.x = 0.01f; // @@ Ideally we would like to scale it to 0, but then our rasterizer would not touch any pixels.
+            bw = 1;
+        }
+        if (chartExtents[c].y < 1.0f) {
+            scale.y = 0.01f;
+            bh = 1;
+        }
+
+        //BitMap chart_bitmap(iceil(chartExtents[c].x) + 1 + m_padding_x * 2, iceil(chartExtents[c].y) + 1 + m_padding_y * 2);
+        //BitMap chart_bitmap(ftoi_ceil(chartExtents[c].x/2)*2, ftoi_ceil(chartExtents[c].y/2)*2);
+        BitMap chart_bitmap(bw, bh);
+        chart_bitmap.clearAll();
+        
+        Vector2 offset;
+        offset.x = 0; // (chart_bitmap.width() - chartExtents[c].x) * 0.5f;
+        offset.y = 0; // (chart_bitmap.height() - chartExtents[c].y) * 0.5f;
+
+        drawChartBitmap(chart, &chart_bitmap, scale, offset);
+
+        int best_x, best_y;
+        int best_cw, best_ch;
+        int best_r;
+        findChartLocation(quality, &chart_bitmap, chartExtents[c], w, h, &best_x, &best_y, &best_cw, &best_ch, &best_r);
+
+        /*if (w < best_x + best_cw || h < best_y + best_ch)
+        {
+            nvDebug("Resize extents to (%d, %d).\n", best_x + best_cw, best_y + best_ch);
+        }*/
+
+        // Update parametric extents.
+        w = max(w, best_x + best_cw);
+        h = max(h, best_y + best_ch);
+
+        // Resize bitmap if necessary.
+        if (uint(w) > m_bitmap.width() || uint(h) > m_bitmap.height())
+        {
+            //nvDebug("Resize bitmap (%d, %d).\n", nextPowerOfTwo(w), nextPowerOfTwo(h));
+            m_bitmap.resize(nextPowerOfTwo(w), nextPowerOfTwo(h), false);
+            m_debug_bitmap.resize(nextPowerOfTwo(w), nextPowerOfTwo(h));
+        }
+
+        //nvDebug("Add chart at (%d, %d).\n", best_x, best_y);
+
+#if 0 // old method.
+#if _DEBUG
+        checkCanAddChart(chart, w, h, best_x, best_y, best_r);
+#endif
+
+        // Add chart.
+        addChart(chart, w, h, best_x, best_y, best_r);
+#else
+        // Add chart reusing its bitmap.
+        addChart(&chart_bitmap, w, h, best_x, best_y, best_r);
+#endif
+
+        //float best_angle = 2 * PI * best_r;
+
+        // Translate and rotate chart texture coordinates.
+        HalfEdge::Mesh * mesh = chart->chartMesh();
+        const uint vertexCount = mesh->vertexCount();
+        for (uint v = 0; v < vertexCount; v++)
+        {
+            HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+
+            Vector2 t = vertex->tex * scale + offset;
+            if (best_r) swap(t.x, t.y);
+            //vertex->tex.x = best_x + t.x * cosf(best_angle) - t.y * sinf(best_angle);
+            //vertex->tex.y = best_y + t.x * sinf(best_angle) + t.y * cosf(best_angle);
+            vertex->tex.x = best_x + t.x + 0.5f;
+            vertex->tex.y = best_y + t.y + 0.5f;
+
+            nvCheck(vertex->tex.x >= 0 && vertex->tex.y >= 0);
+        }
+
+#if DEBUG_OUTPUT && 0
+        StringBuilder fileName;
+        fileName.format("debug_packer_%d.tga", i);
+        //outputDebugBitmap(fileName.str(), m_bitmap, w, h);
+        outputDebugImage(fileName.str(), m_debug_bitmap, w, h);
+#endif
+    }
+
+#endif // 0
+
+    //w -= padding - 1; // Leave one pixel border!
+    //h -= padding - 1;
+
+    m_width = max(0, w);
+    m_height = max(0, h);
+
+    nvCheck(isAligned(m_width, 4));
+    nvCheck(isAligned(m_height, 4));
+
+    m_debug_bitmap.resize(m_width, m_height);
+    m_debug_bitmap.setFormat(Image::Format_ARGB);
+
+#if DEBUG_OUTPUT
+    //outputDebugBitmap("debug_packer_final.tga", m_bitmap, w, h);
+    //outputDebugImage("debug_packer_final.tga", m_debug_bitmap, w, h);
+    ImageIO::save("debug_packer_final.tga", &m_debug_bitmap);
+#endif
+}
+
+
+// Chooses a location for a chart bitmap in the w x h atlas by dispatching to the brute-force or the random search; the result is written through best_x/best_y/best_w/best_h/best_r.
+// IC: Brute force is slow, and random may take too long to converge. We start by inserting large charts in a small atlas; brute force is wasteful there because most of the space
+// is occupied at that point. At the end we have many small charts and a large atlas with sparse holes, and finding those holes randomly is slow. A better approach would be to stack large
+// charts as if they were tetris pieces and place small charts randomly. An intermediate strategy may also be interesting: first try along one axis and then try exhaustively along that axis.
+void AtlasPacker::findChartLocation(int quality, const BitMap * bitmap, Vector2::Arg extents, int w, int h, int * best_x, int * best_y, int * best_w, int * best_h, int * best_r)
+{
+    int attempts = 256; // Trial count handed to the random search when quality is not one of 1..4.
+    if (quality == 1) attempts = 4096;
+    if (quality == 2) attempts = 2048;
+    if (quality == 3) attempts = 1024;
+    if (quality == 4) attempts = 512;
+    // Search exhaustively when quality is 0 or when the atlas holds fewer texels than the trial count.
+    if (quality == 0 || w*h < attempts)
+    {
+        findChartLocation_bruteForce(bitmap, extents, w, h, best_x, best_y, best_w, best_h, best_r);
+    }
+    else
+    {
+        findChartLocation_random(bitmap, extents, w, h, best_x, best_y, best_w, best_h, best_r, attempts);
+    }
+}
+
+#define BLOCK_SIZE 4 // Granularity of candidate chart offsets: the brute-force search steps x/y by this value and the random search aligns x/y to it.
+// Exhaustive search: tries every BLOCK_SIZE-stepped offset in both orientations and keeps the fitting placement with the lowest metric. The 'extents' parameter is not read (a local of the same name shadows it).
+void AtlasPacker::findChartLocation_bruteForce(const BitMap * bitmap, Vector2::Arg extents, int w, int h, int * best_x, int * best_y, int * best_w, int * best_h, int * best_r)
+{
+    int best_metric = INT_MAX;
+
+    // Try two different orientations (r == 1 swaps the chart's width and height).
+    for (int r = 0; r < 2; r++)
+    {
+        int cw = bitmap->width();
+        int ch = bitmap->height();
+        if (r & 1) swap(cw, ch);
+
+        for (int y = 0; y <= h + 1; y += BLOCK_SIZE) // + 1 to extend atlas in case atlas full.
+        {
+            for (int x = 0; x <= w + 1; x += BLOCK_SIZE) // + 1 not really necessary here.
+            {
+                // Early out: score the atlas size that would result from this offset before running the overlap test.
+                int area = max(w, x+cw) * max(h, y+ch);
+                //int perimeter = max(w, x+cw) + max(h, y+ch);
+                int extents = max(max(w, x+cw), max(h, y+ch)); // Longest side of the resulting atlas; shadows the parameter.
+
+                int metric = extents*extents + area; // Lower is better.
+
+                if (metric > best_metric) {
+                    continue;
+                }
+                if (metric == best_metric && max(x, y) >= max(*best_x, *best_y)) {
+                    // If metric is the same, pick the one closest to the origin.
+                    continue;
+                }
+
+                if (canAddChart(bitmap, w, h, x, y, r))
+                {
+                    best_metric = metric;
+                    *best_x = x;
+                    *best_y = y;
+                    *best_w = cw;
+                    *best_h = ch;
+                    *best_r = r;
+
+                    if (area == w*h)
+                    {
+                        // The atlas does not grow: chart is completely inside, do not look at any other location.
+                        goto done;
+                    }
+                }
+            }
+        }
+    }
+
+done:
+    nvDebugCheck (best_metric != INT_MAX); // A fitting placement is expected to have been found.
+}
+
+// Random search: samples random offsets and orientations for at least minTrialCount iterations, and keeps sampling until one fitting placement has been found. The 'extents' parameter is not read.
+void AtlasPacker::findChartLocation_random(const BitMap * bitmap, Vector2::Arg extents, int w, int h, int * best_x, int * best_y, int * best_w, int * best_h, int * best_r, int minTrialCount)
+{
+    int best_metric = INT_MAX;
+
+    for (int i = 0; i < minTrialCount || best_metric == INT_MAX; i++)
+    {
+        int r = m_rand.getRange(1); // Random orientation (canAddChart expects 0 or 1).
+        int x = m_rand.getRange(w + 1); // + 1 to extend atlas in case atlas full. We may want to use a higher number to increase probability of extending atlas.
+        int y = m_rand.getRange(h + 1); // + 1 to extend atlas in case atlas full.
+
+        x = align(x, BLOCK_SIZE);
+        y = align(y, BLOCK_SIZE);
+
+        int cw = bitmap->width();
+        int ch = bitmap->height();
+        if (r & 1) swap(cw, ch);
+
+        // Early out: score the atlas size that would result from this offset before running the overlap test.
+        int area = max(w, x+cw) * max(h, y+ch);
+        //int perimeter = max(w, x+cw) + max(h, y+ch);
+        int extents = max(max(w, x+cw), max(h, y+ch)); // Longest side of the resulting atlas; shadows the parameter.
+
+        int metric = extents*extents + area; // Lower is better.
+
+        if (metric > best_metric) {
+            continue;
+        }
+        if (metric == best_metric && min(x, y) > min(*best_x, *best_y)) {
+            // If metric is the same, pick the one closest to the origin. NOTE(review): compares min(x, y), whereas the brute-force search compares max(x, y).
+            continue;
+        }
+
+        if (canAddChart(bitmap, w, h, x, y, r))
+        {
+            best_metric = metric;
+            *best_x = x;
+            *best_y = y;
+            *best_w = cw;
+            *best_h = ch;
+            *best_r = r;
+
+            if (area == w*h)
+            {
+                // The atlas does not grow: chart is completely inside, do not look at any other location.
+                break;
+            }
+        }
+    }
+}
+
+// Rasterizes the chart's faces into 'bitmap', shifted by (padding + 0.5) texels, and then dilates the result 'padding' times.
+void AtlasPacker::drawChartBitmapDilate(const Chart * chart, BitMap * bitmap, int padding)
+{
+    const int w = bitmap->width();
+    const int h = bitmap->height();
+    const Vector2 extents = Vector2(float(w), float(h));
+    
+    // Rasterize chart faces, setting the covered bits (setBitsCallback).
+    const uint faceCount = chart->faceCount();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = chart->chartMesh()->faceAt(f);
+        
+        Vector2 vertices[4];
+
+        uint edgeCount = 0;
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            if (edgeCount < 4) // Only the first four vertices of a face are stored.
+            {
+                vertices[edgeCount] = it.vertex()->tex + Vector2(0.5) + Vector2(float(padding), float(padding));
+            }
+            edgeCount++;
+        }
+
+        if (edgeCount == 3)
+        {
+            Raster::drawTriangle(Raster::Mode_Antialiased, extents, true, vertices, AtlasPacker::setBitsCallback, bitmap);
+        }
+        else // Any face that is not a triangle is drawn as a quad from the stored vertices.
+        {
+            Raster::drawQuad(Raster::Mode_Antialiased, extents, true, vertices, AtlasPacker::setBitsCallback, bitmap);
+        }
+    }
+
+    // Expand chart by padding pixels. (dilation) Each pass sets a texel if it or any of its 8 neighbors is set.
+    BitMap tmp(w, h);
+    for (int i = 0; i < padding; i++) {
+        tmp.clearAll();
+
+        for (int y = 0; y < h; y++) {
+            for (int x = 0; x < w; x++) {
+                bool b = bitmap->bitAt(x, y);
+                if (!b) {
+                    if (x > 0) {
+                        b |= bitmap->bitAt(x - 1, y);
+                        if (y > 0) b |= bitmap->bitAt(x - 1, y - 1);
+                        if (y < h-1) b |= bitmap->bitAt(x - 1, y + 1);
+                    }
+                    if (y > 0) b |= bitmap->bitAt(x, y - 1);
+                    if (y < h-1) b |= bitmap->bitAt(x, y + 1);
+                    if (x < w-1) {
+                        b |= bitmap->bitAt(x + 1, y);
+                        if (y > 0) b |= bitmap->bitAt(x + 1, y - 1);
+                        if (y < h-1) b |= bitmap->bitAt(x + 1, y + 1);
+                    }
+                }
+                if (b) tmp.setBitAt(x, y);
+            }
+        }
+
+        swap(tmp, *bitmap); // The result of this pass becomes the input of the next one.
+    }
+}
+
+// Rasterizes the chart's faces, transformed by scale and offset, into 'bitmap' four times with half-texel shifts, and then dilates the result once.
+void AtlasPacker::drawChartBitmap(const Chart * chart, BitMap * bitmap, const Vector2 & scale, const Vector2 & offset)
+{
+    const int w = bitmap->width();
+    const int h = bitmap->height();
+    const Vector2 extents = Vector2(float(w), float(h));
+    
+    static const Vector2 pad[4] = {
+        Vector2(-0.5, -0.5),
+        Vector2(0.5, -0.5),
+        Vector2(-0.5, 0.5),
+        Vector2(0.5, 0.5)
+    };
+    /*static const Vector2 pad[4] = {
+        Vector2(-1, -1),
+        Vector2(1, -1),
+        Vector2(-1, 1),
+        Vector2(1, 1)
+    };*/
+
+    // Rasterize 4 times to add proper padding (once per pad[] shift).
+    for (int i = 0; i < 4; i++) {
+
+        // Rasterize chart faces, setting the covered bits (setBitsCallback).
+        const uint faceCount = chart->chartMesh()->faceCount();
+        for (uint f = 0; f < faceCount; f++)
+        {
+            const HalfEdge::Face * face = chart->chartMesh()->faceAt(f);
+            
+            Vector2 vertices[4];
+
+            uint edgeCount = 0;
+            for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+            {
+                if (edgeCount < 4) // Only the first four vertices of a face are stored.
+                {
+                    vertices[edgeCount] = it.vertex()->tex * scale + offset + pad[i];
+                    nvCheck(ftoi_ceil(vertices[edgeCount].x) >= 0); // The shifted vertex is expected to stay within the bitmap.
+                    nvCheck(ftoi_ceil(vertices[edgeCount].y) >= 0);
+                    nvCheck(ftoi_ceil(vertices[edgeCount].x) <= w);
+                    nvCheck(ftoi_ceil(vertices[edgeCount].y) <= h);
+                }
+                edgeCount++;
+            }
+
+            if (edgeCount == 3)
+            {
+                Raster::drawTriangle(Raster::Mode_Antialiased, extents, /*enableScissors=*/true, vertices, AtlasPacker::setBitsCallback, bitmap);
+            }
+            else // Any face that is not a triangle is drawn as a quad from the stored vertices.
+            {
+                Raster::drawQuad(Raster::Mode_Antialiased, extents, /*enableScissors=*/true, vertices, AtlasPacker::setBitsCallback, bitmap);
+            }
+        }
+    }
+
+    // @@ This only allows us to expand the size in texel intervals.
+    /*if (m_padding_x != 0 && m_padding_y != 0)*/ {
+
+        // Expand chart by one pixel. (dilation) A texel is set if it or any of its 8 neighbors is set.
+        BitMap tmp(w, h);
+        //for (int i = 0; i < 1; i++) {
+            tmp.clearAll();
+
+            for (int y = 0; y < h; y++) {
+                for (int x = 0; x < w; x++) {
+                    bool b = bitmap->bitAt(x, y);
+                    if (!b) {
+                        if (x > 0) {
+                            b |= bitmap->bitAt(x - 1, y);
+                            if (y > 0) b |= bitmap->bitAt(x - 1, y - 1);
+                            if (y < h-1) b |= bitmap->bitAt(x - 1, y + 1);
+                        }
+                        if (y > 0) b |= bitmap->bitAt(x, y - 1);
+                        if (y < h-1) b |= bitmap->bitAt(x, y + 1);
+                        if (x < w-1) {
+                            b |= bitmap->bitAt(x + 1, y);
+                            if (y > 0) b |= bitmap->bitAt(x + 1, y - 1);
+                            if (y < h-1) b |= bitmap->bitAt(x + 1, y + 1);
+                        }
+                    }
+                    if (b) tmp.setBitAt(x, y);
+                }
+            }
+
+            swap(tmp, *bitmap); // Hand the dilated bitmap back to the caller.
+        //}
+    }
+}
+// Returns true if the chart bitmap, placed at (offset_x, offset_y) with orientation r, overlaps no set bit of m_bitmap. Texels outside atlas_w x atlas_h (or at negative coordinates) are not tested.
+bool AtlasPacker::canAddChart(const BitMap * bitmap, int atlas_w, int atlas_h, int offset_x, int offset_y, int r)
+{
+    nvDebugCheck(r == 0 || r == 1);
+
+    // Check whether the two bitmaps overlap.
+
+    const int w = bitmap->width();
+    const int h = bitmap->height();
+    
+    if (r == 0) { // Chart bitmap used as is.
+        for (int y = 0; y < h; y++) {
+            int yy = y + offset_y;
+            if (yy >= 0) {
+                for (int x = 0; x < w; x++) {
+                    int xx = x + offset_x;
+                    if (xx >= 0) {
+                        if (bitmap->bitAt(x, y)) {
+                            if (xx < atlas_w && yy < atlas_h) {
+                                if (m_bitmap.bitAt(xx, yy)) return false;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    else if (r == 1) { // Chart bitmap transposed: its x maps to atlas y and its y to atlas x.
+        for (int y = 0; y < h; y++) {
+            int xx = y + offset_x;
+            if (xx >= 0) {
+                for (int x = 0; x < w; x++) {
+                    int yy = x + offset_y;
+                    if (yy >= 0) {
+                        if (bitmap->bitAt(x, y)) {
+                            if (xx < atlas_w && yy < atlas_h) {
+                                if (m_bitmap.bitAt(xx, yy)) return false;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+    return true;
+}
+// Disabled (#if 0) debug helper: rasterizes the chart's faces at offset (x, y) over m_bitmap with checkBitsCallback to verify that none of the covered bits is set.
+#if 0
+void AtlasPacker::checkCanAddChart(const Chart * chart, int w, int h, int x, int y, int r)
+{
+    nvDebugCheck(r == 0 || r == 1);
+    Vector2 extents = Vector2(float(w), float(h));
+    Vector2 offset = Vector2(float(x), float(y));
+
+    // Rasterize chart faces, checking (not setting) the covered bits.
+    const uint faceCount = chart->faceCount();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = chart->chartMesh()->faceAt(f);
+        
+        Vector2 vertices[4];
+
+        uint edgeCount = 0;
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            if (edgeCount < 4)
+            {
+                Vector2 t = it.vertex()->tex;
+                if (r == 1) swap(t.x, t.y); // Orientation 1 transposes the chart.
+                vertices[edgeCount] = t + offset;
+            }
+            edgeCount++;
+        }
+
+        if (edgeCount == 3)
+        {
+            Raster::drawTriangle(Raster::Mode_Antialiased, extents, /*enableScissors=*/true, vertices, AtlasPacker::checkBitsCallback, &m_bitmap);
+        }
+        else
+        {
+            Raster::drawQuad(Raster::Mode_Antialiased, extents, /*enableScissors=*/true, vertices, AtlasPacker::checkBitsCallback, &m_bitmap);
+        }
+    }
+}
+#endif // 0
+
+// File-scope color used for debug output; set by selectRandomColor and read by debugDrawCallback and by the bitmap overload of addChart.
+static Color32 chartColor = Color32(0);
+static void selectRandomColor(MTRand & rand) {
+    // Pick random color for this chart. @@ Select random hue, but fixed saturation/luminance?
+    chartColor.r = 128 + rand.getRange(127); // Each channel is 128 plus a random value from getRange(127).
+    chartColor.g = 128 + rand.getRange(127);
+    chartColor.b = 128 + rand.getRange(127);
+    chartColor.a = 255;
+}
+static bool debugDrawCallback(void * param, int x, int y, Vector3::Arg, Vector3::Arg, Vector3::Arg, float area)
+{
+    Image * image = (Image *)param; // Rasterizer callback for debug output: 'param' is the Image being drawn into.
+    // Texels with coverage get the current chart color; alpha is accumulated from the coverage.
+    if (area > 0.0) {
+        Color32 c = image->pixel(x, y);
+        c.r = chartColor.r;
+        c.g = chartColor.g;
+        c.b = chartColor.b;
+        c.a += U8(ftoi_round(0.5f * area * 255)); // NOTE(review): the sum may wrap if the alpha channel is 8 bits wide - confirm against Color32.
+        image->pixel(x, y) = c;
+    }
+
+    return true; // Always true; presumably tells the rasterizer to continue - confirm against Raster.
+}
+// Debug drawing only: rasterizes the chart's faces, offset by (x + 0.5, y + 0.5) and transposed when r == 1, into debugOutput using a random color. m_bitmap is not touched here.
+void AtlasPacker::addChart(const Chart * chart, int w, int h, int x, int y, int r, Image * debugOutput)
+{
+    nvDebugCheck(r == 0 || r == 1);
+
+    nvDebugCheck(debugOutput != NULL);
+    selectRandomColor(m_rand);
+
+    Vector2 extents = Vector2(float(w), float(h));
+    Vector2 offset = Vector2(float(x), float(y)) + Vector2(0.5);
+
+    // Rasterize chart faces into the debug image (debugDrawCallback).
+    const uint faceCount = chart->faceCount();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = chart->chartMesh()->faceAt(f);
+        
+        Vector2 vertices[4];
+
+        uint edgeCount = 0;
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            if (edgeCount < 4) // Only the first four vertices of a face are stored.
+            {
+                Vector2 t = it.vertex()->tex;
+                if (r == 1) swap(t.x, t.y); // Orientation 1 transposes the chart.
+                vertices[edgeCount] = t + offset;
+            }
+            edgeCount++;
+        }
+
+        if (edgeCount == 3)
+        {
+            Raster::drawTriangle(Raster::Mode_Antialiased, extents, /*enableScissors=*/true, vertices, debugDrawCallback, debugOutput);
+        }
+        else // Any face that is not a triangle is drawn as a quad from the stored vertices.
+        {
+            Raster::drawQuad(Raster::Mode_Antialiased, extents, /*enableScissors=*/true, vertices, debugDrawCallback, debugOutput);
+        }
+    }
+}
+
+// Stamps the chart bitmap at (offset_x, offset_y) with orientation r. With a non-NULL debugOutput the texels are painted into that image; otherwise the bits are set in m_bitmap.
+void AtlasPacker::addChart(const BitMap * bitmap, int atlas_w, int atlas_h, int offset_x, int offset_y, int r, Image * debugOutput)
+{
+    nvDebugCheck(r == 0 || r == 1);
+
+    // Copy the set bits of the chart bitmap to the atlas (or to the debug image).
+
+    const int w = bitmap->width();
+    const int h = bitmap->height();
+
+    if (debugOutput != NULL) {
+        selectRandomColor(m_rand);
+    }
+
+    if (r == 0) { // Chart bitmap used as is.
+        for (int y = 0; y < h; y++) {
+            int yy = y + offset_y;
+            if (yy >= 0) {
+                for (int x = 0; x < w; x++) {
+                    int xx = x + offset_x;
+                    if (xx >= 0) {
+                        if (bitmap->bitAt(x, y)) {
+                            if (xx < atlas_w && yy < atlas_h) {
+                                if (debugOutput) debugOutput->pixel(xx, yy) = chartColor;
+                                else {
+                                    nvDebugCheck(m_bitmap.bitAt(xx, yy) == false); // The target texel must still be free.
+                                    m_bitmap.setBitAt(xx, yy);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    else if (r == 1) { // Chart bitmap transposed: its x maps to atlas y and its y to atlas x.
+        for (int y = 0; y < h; y++) {
+            int xx = y + offset_x;
+            if (xx >= 0) {
+                for (int x = 0; x < w; x++) {
+                    int yy = x + offset_y;
+                    if (yy >= 0) {
+                        if (bitmap->bitAt(x, y)) {
+                            if (xx < atlas_w && yy < atlas_h) {
+                                if (debugOutput) debugOutput->pixel(xx, yy) = chartColor;
+                                else {
+                                    nvDebugCheck(m_bitmap.bitAt(xx, yy) == false); // The target texel must still be free.
+                                    m_bitmap.setBitAt(xx, yy);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+// Rasterizer callback: debug-checks that the bit at (x, y) of the BitMap passed through 'param' is not set. Always returns true.
+/*static*/ bool AtlasPacker::checkBitsCallback(void * param, int x, int y, Vector3::Arg, Vector3::Arg, Vector3::Arg, float)
+{
+    BitMap * bitmap = (BitMap * )param;
+
+    nvDebugCheck(bitmap->bitAt(x, y) == false);
+
+    return true;
+}
+// Rasterizer callback: sets the bit at (x, y) of the BitMap passed through 'param' when the reported coverage is positive. Always returns true.
+/*static*/ bool AtlasPacker::setBitsCallback(void * param, int x, int y, Vector3::Arg, Vector3::Arg, Vector3::Arg, float area)
+{
+    BitMap * bitmap = (BitMap * )param;
+
+    if (area > 0.0) {
+        bitmap->setBitAt(x, y);
+    }
+
+    return true;
+}
+
+
+// Returns the fraction of texels in the m_width x m_height atlas whose bit is set in m_bitmap, or 0 for an empty atlas.
+float AtlasPacker::computeAtlasUtilization() const {
+    const uint w = m_width;
+    const uint h = m_height;
+    nvDebugCheck(w <= m_bitmap.width());
+    nvDebugCheck(h <= m_bitmap.height());
+    if (w == 0 || h == 0) return 0.0f; // Empty atlas: avoid computing 0/0 (NaN).
+    uint count = 0;
+    for (uint y = 0; y < h; y++) {
+        for (uint x = 0; x < w; x++) {
+            count += m_bitmap.bitAt(x, y);
+        }
+    }
+
+    return float(count) / (w * h);
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/param/AtlasPacker.h b/thirdparty/thekla_atlas/nvmesh/param/AtlasPacker.h
new file mode 100644
index 0000000000..2d305f38cd
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/AtlasPacker.h
@@ -0,0 +1,63 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_MESH_ATLASPACKER_H
+#define NV_MESH_ATLASPACKER_H
+
+#include "nvcore/RadixSort.h"
+#include "nvmath/Vector.h"
+#include "nvmath/Random.h"
+#include "nvimage/BitMap.h"
+#include "nvimage/Image.h"
+
+#include "nvmesh/nvmesh.h"
+
+
+namespace nv
+{
+    class Atlas;
+    class Chart;
+    // Packs the charts of an Atlas into a single texture atlas, tracking the occupied texels in a bitmap.
+    struct AtlasPacker
+    {
+        AtlasPacker(Atlas * atlas);
+        ~AtlasPacker();
+
+        void packCharts(int quality, float texelArea, bool blockAligned, bool conservative);
+        float computeAtlasUtilization() const; // Fraction of the atlas texels that are occupied.
+
+    private:
+        // Placement search: the chosen offset, size and orientation are returned through the best_* pointers.
+        void findChartLocation(int quality, const BitMap * bitmap, Vector2::Arg extents, int w, int h, int * best_x, int * best_y, int * best_w, int * best_h, int * best_r);
+        void findChartLocation_bruteForce(const BitMap * bitmap, Vector2::Arg extents, int w, int h, int * best_x, int * best_y, int * best_w, int * best_h, int * best_r);
+        void findChartLocation_random(const BitMap * bitmap, Vector2::Arg extents, int w, int h, int * best_x, int * best_y, int * best_w, int * best_h, int * best_r, int minTrialCount);
+        // Rasterization of a chart into its own bitmap.
+        void drawChartBitmapDilate(const Chart * chart, BitMap * bitmap, int padding);
+        void drawChartBitmap(const Chart * chart, BitMap * bitmap, const Vector2 & scale, const Vector2 & offset);
+        // Overlap test and insertion at offset (x, y) with orientation r (0, or 1 for transposed).
+        bool canAddChart(const BitMap * bitmap, int w, int h, int x, int y, int r);
+        void addChart(const BitMap * bitmap, int w, int h, int x, int y, int r, Image * debugOutput);
+        //void checkCanAddChart(const Chart * chart, int w, int h, int x, int y, int r);
+        void addChart(const Chart * chart, int w, int h, int x, int y, int r, Image * debugOutput);
+        
+        // Rasterizer callbacks; 'param' carries the target BitMap.
+        static bool checkBitsCallback(void * param, int x, int y, Vector3::Arg bar, Vector3::Arg dx, Vector3::Arg dy, float coverage);
+        static bool setBitsCallback(void * param, int x, int y, Vector3::Arg bar, Vector3::Arg dx, Vector3::Arg dy, float coverage);
+
+    private:
+
+        Atlas * m_atlas;
+        BitMap m_bitmap; // Occupancy of the atlas: one bit per texel.
+        Image m_debug_bitmap;
+        RadixSort m_radix;
+
+        uint m_width;
+        uint m_height;
+        
+        MTRand m_rand;
+       
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_ATLASPACKER_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/LeastSquaresConformalMap.cpp b/thirdparty/thekla_atlas/nvmesh/param/LeastSquaresConformalMap.cpp
new file mode 100644
index 0000000000..cd1e8bbb7b
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/LeastSquaresConformalMap.cpp
@@ -0,0 +1,483 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+
+#include "nvmesh.h" // pch
+
+#include "LeastSquaresConformalMap.h"
+#include "ParameterizationQuality.h"
+#include "Util.h"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Vertex.h"
+#include "nvmesh/halfedge/Face.h"
+
+#include "nvmath/Sparse.h"
+#include "nvmath/Solver.h"
+#include "nvmath/Vector.inl"
+
+#include "nvcore/Array.inl"
+
+
+using namespace nv;
+using namespace HalfEdge;
+
+namespace
+{
+
+    // Tests all pairs of boundary vertices (quadratic in the vertex count) and returns through a and b the pair that is farthest apart.
+    static void findDiameterVertices(HalfEdge::Mesh * mesh, HalfEdge::Vertex ** a, HalfEdge::Vertex ** b)
+    {
+        nvDebugCheck(mesh != NULL);
+        nvDebugCheck(a != NULL);
+        nvDebugCheck(b != NULL);
+
+        const uint vertexCount = mesh->vertexCount();
+
+        float maxLength = 0.0f;
+
+        for (uint v0 = 1; v0 < vertexCount; v0++) // v1 runs below v0, so each unordered pair is visited once.
+        {
+            HalfEdge::Vertex * vertex0 = mesh->vertexAt(v0);
+            nvDebugCheck(vertex0 != NULL);
+
+            if (!vertex0->isBoundary()) continue;
+
+            for (uint v1 = 0; v1 < v0; v1++)
+            {
+                HalfEdge::Vertex * vertex1 = mesh->vertexAt(v1);
+                nvDebugCheck(vertex1 != NULL);
+
+                if (!vertex1->isBoundary()) continue;
+
+                float len = length(vertex0->pos - vertex1->pos);
+
+                if (len > maxLength)
+                {
+                    maxLength = len;
+
+                    *a = vertex0;
+                    *b = vertex1;
+                }
+            }
+        }
+        // NOTE(review): *a and *b are only written when a boundary pair with non-zero distance exists; otherwise they keep the caller's values.
+        nvDebugCheck(*a != NULL && *b != NULL);
+    }
+
+    // Fast sweep in 3 directions: among the boundary vertices, finds the min/max pair along X, Y and Z and returns through a and b the pair that is farthest apart. Returns false if the mesh has no boundary vertex.
+    static bool findApproximateDiameterVertices(HalfEdge::Mesh * mesh, HalfEdge::Vertex ** a, HalfEdge::Vertex ** b)
+    {
+        nvDebugCheck(mesh != NULL);
+        nvDebugCheck(a != NULL);
+        nvDebugCheck(b != NULL);
+
+        const uint vertexCount = mesh->vertexCount();
+
+        HalfEdge::Vertex * minVertex[3];
+        HalfEdge::Vertex * maxVertex[3];
+
+        minVertex[0] = minVertex[1] = minVertex[2] = NULL;
+        maxVertex[0] = maxVertex[1] = maxVertex[2] = NULL;
+        // Seed all six extremes with the first boundary vertex. The scan starts at 0 so that vertex 0 is not ignored.
+        for (uint v = 0; v < vertexCount; v++)
+        {
+            HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+            nvDebugCheck(vertex != NULL);
+
+            if (vertex->isBoundary())
+            {
+                minVertex[0] = minVertex[1] = minVertex[2] = vertex;
+                maxVertex[0] = maxVertex[1] = maxVertex[2] = vertex;
+                break;
+            }
+        }
+
+        if (minVertex[0] == NULL)
+        {
+            // Input mesh has no boundaries.
+            return false;
+        }
+        // Update the per-axis extremes with every boundary vertex, including vertex 0.
+        for (uint v = 0; v < vertexCount; v++)
+        {
+            HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+            nvDebugCheck(vertex != NULL);
+
+            if (!vertex->isBoundary())
+            {
+                // Skip interior vertices.
+                continue;
+            }
+
+            if (vertex->pos.x < minVertex[0]->pos.x) minVertex[0] = vertex;
+            else if (vertex->pos.x > maxVertex[0]->pos.x) maxVertex[0] = vertex;
+
+            if (vertex->pos.y < minVertex[1]->pos.y) minVertex[1] = vertex;
+            else if (vertex->pos.y > maxVertex[1]->pos.y) maxVertex[1] = vertex;
+
+            if (vertex->pos.z < minVertex[2]->pos.z) minVertex[2] = vertex;
+            else if (vertex->pos.z > maxVertex[2]->pos.z) maxVertex[2] = vertex;
+        }
+
+        float lengths[3]; // Distance between the two extreme vertices of each axis.
+        for (int i = 0; i < 3; i++)
+        {
+            lengths[i] = length(minVertex[i]->pos - maxVertex[i]->pos);
+        }
+        // Return the pair with the largest distance.
+        if (lengths[0] > lengths[1] && lengths[0] > lengths[2])
+        {
+            *a = minVertex[0];
+            *b = maxVertex[0];
+        }
+        else if (lengths[1] > lengths[2])
+        {
+            *a = minVertex[1];
+            *b = maxVertex[1];
+        }
+        else
+        {
+            *a = minVertex[2];
+            *b = maxVertex[2];
+        }
+
+        return true;
+    }
+
+    // Conformal relations from Bruno Levy:
+
+    // Computes the coordinates of the vertices of a triangle in a local 2D orthonormal basis
+    // of the triangle's plane: p0 maps to the origin and p1 to (|p1 - p0|, 0).
+    static void project_triangle(Vector3::Arg p0, Vector3::Arg p1, Vector3::Arg p2, Vector2 * z0, Vector2 * z1, Vector2 * z2)
+    {
+        Vector3 X = normalize(p1 - p0, 0.0f); // Local X axis along the edge p0 -> p1.
+        Vector3 Z = normalize(cross(X, (p2 - p0)), 0.0f); // Perpendicular to the triangle's plane.
+        Vector3 Y = normalize(cross(Z, X), 0.0f); // Local Y axis, perpendicular to X and Z.
+
+        float x0 = 0.0f;
+        float y0 = 0.0f;
+        float x1 = length(p1 - p0);
+        float y1 = 0.0f;
+        float x2 = dot((p2 - p0), X);
+        float y2 = dot((p2 - p0), Y);
+
+        *z0 = Vector2(x0, y0);
+        *z1 = Vector2(x1, y1);
+        *z2 = Vector2(x2, y2);
+    }
+
+    // LSCM equation, geometric form :
+    // (Z1 - Z0)(U2 - U0) = (Z2 - Z0)(U1 - U0)
+    // Where Uk = uk + i.vk is the complex number 
+    //                       corresponding to (u,v) coords
+    //       Zk = xk + i.yk is the complex number 
+    //                       corresponding to local (x,y) coords
+    // cool: no divide with this expression,
+    //  makes it more numerically stable in
+    //  the presence of degenerate triangles.
+    // Writes the two LSCM equations (real and imaginary part) of triangle 'row' into A; the ids of v0, v1 and v2 select the u/v variables.
+    static void setup_conformal_map_relations(SparseMatrix & A, int row, const HalfEdge::Vertex * v0, const HalfEdge::Vertex * v1, const HalfEdge::Vertex * v2)
+    {
+        int id0 = v0->id;
+        int id1 = v1->id;
+        int id2 = v2->id;
+
+        Vector3 p0 = v0->pos;
+        Vector3 p1 = v1->pos;
+        Vector3 p2 = v2->pos;
+
+        Vector2 z0, z1, z2;
+        project_triangle(p0, p1, p2, &z0, &z1, &z2);
+
+        Vector2 z01 = z1 - z0;
+        Vector2 z02 = z2 - z0;
+
+        float a = z01.x;
+        float b = z01.y;
+        float c = z02.x;
+        float d = z02.y;
+        nvCheck(b == 0.0f); // project_triangle places z1 on the local X axis.
+
+        // Note  : 2*id + 0 --> u
+        //         2*id + 1 --> v
+        int u0_id = 2 * id0 + 0;
+        int v0_id = 2 * id0 + 1;
+        int u1_id = 2 * id1 + 0;
+        int v1_id = 2 * id1 + 1;
+        int u2_id = 2 * id2 + 0;
+        int v2_id = 2 * id2 + 1;
+
+        // Note : b = 0
+        // setCoefficient is called with (variable index, equation index, value).
+        // Real part
+        A.setCoefficient(u0_id, 2 * row + 0, -a+c);
+        A.setCoefficient(v0_id, 2 * row + 0,  b-d);
+        A.setCoefficient(u1_id, 2 * row + 0,   -c);
+        A.setCoefficient(v1_id, 2 * row + 0,    d);
+        A.setCoefficient(u2_id, 2 * row + 0,    a);
+
+        // Imaginary part
+        A.setCoefficient(u0_id, 2 * row + 1, -b+d);
+        A.setCoefficient(v0_id, 2 * row + 1, -a+c);
+        A.setCoefficient(u1_id, 2 * row + 1,   -d);
+        A.setCoefficient(v1_id, 2 * row + 1,   -c);
+        A.setCoefficient(v2_id, 2 * row + 1,    a);
+    }
+
+
+    // Conformal relations from Brecht Van Lommel (based on ABF):
+    // Cosine of the angle at v2 between the edges v2->v1 and v2->v3, clamped to [-1, 1]. NOTE(review): divides by the edge lengths; zero-length edges are not guarded here.
+    static float vec_angle_cos(Vector3::Arg v1, Vector3::Arg v2, Vector3::Arg v3)
+    {
+        Vector3 d1 = v1 - v2;
+        Vector3 d2 = v3 - v2;
+        return clamp(dot(d1, d2) / (length(d1) * length(d2)), -1.0f, 1.0f);
+    }
+    // Angle at v2 between the edges v2->v1 and v2->v3, computed as acosf of vec_angle_cos.
+    static float vec_angle(Vector3::Arg v1, Vector3::Arg v2, Vector3::Arg v3)
+    {
+        float dot = vec_angle_cos(v1, v2, v3);
+        return acosf(dot);
+    }
+    // Angles of the triangle (v1, v2, v3): *a1 at v1 and *a2 at v2 are measured; *a3 is derived as PI - *a2 - *a1.
+    static void triangle_angles(Vector3::Arg v1, Vector3::Arg v2, Vector3::Arg v3, float *a1, float *a2, float *a3)
+    {
+        *a1 = vec_angle(v3, v1, v2);
+        *a2 = vec_angle(v1, v2, v3);
+        *a3 = PI - *a2 - *a1;
+    }
+    // Cosines of the angles of the triangle (v1, v2, v3): *a1 at v1, *a2 at v2 and *a3 at v3, each computed with vec_angle_cos.
+    static void triangle_cosines(Vector3::Arg v1, Vector3::Arg v2, Vector3::Arg v3, float *a1, float *a2, float *a3)
+    {
+        *a1 = vec_angle_cos(v3, v1, v2);
+        *a2 = vec_angle_cos(v1, v2, v3);
+        *a3 = vec_angle_cos(v2, v3, v1);
+    }
+    // Writes the two angle-based equations (real and imaginary part) of triangle 'row' into A; the ids of v0, v1 and v2 select the u/v variables.
+    static void setup_abf_relations(SparseMatrix & A, int row, const HalfEdge::Vertex * v0, const HalfEdge::Vertex * v1, const HalfEdge::Vertex * v2)
+    {
+        int id0 = v0->id;
+        int id1 = v1->id;
+        int id2 = v2->id;
+
+        Vector3 p0 = v0->pos;
+        Vector3 p1 = v1->pos;
+        Vector3 p2 = v2->pos;
+
+#if 1
+        // @@ IC: Wouldn't it be more accurate to return cos and compute 1-cos^2?
+        // It does indeed seem to be a little bit more robust.
+        // @@ Need to revisit this more carefully!
+
+        float a0, a1, a2;
+        triangle_angles(p0, p1, p2, &a0, &a1, &a2);
+
+        float s0 = sinf(a0);
+        float s1 = sinf(a1);
+        float s2 = sinf(a2);
+
+        /*// Hack for degenerate triangles.
+        if (equal(s0, 0) && equal(s1, 0) && equal(s2, 0)) {
+            if (equal(a0, 0)) a0 += 0.001f;
+            if (equal(a1, 0)) a1 += 0.001f;
+            if (equal(a2, 0)) a2 += 0.001f;
+
+            if (equal(a0, PI)) a0 = PI - a1 - a2;
+            if (equal(a1, PI)) a1 = PI - a0 - a2;
+            if (equal(a2, PI)) a2 = PI - a0 - a1;
+
+            s0 = sinf(a0);
+            s1 = sinf(a1);
+            s2 = sinf(a2);
+        }*/
+        // Rotate sines, angles and ids together so that the largest sine ends up in slot 2, the divisor of 'ratio' below.
+        if (s1 > s0 && s1 > s2)
+        {
+            swap(s1, s2);
+            swap(s0, s1);
+
+            swap(a1, a2);
+            swap(a0, a1);
+
+            swap(id1, id2);
+            swap(id0, id1);
+        }
+        else if (s0 > s1 && s0 > s2)
+        {
+            swap(s0, s2);
+            swap(s0, s1);
+
+            swap(a0, a2);
+            swap(a0, a1);
+
+            swap(id0, id2);
+            swap(id0, id1);
+        }
+
+        float c0 = cosf(a0);
+#else
+        float c0, c1, c2;
+        triangle_cosines(p0, p1, p2, &c0, &c1, &c2);
+        // Disabled variant: note that these are squared sines (1 - cos^2).
+        float s0 = 1 - c0*c0;
+        float s1 = 1 - c1*c1;
+        float s2 = 1 - c2*c2;
+
+        nvDebugCheck(s0 != 0 || s1 != 0 || s2 != 0);
+
+        if (s1 > s0 && s1 > s2)
+        {
+            swap(s1, s2);
+            swap(s0, s1);
+
+            swap(c1, c2);
+            swap(c0, c1);
+
+            swap(id1, id2);
+            swap(id0, id1);
+        }
+        else if (s0 > s1 && s0 > s2)
+        {
+            swap(s0, s2);
+            swap(s0, s1);
+
+            swap(c0, c2);
+            swap(c0, c1);
+
+            swap(id0, id2);
+            swap(id0, id1);
+        }
+#endif
+
+        float ratio = (s2 == 0.0f) ? 1.0f: s1/s2; // Falls back to 1 when the divisor is zero.
+        float cosine = c0 * ratio;
+        float sine = s0 * ratio;
+
+        // Note  : 2*id + 0 --> u
+        //         2*id + 1 --> v
+        int u0_id = 2 * id0 + 0;
+        int v0_id = 2 * id0 + 1;
+        int u1_id = 2 * id1 + 0;
+        int v1_id = 2 * id1 + 1;
+        int u2_id = 2 * id2 + 0;
+        int v2_id = 2 * id2 + 1;
+        // setCoefficient is called with (variable index, equation index, value).
+        // Real part
+        A.setCoefficient(u0_id, 2 * row + 0, cosine - 1.0f);
+        A.setCoefficient(v0_id, 2 * row + 0, -sine);
+        A.setCoefficient(u1_id, 2 * row + 0, -cosine);
+        A.setCoefficient(v1_id, 2 * row + 0, sine);
+        A.setCoefficient(u2_id, 2 * row + 0, 1);
+
+        // Imaginary part
+        A.setCoefficient(u0_id, 2 * row + 1, sine);
+        A.setCoefficient(v0_id, 2 * row + 1, cosine - 1.0f);
+        A.setCoefficient(u1_id, 2 * row + 1, -sine);
+        A.setCoefficient(v1_id, 2 * row + 1, -cosine);
+        A.setCoefficient(v2_id, 2 * row + 1, 1);
+    }
+
+} // namespace
+
+// Computes a least squares conformal map of 'mesh' and stores it in the vertices' tex coordinates. Returns false if there are too few equations, no boundary vertex, or the two pinned vertices share the same tex.
+bool nv::computeLeastSquaresConformalMap(HalfEdge::Mesh * mesh)
+{
+    nvDebugCheck(mesh != NULL);
+
+    // For this to work properly, mesh should not have colocals that have the same 
+    // attributes, unless you want the vertices to actually have different texcoords.
+
+    const uint vertexCount = mesh->vertexCount();
+    const uint D = 2 * vertexCount;
+    const uint N = 2 * countMeshTriangles(mesh);
+
+    // N is the number of equations (two per triangle: real and imaginary part).
+    // D is the number of variables (two per vertex: u and v); 4 of them are locked by the 2 pinned vertices.
+	if (N < D - 4) {
+		return false;
+	}
+
+    SparseMatrix A(D, N);
+    FullVector b(N);
+    FullVector x(D);
+
+    // Fill b:
+    b.fill(0.0f);
+
+    // Fill x: pick the two vertices to pin, then seed x with the current texcoords.
+    HalfEdge::Vertex * v0;
+    HalfEdge::Vertex * v1;
+    if (!findApproximateDiameterVertices(mesh, &v0, &v1))
+    {
+        // Mesh has no boundaries.
+        return false;
+    }
+    if (v0->tex == v1->tex)
+    {
+        // LSCM expects an existing parameterization.
+        return false;
+    }
+
+    for (uint v = 0; v < vertexCount; v++)
+    {
+        HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+        nvDebugCheck(vertex != NULL);
+
+        // Initial solution. NOTE(review): x is indexed by v here but by vertex->id in the equations; assumes both agree - confirm.
+        x[2 * v + 0] = vertex->tex.x;
+        x[2 * v + 1] = vertex->tex.y;
+    }
+
+    // Fill A: 't' counts the triangles emitted so far and selects the pair of equations to write.
+    const uint faceCount = mesh->faceCount();
+    for (uint f = 0, t = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = mesh->faceAt(f);
+        nvDebugCheck(face != NULL);
+        nvDebugCheck(face->edgeCount() == 3);
+
+        const HalfEdge::Vertex * vertex0 = NULL;
+
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Edge * edge = it.current();
+            nvCheck(edge != NULL);
+
+            if (vertex0 == NULL)
+            {
+                vertex0 = edge->vertex; // The first edge only provides the fan's pivot vertex.
+            }
+            else if (edge->next->vertex != vertex0) // Skip the edge that closes the face back to the pivot.
+            {
+                const HalfEdge::Vertex * vertex1 = edge->from();
+                const HalfEdge::Vertex * vertex2 = edge->to();
+
+                setup_abf_relations(A, t, vertex0, vertex1, vertex2);
+                //setup_conformal_map_relations(A, t, vertex0, vertex1, vertex2);
+
+                t++;
+            }
+        }
+    }
+    // The u and v variables of the two pinned vertices are passed to the solver as locked parameters.
+    const uint lockedParameters[] =
+    {
+        2 * v0->id + 0,
+        2 * v0->id + 1,
+        2 * v1->id + 0,
+        2 * v1->id + 1
+    };
+
+    // Solve. NOTE(review): any result returned by LeastSquaresSolver is not checked here.
+    LeastSquaresSolver(A, b, x, lockedParameters, 4, 0.000001f);
+
+    // Map x back to texcoords:
+    for (uint v = 0; v < vertexCount; v++)
+    {
+        HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+        nvDebugCheck(vertex != NULL);
+
+        vertex->tex = Vector2(x[2 * v + 0], x[2 * v + 1]);
+    }
+
+    return true;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/param/LeastSquaresConformalMap.h b/thirdparty/thekla_atlas/nvmesh/param/LeastSquaresConformalMap.h
new file mode 100644
index 0000000000..51fbf193c8
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/LeastSquaresConformalMap.h
@@ -0,0 +1,15 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+
+#pragma once
+#ifndef NV_MESH_LEASTSQUARESCONFORMALMAP_H
+#define NV_MESH_LEASTSQUARESCONFORMALMAP_H
+
+namespace nv
+{
+    namespace HalfEdge { class Mesh; }
+    // Least squares conformal map: solves for per-vertex UVs and stores them in each vertex's tex. NOTE(review): see the .cpp for when false is returned.
+    bool computeLeastSquaresConformalMap(HalfEdge::Mesh * mesh);
+
+} // nv namespace
+
+#endif // NV_MESH_LEASTSQUARESCONFORMALMAP_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/OrthogonalProjectionMap.cpp b/thirdparty/thekla_atlas/nvmesh/param/OrthogonalProjectionMap.cpp
new file mode 100644
index 0000000000..d6e5e30561
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/OrthogonalProjectionMap.cpp
@@ -0,0 +1,99 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "nvmesh.h" // pch
+
+#include "OrthogonalProjectionMap.h"
+
+#include "nvcore/Array.inl"
+
+#include "nvmath/Fitting.h"
+#include "nvmath/Vector.inl"
+#include "nvmath/Box.inl"
+#include "nvmath/Plane.inl"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Vertex.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/geometry/Bounds.h"
+
+
+using namespace nv;
+
+bool nv::computeOrthogonalProjectionMap(HalfEdge::Mesh * mesh)
+{
+    Vector3 axis[2]; // Projection axes: tex.x = dot(axis[0], pos), tex.y = dot(axis[1], pos).
+    // Parameterizes the mesh by projecting every vertex position onto two axes. Returns false when the axes cannot be computed.
+#if 1
+    // Active path: derive the axes from the covariance of the vertex positions.
+    uint vertexCount = mesh->vertexCount();
+    Array<Vector3> points(vertexCount); // NOTE(review): the resize() below sets the element count; confirm what this constructor argument means in nvcore/Array.
+    points.resize(vertexCount);
+
+    for (uint i = 0; i < vertexCount; i++)
+    {
+        points[i] = mesh->vertexAt(i)->pos;
+    }
+
+#if 0
+    axis[0] = Fit::computePrincipalComponent_EigenSolver(vertexCount, points.buffer());
+    axis[0] = normalize(axis[0]);
+
+    Plane plane = Fit::bestPlane(vertexCount, points.buffer());
+
+    Vector3 n = plane.vector();
+
+    axis[1] = cross(axis[0], n);
+    axis[1] = normalize(axis[1]);
+#else
+    // Avoid redundant computations.
+    float matrix[6]; // Filled by computeCovariance; presumably the 6 unique entries of the symmetric 3x3 covariance matrix - TODO confirm layout.
+    Fit::computeCovariance(vertexCount, points.buffer(), matrix);
+
+    if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) { // Presumably the diagonal (xx, yy, zz): no variance along any axis, nothing to fit.
+        return false;
+    }
+
+    float eigenValues[3];
+    Vector3 eigenVectors[3];
+    if (!nv::Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) {
+        return false;
+    }
+
+    axis[0] = normalize(eigenVectors[0]); // NOTE(review): assumes the solver orders its output so that [0] and [1] span the best-fit plane - confirm.
+    axis[1] = normalize(eigenVectors[1]);
+#endif
+
+
+#else
+
+    // IC: I thought this was generally more robust, but turns out it's not even guaranteed to return a valid projection. Imagine a narrow quad perpendicular to one plane, but rotated so that the shortest axis of 
+    // the bounding box is in the direction of that plane.
+
+    // Use the shortest box axis
+    Box box = MeshBounds::box(mesh);
+    Vector3 dir = box.extents();
+
+    if (fabs(dir.x) <= fabs(dir.y) && fabs(dir.x) <= fabs(dir.z)) {
+        axis[0] = Vector3(0, 1, 0); 
+        axis[1] = Vector3(0, 0, 1);
+    }
+    else if (fabs(dir.y) <= fabs(dir.z)) {
+        axis[0] = Vector3(1, 0, 0); 
+        axis[1] = Vector3(0, 0, 1);
+    }
+    else {
+        axis[0] = Vector3(1, 0, 0); 
+        axis[1] = Vector3(0, 1, 0);
+    }
+#endif
+
+    // Project vertices to plane.
+    for (HalfEdge::Mesh::VertexIterator it(mesh->vertices()); !it.isDone(); it.advance())
+    {
+        HalfEdge::Vertex * vertex = it.current();
+        vertex->tex.x = dot(axis[0], vertex->pos);
+        vertex->tex.y = dot(axis[1], vertex->pos);
+    }
+
+    return true;
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/param/OrthogonalProjectionMap.h b/thirdparty/thekla_atlas/nvmesh/param/OrthogonalProjectionMap.h
new file mode 100644
index 0000000000..54920413d5
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/OrthogonalProjectionMap.h
@@ -0,0 +1,15 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#ifndef NV_MESH_ORTHOGONALPROJECTIONMAP_H
+#define NV_MESH_ORTHOGONALPROJECTIONMAP_H
+
+namespace nv
+{
+    namespace HalfEdge { class Mesh; }
+    // Sets each vertex's tex to its position projected onto two axes fitted to the vertex positions. Returns false if no axes can be computed.
+    bool computeOrthogonalProjectionMap(HalfEdge::Mesh * mesh);
+
+} // nv namespace
+
+#endif // NV_MESH_ORTHOGONALPROJECTIONMAP_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/ParameterizationQuality.cpp b/thirdparty/thekla_atlas/nvmesh/param/ParameterizationQuality.cpp
new file mode 100644
index 0000000000..683ee603cd
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/ParameterizationQuality.cpp
@@ -0,0 +1,323 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+
+#include "nvmesh.h" // pch
+
+#include "ParameterizationQuality.h"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/halfedge/Vertex.h"
+#include "nvmesh/halfedge/Edge.h"
+
+#include "nvmath/Vector.inl"
+
+#include "nvcore/Debug.h"
+
+#include <float.h>
+
+
+using namespace nv;
+
+#if 0
+/*
+float triangleConformalEnergy(Vector3 q[3], Vector2 p[3])
+{
+const Vector3 v1 = q[0];
+const Vector3 v2 = q[1];
+const Vector3 v3 = q[2];
+
+const Vector2 w1 = p[0];
+const Vector2 w2 = p[1];
+const Vector2 w3 = p[2];
+
+float x1 = v2.x() - v1.x();
+float x2 = v3.x() - v1.x();
+float y1 = v2.y() - v1.y();
+float y2 = v3.y() - v1.y();
+float z1 = v2.z() - v1.z();
+float z2 = v3.z() - v1.z();
+
+float s1 = w2.x() - w1.x();
+float s2 = w3.x() - w1.x();
+float t1 = w2.y() - w1.y();
+float t2 = w3.y() - w1.y();
+
+float r = 1.0f / (s1 * t2 - s2 * t1);
+Vector3 sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, (t2 * z1 - t1 * z2) * r);
+Vector3 tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, (s1 * z2 - s2 * z1) * r);
+
+Vector3 N = cross(v3-v1, v2-v1);
+
+// Rotate 90 around N.
+}
+*/
+
+static float triangleConformalEnergy(Vector3 q[3], Vector2 p[3]) // Disabled (#if 0). q: 3D corners, p: their texcoords. Returns length(grad_u - Jgrad_v).
+{
+    // Using Denis formulas (c0, c1, c2 are the edges opposite to corners 0, 1, 2):
+    Vector3 c0 = q[1] - q[2];
+    Vector3 c1 = q[2] - q[0];
+    Vector3 c2 = q[0] - q[1];
+
+    Vector3 N = cross(-c0, c1);
+    float T = length(N);	// Length of the cross product of two edges, i.e. twice the triangle area (2T).
+    N = normalize(N, 0);
+
+    float cot_alpha0 = dot(-c1, c2) / length(cross(-c1, c2)); // Cotangent of each corner angle: dot / |cross| of the two edges meeting there.
+    float cot_alpha1 = dot(-c2, c0) / length(cross(-c2, c0));
+    float cot_alpha2 = dot(-c0, c1) / length(cross(-c0, c1));
+
+    Vector3 t0 = -cot_alpha1 * c1 + cot_alpha2 * c2;
+    Vector3 t1 = -cot_alpha2 * c2 + cot_alpha0 * c0;
+    Vector3 t2 = -cot_alpha0 * c0 + cot_alpha1 * c1;
+
+    nvCheck(equal(length(t0), length(c0))); // Sanity checks: each t_i must have the length of c_i and be perpendicular to it.
+    nvCheck(equal(length(t1), length(c1)));
+    nvCheck(equal(length(t2), length(c2)));
+    nvCheck(equal(dot(t0, c0), 0));
+    nvCheck(equal(dot(t1, c1), 0));
+    nvCheck(equal(dot(t2, c2), 0));
+
+    // Gradients
+    Vector3 grad_u = 1.0f / T * (p[0].x * t0 + p[1].x * t1 + p[2].x * t2);
+    Vector3 grad_v = 1.0f / T * (p[0].y * t0 + p[1].y * t1 + p[2].y * t2);
+
+    // Rotated gradients
+    Vector3 Jgrad_u = 1.0f / T * (p[0].x * c0 + p[1].x * c1 + p[2].x * c2);
+    Vector3 Jgrad_v = 1.0f / T * (p[0].y * c0 + p[1].y * c1 + p[2].y * c2);
+
+    // Using Lengyel's formulas (nothing computed in this scope is used by the return value below):
+    { 
+        const Vector3 v1 = q[0];
+        const Vector3 v2 = q[1];
+        const Vector3 v3 = q[2];
+
+        const Vector2 w1 = p[0];
+        const Vector2 w2 = p[1];
+        const Vector2 w3 = p[2];
+
+        float x1 = v2.x - v1.x;
+        float x2 = v3.x - v1.x;
+        float y1 = v2.y - v1.y;
+        float y2 = v3.y - v1.y;
+        float z1 = v2.z - v1.z;
+        float z2 = v3.z - v1.z;
+
+        float s1 = w2.x - w1.x;
+        float s2 = w3.x - w1.x;
+        float t1 = w2.y - w1.y;
+        float t2 = w3.y - w1.y;
+
+        float r = 1.0f / (s1 * t2 - s2 * t1);
+        Vector3 sdir((t2 * x1 - t1 * x2) * r, (t2 * y1 - t1 * y2) * r, (t2 * z1 - t1 * z2) * r);
+        Vector3 tdir((s1 * x2 - s2 * x1) * r, (s1 * y2 - s2 * y1) * r, (s1 * z2 - s2 * z1) * r);
+
+        Vector3 Jsdir = cross(N, sdir);
+        Vector3 Jtdir = cross(N, tdir);
+
+        float x = 3; // Unused. NOTE(review): looks like a leftover debugging anchor.
+    }
+
+    // check: sdir == grad_u
+    // check: tdir == grad_v
+
+    return length(grad_u - Jgrad_v);
+}
+#endif // 0
+
+// Creates an empty accumulator: all counters, area totals and metric sums start at zero.
+ParameterizationQuality::ParameterizationQuality()
+{
+    m_totalTriangleCount = 0;
+    m_flippedTriangleCount = 0;
+    m_zeroAreaTriangleCount = 0;
+
+    m_parametricArea = 0.0f;
+    m_geometricArea = 0.0f;
+
+    m_stretchMetric = 0.0f;
+    m_maxStretchMetric = 0.0f;
+
+    m_conformalMetric = 0.0f;
+    m_authalicMetric = 0.0f;
+}
+
+ParameterizationQuality::ParameterizationQuality(const HalfEdge::Mesh * mesh)
+{
+    nvDebugCheck(mesh != NULL);
+    // Measures the existing parameterization of `mesh`: resets the accumulators, then feeds every triangle of every face to processTriangle.
+    m_totalTriangleCount = 0;
+    m_flippedTriangleCount = 0;
+    m_zeroAreaTriangleCount = 0;
+
+    m_parametricArea = 0.0f;
+    m_geometricArea = 0.0f;
+
+    m_stretchMetric = 0.0f;
+    m_maxStretchMetric = 0.0f;
+
+    m_conformalMetric = 0.0f;
+    m_authalicMetric = 0.0f;
+
+    const uint faceCount = mesh->faceCount();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = mesh->faceAt(f);
+        const HalfEdge::Vertex * vertex0 = NULL;
+
+        Vector3 p[3];
+        Vector2 t[3];
+
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+        {
+            const HalfEdge::Edge * edge = it.current();
+
+            if (vertex0 == NULL)
+            {
+                vertex0 = edge->vertex; // First edge of the face: its vertex becomes the apex of the triangle fan.
+
+                p[0] = vertex0->pos;
+                t[0] = vertex0->tex;
+            }
+            else if (edge->to() != vertex0) // Every later edge that does not end at the apex yields one fan triangle.
+            {
+                p[1] = edge->from()->pos;
+                p[2] = edge->to()->pos;
+                t[1] = edge->from()->tex;
+                t[2] = edge->to()->tex;
+
+                processTriangle(p, t);
+            }
+        }
+    }
+    // Compare against the triangles actually processed: a polygonal face is fanned into several triangles, so the face count is only right for pure triangle meshes.
+    if (m_flippedTriangleCount + m_zeroAreaTriangleCount == m_totalTriangleCount)
+    {
+        // If all triangles are flipped, then none is.
+        m_flippedTriangleCount = 0;
+    }
+
+    nvDebugCheck(isFinite(m_parametricArea) && m_parametricArea >= 0);
+    nvDebugCheck(isFinite(m_geometricArea) && m_geometricArea >= 0);
+    nvDebugCheck(isFinite(m_stretchMetric));
+    nvDebugCheck(isFinite(m_maxStretchMetric));
+    nvDebugCheck(isFinite(m_conformalMetric));
+    nvDebugCheck(isFinite(m_authalicMetric));
+}
+
+bool ParameterizationQuality::isValid() const // True when no triangle is recorded as flipped.
+{
+    return m_flippedTriangleCount == 0; // @@ Does not test for self-overlaps.
+}
+
+float ParameterizationQuality::rmsStretchMetric() const // Area-weighted RMS stretch; 0 when no geometric area was accumulated.
+{
+    if (m_geometricArea == 0) return 0.0f; // Avoids the divisions by zero below.
+    float normFactor = sqrtf(m_parametricArea / m_geometricArea); // Scales the result by sqrt(parametric area / geometric area).
+    return sqrtf(m_stretchMetric / m_geometricArea) * normFactor;
+}
+
+float ParameterizationQuality::maxStretchMetric() const // Largest per-triangle sigma2 seen, scaled like rmsStretchMetric; 0 without geometric area.
+{
+    if (m_geometricArea == 0) return 0.0f; // Avoids the division by zero below.
+    float normFactor = sqrtf(m_parametricArea / m_geometricArea);
+    return m_maxStretchMetric * normFactor;
+}
+
+float ParameterizationQuality::rmsConformalMetric() const // sqrt of the area-weighted mean of sigma2 / sigma1; 0 without geometric area.
+{
+    if (m_geometricArea == 0) return 0.0f; // Avoids the division by zero below.
+    return sqrtf(m_conformalMetric / m_geometricArea);
+}
+
+float ParameterizationQuality::maxAuthalicMetric() const // NOTE(review): despite the name this is sqrt of the area-weighted mean of sigma1 * sigma2, not a maximum.
+{
+    if (m_geometricArea == 0) return 0.0f; // Avoids the division by zero below.
+    return sqrtf(m_authalicMetric / m_geometricArea);
+}
+
+void ParameterizationQuality::operator += (const ParameterizationQuality & pq) // Merges the statistics of pq into this object.
+{
+    m_totalTriangleCount += pq.m_totalTriangleCount;
+    m_flippedTriangleCount += pq.m_flippedTriangleCount;
+    m_zeroAreaTriangleCount += pq.m_zeroAreaTriangleCount;
+
+    m_parametricArea += pq.m_parametricArea;
+    m_geometricArea += pq.m_geometricArea;
+
+    m_stretchMetric += pq.m_stretchMetric;
+    m_maxStretchMetric = max(m_maxStretchMetric, pq.m_maxStretchMetric); // The only field that is not a sum: keep the larger maximum.
+
+    m_conformalMetric += pq.m_conformalMetric;
+    m_authalicMetric += pq.m_authalicMetric;
+}
+
+// Accumulates the statistics of one triangle. q: 3D corner positions, p: the matching texture coordinates.
+void ParameterizationQuality::processTriangle(Vector3 q[3], Vector2 p[3])
+{
+    m_totalTriangleCount++;
+
+    // Evaluate texture stretch metric. See:
+    // - "Texture Mapping Progressive Meshes", Sander, Snyder, Gortler & Hoppe
+    // - "Mesh Parameterization: Theory and Practice", Siggraph'07 Course Notes, Hormann, Levy & Sheffer.
+
+    float t1 = p[0].x; // Note the naming: t is read from x and s from y.
+    float s1 = p[0].y;
+    float t2 = p[1].x;
+    float s2 = p[1].y;
+    float t3 = p[2].x;
+    float s3 = p[2].y;
+
+    float geometricArea = length(cross(q[1] - q[0], q[2] - q[0])) / 2;
+    float parametricArea = ((s2 - s1)*(t3 - t1) - (s3 - s1)*(t2 - t1)) / 2; // Signed; negative values are counted as flipped below.
+    
+    if (isZero(parametricArea))
+    {
+        m_zeroAreaTriangleCount++; // Degenerate in texture space: counted, but adds to no metric or area total.
+        return;
+    }
+
+    Vector3 Ss = (q[0] * (t2- t3) + q[1] * (t3 - t1) + q[2] * (t1 - t2)) / (2 * parametricArea);
+    Vector3 St = (q[0] * (s3- s2) + q[1] * (s1 - s3) + q[2] * (s2 - s1)) / (2 * parametricArea);
+
+    float a = dot(Ss, Ss); // E
+    float b = dot(Ss, St); // F
+    float c = dot(St, St); // G
+
+    // Compute the singular values, i.e. the square roots of the eigenvalues of the first fundamental form [a b; b c]:
+    float sigma1 = sqrtf(0.5f * max(0.0f, a + c - sqrtf(square(a - c) + 4 * square(b)))); // Smaller one (lowercase gamma in Sander et al.).
+    float sigma2 = sqrtf(0.5f * max(0.0f, a + c + sqrtf(square(a - c) + 4 * square(b)))); // Larger one (uppercase gamma in Sander et al.).
+    nvCheck(sigma2 >= sigma1);
+
+    // isometric: sigma1 = sigma2 = 1
+    // conformal: sigma1 / sigma2 = 1
+    // authalic: sigma1 * sigma2 = 1
+
+    float rmsStretch = sqrtf((a + c) * 0.5f);
+    float rmsStretch2 = sqrtf((square(sigma1) + square(sigma2)) * 0.5f); // Same quantity from the singular values; only used by the check below.
+    nvDebugCheck(equal(rmsStretch, rmsStretch2, 0.01f));
+
+    if (parametricArea < 0.0f)
+    {
+        // Count flipped triangles.
+        m_flippedTriangleCount++;
+
+        parametricArea = fabsf(parametricArea); // The area total below is unsigned.
+    }
+
+    m_stretchMetric += square(rmsStretch) * geometricArea;
+    m_maxStretchMetric = max(m_maxStretchMetric, sigma2);
+
+    if (!isZero(sigma1, 0.000001f)) {
+        // sigma1 is zero when geometricArea is zero.
+        m_conformalMetric += (sigma2 / sigma1) * geometricArea;
+    }
+    m_authalicMetric += (sigma1 * sigma2) * geometricArea;
+
+    // Accumulate total areas.
+    m_geometricArea += geometricArea;
+    m_parametricArea += parametricArea;
+
+
+    //triangleConformalEnergy(q, p);
+}
diff --git a/thirdparty/thekla_atlas/nvmesh/param/ParameterizationQuality.h b/thirdparty/thekla_atlas/nvmesh/param/ParameterizationQuality.h
new file mode 100644
index 0000000000..342e26b889
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/ParameterizationQuality.h
@@ -0,0 +1,56 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+
+#pragma once
+#ifndef NV_MESH_PARAMETERIZATIONQUALITY_H
+#define NV_MESH_PARAMETERIZATIONQUALITY_H
+
+#include <nvmesh/nvmesh.h>
+
+namespace nv
+{
+    class Vector2;
+    class Vector3;
+
+    namespace HalfEdge { class Mesh; }
+
+    // Estimate quality of existing parameterization.
+    NVMESH_CLASS class ParameterizationQuality
+    {
+    public:
+        ParameterizationQuality(); // Empty accumulator: all counters and sums are zero.
+        ParameterizationQuality(const HalfEdge::Mesh * mesh); // Accumulates every triangle of every face of `mesh`.
+
+        bool isValid() const; // True when no flipped triangle is recorded.
+
+        float rmsStretchMetric() const;
+        float maxStretchMetric() const;
+
+        float rmsConformalMetric() const;
+        float maxAuthalicMetric() const; // NOTE(review): despite the name, implemented as sqrt(accumulated sum / geometric area), not as a maximum.
+
+        void operator += (const ParameterizationQuality & pq); // Merges the statistics of pq into this object.
+
+    private:
+
+        void processTriangle(Vector3 p[3], Vector2 t[3]); // p: 3D corner positions, t: texture coordinates of one triangle.
+
+    private:
+
+        uint m_totalTriangleCount; // Triangles passed to processTriangle.
+        uint m_flippedTriangleCount; // Triangles with negative signed area in texture space.
+        uint m_zeroAreaTriangleCount; // Triangles with zero area in texture space.
+
+        float m_parametricArea; // Sum of unsigned texture-space areas.
+        float m_geometricArea; // Sum of 3D areas.
+
+        float m_stretchMetric; // Area-weighted sum of the squared per-triangle RMS stretch.
+        float m_maxStretchMetric; // Largest per-triangle sigma2.
+
+        float m_conformalMetric; // Area-weighted sum of sigma2 / sigma1.
+        float m_authalicMetric; // Area-weighted sum of sigma1 * sigma2.
+
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_PARAMETERIZATIONQUALITY_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/SingleFaceMap.cpp b/thirdparty/thekla_atlas/nvmesh/param/SingleFaceMap.cpp
new file mode 100644
index 0000000000..4b205de8bf
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/SingleFaceMap.cpp
@@ -0,0 +1,53 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+
+#include "nvmesh.h" // pch
+
+#include "SingleFaceMap.h"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Vertex.h"
+#include "nvmesh/halfedge/Face.h"
+
+#include "nvmath/Vector.inl"
+
+using namespace nv;
+
+
+// Parameterizes a mesh that has exactly one face: the face is laid out in a 2D frame built from its first edge and its normal.
+void nv::computeSingleFaceMap(HalfEdge::Mesh * mesh)
+{
+    nvDebugCheck(mesh != NULL);
+    nvDebugCheck(mesh->faceCount() == 1);
+
+    HalfEdge::Face * face = mesh->faceAt(0);
+    nvCheck(face != NULL);
+
+    Vector3 p0 = face->edge->from()->pos; // Origin of the 2D frame.
+    Vector3 p1 = face->edge->to()->pos;
+
+    Vector3 X = normalizeSafe(p1 - p0, Vector3(0.0f), 0.0f); // Along the first edge. Presumably falls back to the zero vector for a degenerate edge - confirm normalizeSafe.
+    Vector3 Z = face->normal();
+    Vector3 Y = normalizeSafe(cross(Z, X), Vector3(0.0f), 0.0f); // In the face plane, perpendicular to X.
+
+    uint i = 0;
+    for (HalfEdge::Face::EdgeIterator it(face->edges()); !it.isDone(); it.advance(), i++)
+    {
+        HalfEdge::Vertex * vertex = it.vertex();
+        nvCheck(vertex != NULL);
+
+        if (i == 0)
+        {
+            vertex->tex = Vector2(0); // First visited vertex is placed at the origin. NOTE(review): presumably this is p0's vertex - confirm iteration order.
+        }
+        else
+        {
+            Vector3 pn = vertex->pos;
+
+            float xn = dot((pn - p0), X); // Coordinates of the vertex in the (X, Y) frame relative to p0.
+            float yn = dot((pn - p0), Y);
+
+            vertex->tex = Vector2(xn, yn);
+        }
+    }
+}
+
diff --git a/thirdparty/thekla_atlas/nvmesh/param/SingleFaceMap.h b/thirdparty/thekla_atlas/nvmesh/param/SingleFaceMap.h
new file mode 100644
index 0000000000..b70719f5d8
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/SingleFaceMap.h
@@ -0,0 +1,18 @@
+// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
+
+#pragma once
+#ifndef NV_MESH_SINGLEFACEMAP_H
+#define NV_MESH_SINGLEFACEMAP_H
+
+namespace nv
+{
+    namespace HalfEdge
+    {
+        class Mesh;
+    }
+    // Writes texture coordinates for a mesh that has exactly one face (debug-checked), using a 2D frame built from the face's first edge and normal.
+    void computeSingleFaceMap(HalfEdge::Mesh * mesh);
+
+} // nv namespace
+
+#endif // NV_MESH_SINGLEFACEMAP_H
diff --git a/thirdparty/thekla_atlas/nvmesh/param/Util.cpp b/thirdparty/thekla_atlas/nvmesh/param/Util.cpp
new file mode 100644
index 0000000000..fe7b58edf8
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/Util.cpp
@@ -0,0 +1,326 @@
+// This code is in the public domain -- castano@gmail.com
+
+#include "nvmesh.h" // pch
+
+#include "Util.h"
+
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/halfedge/Vertex.h"
+
+#include "nvmath/Vector.inl"
+
+#include "nvcore/Array.inl"
+
+
+using namespace nv;
+
+// Determine if the given mesh is a quad mesh: true when every face has exactly 4 edges (also true for a mesh without faces).
+bool nv::isQuadMesh(const HalfEdge::Mesh * mesh)
+{
+    nvDebugCheck(mesh != NULL);
+
+    const uint faceCount = mesh->faceCount();
+    for(uint i = 0; i < faceCount; i++) {
+        const HalfEdge::Face * face = mesh->faceAt(i);
+        if (face->edgeCount() != 4) {
+            return false; // One non-quad face is enough.
+        }
+    }
+
+    return true;
+}
+
+bool nv::isTriangularMesh(const HalfEdge::Mesh * mesh) // True when every face has exactly 3 edges (also true for a mesh without faces).
+{
+    for (HalfEdge::Mesh::ConstFaceIterator it(mesh->faces()); !it.isDone(); it.advance())
+    {
+        const HalfEdge::Face * face = it.current();
+        if (face->edgeCount() != 3) return false; // One non-triangle face is enough.
+    }
+    return true;
+}
+
+// Returns the number of triangles needed to cover all faces: a face with n edges contributes n - 2.
+uint nv::countMeshTriangles(const HalfEdge::Mesh * mesh)
+{
+    const uint faceCount = mesh->faceCount();
+
+    uint triangleCount = 0;
+
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = mesh->faceAt(f);
+        
+        uint edgeCount = face->edgeCount();
+        nvDebugCheck(edgeCount > 2); // Otherwise the unsigned subtraction below would wrap around.
+
+        triangleCount += edgeCount - 2;
+    }
+
+    return triangleCount;
+}
+
+const HalfEdge::Vertex * nv::findBoundaryVertex(const HalfEdge::Mesh * mesh) // First vertex (in index order) for which isBoundary() is true, or NULL.
+{
+    const uint vertexCount = mesh->vertexCount();
+
+    for (uint v = 0; v < vertexCount; v++)
+    {
+        const HalfEdge::Vertex * vertex = mesh->vertexAt(v);
+        if (vertex->isBoundary()) return vertex;
+    }
+
+    return NULL; // No boundary vertex found.
+}
+
+// Returns a new mesh (allocated with new; the caller takes ownership) that has one vertex per group of colocal vertices.
+HalfEdge::Mesh * nv::unifyVertices(const HalfEdge::Mesh * inputMesh)
+{
+    HalfEdge::Mesh * mesh = new HalfEdge::Mesh;
+
+    // Only add the first colocal. The new mesh is therefore compacted, so record where each kept
+    // vertex lands: remap[id in inputMesh] = index in the new mesh (ids are used as indices, as elsewhere).
+    const uint vertexCount = inputMesh->vertexCount();
+    nv::Array<uint> remap;
+    remap.resize(vertexCount);
+    uint unifiedVertexCount = 0;
+    for (uint v = 0; v < vertexCount; v++) {
+        const HalfEdge::Vertex * vertex = inputMesh->vertexAt(v);
+        if (vertex->isFirstColocal()) {
+            remap[vertex->id] = unifiedVertexCount++;
+            mesh->addVertex(vertex->pos);
+        }
+    }
+
+    nv::Array<uint> indexArray;
+
+    // Add new faces pointing to first colocals.
+    const uint faceCount = inputMesh->faceCount();
+    for (uint f = 0; f < faceCount; f++) {
+        const HalfEdge::Face * face = inputMesh->faceAt(f);
+        indexArray.clear();
+        for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance()) {
+            const HalfEdge::Vertex * vertex = it.current()->vertex->firstColocal();
+            // vertex->id indexes inputMesh; translate it to the compacted index used by the new mesh.
+            indexArray.append(remap[vertex->id]);
+        }
+
+        mesh->addFace(indexArray);
+    }
+
+    mesh->linkBoundary();
+    return mesh;
+}
+
+#include "nvmath/Basis.h"
+
+static bool pointInTriangle(const Vector2 & p, const Vector2 & a, const Vector2 & b, const Vector2 & c) // True when p is on the positive-area side of all three edges by at least the 0.00001 tolerance.
+{
+    return triangleArea(a, b, p) >= 0.00001f && 
+        triangleArea(b, c, p) >= 0.00001f && 
+        triangleArea(c, a, p) >= 0.00001f; // NOTE(review): relies on triangleArea being signed - confirm in nvmath.
+}
+
+// Returns a new mesh (allocated with new) that contains every vertex of inputMesh and its faces split into triangles.
+// This is doing a simple ear-clipping algorithm that skips invalid triangles. Ideally, we should
+// also sort the ears by angle, start with the ones that have the smallest angle and proceed in order.
+HalfEdge::Mesh * nv::triangulate(const HalfEdge::Mesh * inputMesh)
+{
+    HalfEdge::Mesh * mesh = new HalfEdge::Mesh;
+    
+    // Add all vertices.
+    const uint vertexCount = inputMesh->vertexCount();
+    for (uint v = 0; v < vertexCount; v++) {
+        const HalfEdge::Vertex * vertex = inputMesh->vertexAt(v);
+        mesh->addVertex(vertex->pos);
+    }
+
+    Array<int> polygonVertices; // Vertex ids of the corners still left in the current polygon.
+    Array<float> polygonAngles; // Corner angles; only written (never read) by the active code below.
+    Array<Vector2> polygonPoints; // 2D projection of each remaining corner, parallel to polygonVertices.
+
+    const uint faceCount = inputMesh->faceCount();
+    for (uint f = 0; f < faceCount; f++)
+    {
+        const HalfEdge::Face * face = inputMesh->faceAt(f);
+        nvDebugCheck(face != NULL);
+
+        const uint edgeCount = face->edgeCount();
+        nvDebugCheck(edgeCount >= 3);
+
+        polygonVertices.clear();
+        polygonVertices.reserve(edgeCount);
+
+        if (edgeCount == 3) {
+            // Simple case for triangles.
+            for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+            {
+                const HalfEdge::Edge * edge = it.current();
+                const HalfEdge::Vertex * vertex = edge->vertex;
+                polygonVertices.append(vertex->id);
+            }
+
+            int v0 = polygonVertices[0];
+            int v1 = polygonVertices[1];
+            int v2 = polygonVertices[2];
+
+            mesh->addFace(v0, v1, v2);
+        }
+        else {
+            // Build 2D polygon projecting vertices onto normal plane.
+            // Faces are not necessarily planar, this is for example the case, when the face comes from filling a hole. In such cases
+            // it's much better to use the best fit plane.
+            const Vector3 fn = face->normal();
+
+            Basis basis;
+            basis.buildFrameForDirection(fn);
+
+            polygonPoints.clear();
+            polygonPoints.reserve(edgeCount);
+            polygonAngles.clear();
+            polygonAngles.reserve(edgeCount);
+
+            for (HalfEdge::Face::ConstEdgeIterator it(face->edges()); !it.isDone(); it.advance())
+            {
+                const HalfEdge::Edge * edge = it.current();
+                const HalfEdge::Vertex * vertex = edge->vertex;
+                polygonVertices.append(vertex->id);
+                
+                Vector2 p;
+                p.x = dot(basis.tangent, vertex->pos);
+                p.y = dot(basis.bitangent, vertex->pos);
+
+                polygonPoints.append(p);
+            }
+            polygonAngles.resize(edgeCount);
+
+            while (polygonVertices.size() > 2) { // Each pass emits one triangle and removes one corner.
+                uint size = polygonVertices.size();
+
+                // Update polygon angles. @@ Update only those that have changed.
+                float minAngle = 2 * PI;
+                uint bestEar = 0; // Use first one if none of them is valid.
+                bool bestIsValid = false;
+                for (uint i = 0; i < size; i++) {
+                    uint i0 = i; 
+                    uint i1 = (i+1) % size; // Use Sean's polygon iteration trick.
+                    uint i2 = (i+2) % size;
+
+                    Vector2 p0 = polygonPoints[i0];
+                    Vector2 p1 = polygonPoints[i1];
+                    Vector2 p2 = polygonPoints[i2];
+
+                    float d = clamp(dot(p0-p1, p2-p1) / (length(p0-p1) * length(p2-p1)), -1.0f, 1.0f); // Cosine of the corner angle at p1, clamped for acosf.
+                    float angle = acosf(d);
+                    
+                    float area = triangleArea(p0, p1, p2);
+                    if (area < 0.0f) angle = 2.0f * PI - angle; // Negative area: treat the corner as reflex.
+
+                    polygonAngles[i1] = angle;
+
+                    if (angle < minAngle || !bestIsValid) {
+
+                        // Make sure this is a valid ear, if not, skip this point.
+                        bool valid = true;
+                        for (uint j = 0; j < size; j++) {
+                            if (j == i0 || j == i1 || j == i2) continue;
+                            Vector2 p = polygonPoints[j];
+
+                            if (pointInTriangle(p, p0, p1, p2)) {
+                                valid = false;
+                                break;
+                            }
+                        }
+
+                        if (valid || !bestIsValid) { // A valid ear always wins; invalid candidates are only kept while no valid one exists.
+                            minAngle = angle;
+                            bestEar = i1;
+                            bestIsValid = valid;
+                        }
+                    }
+                }
+
+                nvDebugCheck(minAngle <= 2 * PI);
+
+                // Clip best ear:
+
+                uint i0 = (bestEar+size-1) % size;
+                uint i1 = (bestEar+0) % size;
+                uint i2 = (bestEar+1) % size;
+
+                int v0 = polygonVertices[i0];
+                int v1 = polygonVertices[i1];
+                int v2 = polygonVertices[i2];
+                
+                mesh->addFace(v0, v1, v2);
+
+                polygonVertices.removeAt(i1); // Keep the three parallel arrays in sync.
+                polygonPoints.removeAt(i1);
+                polygonAngles.removeAt(i1);
+            }
+        }
+
+#if 0
+        // Disabled alternative implementation. It treats polygonVertices as vertex pointers, which does not match the Array<int> declared above.
+        uint i = 0;
+        while (polygonVertices.size() > 2 && i < polygonVertices.size()) {
+            uint size = polygonVertices.size();
+            uint i0 = (i+0) % size;
+            uint i1 = (i+1) % size;
+            uint i2 = (i+2) % size;
+
+            const HalfEdge::Vertex * v0 = polygonVertices[i0];
+            const HalfEdge::Vertex * v1 = polygonVertices[i1];
+            const HalfEdge::Vertex * v2 = polygonVertices[i2];
+
+            const Vector3 p0 = v0->pos;
+            const Vector3 p1 = v1->pos;
+            const Vector3 p2 = v2->pos;
+
+            const Vector3 e0 = p2 - p1;
+            const Vector3 e1 = p0 - p1;
+
+            // If this ear forms a valid triangle, setup relations, remove v1 and repeat.
+            Vector3 n = cross(e0, e1);
+            float len = dot(fn, n); // = sin(angle)
+            
+            float angle = asin(len);
+
+
+            if (len > 0.0f) {
+                mesh->addFace(v0->id(), v1->id(), v2->id());
+                polygonVertices.removeAt(i1);
+                polygonAngles.removeAt(i1);
+                if (i2 > i1) i2--;
+                // @@ Update angles at i0 and i2
+            }
+            else {
+                i++;
+            }
+        }
+
+        // @@ Create a few degenerate triangles to avoid introducing holes.
+        i = 0;
+        const uint size = polygonVertices.size();
+        while (i < size - 2) {
+            uint i0 = (i+0) % size;
+            uint i1 = (i+1) % size;
+            uint i2 = (i+2) % size;
+
+            const HalfEdge::Vertex * v0 = polygonVertices[i0];
+            const HalfEdge::Vertex * v1 = polygonVertices[i1];
+            const HalfEdge::Vertex * v2 = polygonVertices[i2];
+
+            mesh->addFace(v0->id(), v1->id(), v2->id());
+            i++;
+        }
+#endif
+    }
+
+    mesh->linkBoundary();
+
+    return mesh;
+}
+
+
diff --git a/thirdparty/thekla_atlas/nvmesh/param/Util.h b/thirdparty/thekla_atlas/nvmesh/param/Util.h
new file mode 100644
index 0000000000..774563ac0b
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/param/Util.h
@@ -0,0 +1,18 @@
+// This code is in the public domain -- castano@gmail.com
+
+#pragma once
+#include "nvmesh/nvmesh.h"
+
+namespace nv {
+    namespace HalfEdge { class Mesh; class Vertex; }
+
+    // Face-shape queries on a mesh.
+    bool isQuadMesh(const HalfEdge::Mesh * mesh);
+    bool isTriangularMesh(const HalfEdge::Mesh * mesh);
+    uint countMeshTriangles(const HalfEdge::Mesh * mesh); // Sum of (edgeCount - 2) over all faces.
+    const HalfEdge::Vertex * findBoundaryVertex(const HalfEdge::Mesh * mesh); // NULL when there is no boundary vertex.
+
+    // Both return a new mesh allocated with `new`; the input mesh is taken by pointer-to-const.
+    HalfEdge::Mesh * unifyVertices(const HalfEdge::Mesh * inputMesh);
+    HalfEdge::Mesh * triangulate(const HalfEdge::Mesh * inputMesh);
+} // nv namespace
diff --git a/thirdparty/thekla_atlas/nvmesh/raster/ClippedTriangle.h b/thirdparty/thekla_atlas/nvmesh/raster/ClippedTriangle.h
new file mode 100644
index 0000000000..0947d4851c
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/raster/ClippedTriangle.h
@@ -0,0 +1,159 @@
+// Copyright NVIDIA Corporation 2007 -- Denis Kovacs <den.kovacs@gmail.com>
+
+#pragma once
+#ifndef NV_MESH_CLIPPEDTRIANGLE_H
+#define NV_MESH_CLIPPEDTRIANGLE_H
+
+#include <nvmath/Vector.h>
+
+namespace nv
+{
+    // A triangle that can be clipped against axis-aligned half-planes, alternating between two vertex buffers; also computes the area and centroid of the result.
+    class ClippedTriangle
+    {
+    public:
+        ClippedTriangle(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c) 
+        {
+            m_numVertices = 3;
+            m_activeVertexBuffer = 0; // The polygon starts in buffer A.
+
+            m_verticesA[0]=a;
+            m_verticesA[1]=b;
+            m_verticesA[2]=c;
+
+            m_vertexBuffers[0] = m_verticesA;
+            m_vertexBuffers[1] = m_verticesB;
+        }
+
+        uint vertexCount() // Number of vertices of the current (possibly clipped) polygon.
+        {
+            return m_numVertices;
+        }
+
+        const Vector2 * vertices() // Vertices of the current polygon; points into this object's active buffer.
+        {
+            return m_vertexBuffers[m_activeVertexBuffer];
+        }
+
+        inline void clipHorizontalPlane(float offset, float clipdirection) // Keeps the part where clipdirection * (offset - y) >= 0.
+        {
+            Vector2 * v  = m_vertexBuffers[m_activeVertexBuffer]; // Input polygon.
+            m_activeVertexBuffer ^= 1; // Swap buffers: the output is written to the other one.
+            Vector2 * v2 = m_vertexBuffers[m_activeVertexBuffer];
+
+            v[m_numVertices] = v[0]; // Close the polygon so that v[k+1] is valid for the last edge.
+
+            float dy2,   dy1 = offset - v[0].y;
+            int   dy2in, dy1in = clipdirection*dy1 >= 0;
+            uint  p=0; // Number of output vertices written so far.
+
+            for (uint k=0; k<m_numVertices; k++)
+            {
+                dy2   = offset - v[k+1].y;
+                dy2in = clipdirection*dy2 >= 0;
+
+                if (dy1in) v2[p++] = v[k]; // Keep vertices on the inside.
+
+                if ( dy1in + dy2in == 1 ) // not both in/out
+                {
+                    float dx = v[k+1].x - v[k].x;
+                    float dy = v[k+1].y - v[k].y; // Equals dy1 - dy2, which is non-zero when the edge crosses the plane.
+                    v2[p++] = Vector2(v[k].x + dy1*(dx/dy), offset); // Point where the edge crosses y == offset.
+                }
+
+                dy1 = dy2; dy1in = dy2in;
+            }
+            m_numVertices = p;
+
+            //for (uint k=0; k<m_numVertices; k++) printf("(%f, %f)\n", v2[k].x, v2[k].y); printf("\n");
+        }
+
+        inline void clipVerticalPlane(float offset, float clipdirection ) // Keeps the part where clipdirection * (offset - x) >= 0.
+        {
+            Vector2 * v  = m_vertexBuffers[m_activeVertexBuffer]; // Input polygon.
+            m_activeVertexBuffer ^= 1; // Swap buffers: the output is written to the other one.
+            Vector2 * v2 = m_vertexBuffers[m_activeVertexBuffer];
+
+            v[m_numVertices] = v[0]; // Close the polygon so that v[k+1] is valid for the last edge.
+
+            float dx2,   dx1   = offset - v[0].x;
+            int   dx2in, dx1in = clipdirection*dx1 >= 0;
+            uint  p=0; // Number of output vertices written so far.
+
+            for (uint k=0; k<m_numVertices; k++)
+            {
+                dx2 = offset - v[k+1].x;
+                dx2in = clipdirection*dx2 >= 0;
+
+                if (dx1in) v2[p++] = v[k]; // Keep vertices on the inside.
+
+                if ( dx1in + dx2in == 1 ) // not both in/out
+                {
+                    float dx = v[k+1].x - v[k].x; // Equals dx1 - dx2, which is non-zero when the edge crosses the plane.
+                    float dy = v[k+1].y - v[k].y;
+                    v2[p++] = Vector2(offset, v[k].y + dx1*(dy/dx)); // Point where the edge crosses x == offset.
+                }
+
+                dx1 = dx2; dx1in = dx2in;
+            }
+            m_numVertices = p;
+
+            //for (uint k=0; k<m_numVertices; k++) printf("(%f, %f)\n", v2[k].x, v2[k].y); printf("\n");
+        }
+
+        void computeAreaCentroid() // Updates m_area and m_centroid from the current polygon.
+        {
+            Vector2 * v  = m_vertexBuffers[m_activeVertexBuffer];
+            v[m_numVertices] = v[0]; // Close the polygon so that v[k+1] is valid for the last edge.
+
+            m_area = 0;
+            float centroidx=0, centroidy=0;
+            for (uint k=0; k<m_numVertices; k++)
+            {
+                // http://local.wasp.uwa.edu.au/~pbourke/geometry/polyarea/
+                float f = v[k].x*v[k+1].y - v[k+1].x*v[k].y;
+                m_area += f;
+                centroidx += f * (v[k].x + v[k+1].x);
+                centroidy += f * (v[k].y + v[k+1].y);
+            }
+            m_area = 0.5f * fabs(m_area); // Unsigned area. NOTE(review): the centroid below divides by this unsigned value, so its sign flips with the polygon winding - confirm callers expect that.
+            if (m_area==0) {
+                m_centroid = Vector2(0.0f);
+            } else {
+                m_centroid = Vector2(centroidx/(6*m_area), centroidy/(6*m_area));
+            }
+        }
+
+        void clipAABox(float x0, float y0, float x1, float y1) // Clips to x0 <= x <= x1, y0 <= y <= y1, then updates area and centroid.
+        {
+            clipVerticalPlane  ( x0, -1);
+            clipHorizontalPlane( y0, -1);
+            clipVerticalPlane  ( x1,  1);
+            clipHorizontalPlane( y1,  1);
+
+            computeAreaCentroid();
+        }
+
+        Vector2 centroid() // Only assigned by computeAreaCentroid(); the constructor does not initialize it.
+        {
+            return m_centroid;
+        }
+
+        float area() // Only assigned by computeAreaCentroid(); the constructor does not initialize it.
+        {
+            return m_area;
+        }
+
+    private:
+        Vector2 m_verticesA[7+1]; // 7: each of the 4 clips in clipAABox adds at most one vertex to the 3 initial ones. +1: slot for the closing copy of v[0].
+        Vector2 m_verticesB[7+1];
+        Vector2 * m_vertexBuffers[2]; // { m_verticesA, m_verticesB }
+        uint    m_numVertices;
+        uint    m_activeVertexBuffer; // Index (0 or 1) of the buffer holding the current polygon.
+        float   m_area;
+        Vector2 m_centroid;
+    };
+
+} // nv namespace
+
+#endif // NV_MESH_CLIPPEDTRIANGLE_H
diff --git a/thirdparty/thekla_atlas/nvmesh/raster/Raster.cpp b/thirdparty/thekla_atlas/nvmesh/raster/Raster.cpp
new file mode 100644
index 0000000000..d46b34f045
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/raster/Raster.cpp
@@ -0,0 +1,626 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+/** @file Raster.cpp
+ * @brief Triangle rasterization library using affine interpolation. Not
+ * specially optimized, but enough for my purposes.
+**/
+
+#include "nvmesh.h" // pch
+
+#include "Raster.h"
+#include "ClippedTriangle.h"
+
+#include "nvcore/Utils.h" // min, max
+
+#include "nvmath/Vector.inl"
+#include "nvmath/ftoi.h"
+
+
+#define RA_EPSILON		0.00001f
+
+using namespace nv;
+using namespace nv::Raster;
+
+namespace
+{
+    // Scaled difference helpers: every overload returns (bot - top) * ih.
+    static inline float delta(float bot, float top, float ih) {
+        const float span = bot - top;
+        return span * ih;
+    }
+    static inline Vector2 delta(Vector2::Arg bot, Vector2::Arg top, float ih) {
+        const Vector2 span = bot - top;
+        return span * ih;
+    }
+    static inline Vector3 delta(Vector3::Arg bot, Vector3::Arg top, float ih) {
+        const Vector3 span = bot - top;
+        return span * ih;
+    }
+
+
+    // @@ The implementation in nvmath.h should be equivalent.
+    static inline int iround(float f)
+    {
+        // Round to nearest by flooring f + 0.5, so exact halves go towards +infinity.
+        // @@ Optimize this.
+        const float shifted = f + 0.5f;
+        return int(floorf(shifted));
+    }
+
+    /// A triangle vertex: a 2D position plus a 3-component attribute. Not referenced elsewhere in this file.
+    struct Vertex
+    {
+        Vector2 pos;	// Position.
+        Vector3 tex;	// Texcoord. (Barycentric coordinate)
+    };
+
+
+    /// A triangle for rasterization: three 2D corners, one Vector3 attribute per corner and its screen-space gradients.
+    struct Triangle
+    {
+        Triangle(Vector2::Arg v0, Vector2::Arg v1, Vector2::Arg v2, Vector3::Arg t0, Vector3::Arg t1, Vector3::Arg t2);
+
+        bool computeDeltas(); // fills dx/dy; returns false when the inverse determinant is not finite
+
+        bool draw(const Vector2 & extents, bool enableScissors, SamplingCallback cb, void * param); // non-antialiased; false when the callback stops it
+        bool drawAA(const Vector2 & extents, bool enableScissors, SamplingCallback cb, void * param); // antialiased; passes fractional coverage to the callback
+        bool drawC(const Vector2 & extents, bool enableScissors, SamplingCallback cb, void * param); // declared only; no definition appears in this file
+        void flipBackface(); // swaps v1/v2 and t1/t2 when the winding test is negative
+        void computeUnitInwardNormals(); // fills n1..n3
+
+        // Vertices.	
+        Vector2 v1, v2, v3;
+        Vector2 n1, n2, n3; // unit inward normals
+        Vector3 t1, t2, t3; // attribute stored with v1, v2, v3 respectively
+
+        // Deltas.
+        Vector3 dx, dy; // attribute change per unit step in x and in y, written by computeDeltas()
+
+        float sign; // never assigned or read in this file
+        bool valid; // result of computeDeltas(), set by the constructor
+    };
+
+
+    /// Triangle ctor.
+    /// The corners are stored as (v0, v2, v1): the second and third inputs are exchanged.
+    Triangle::Triangle(Vector2::Arg v0, Vector2::Arg v1, Vector2::Arg v2, 
+        Vector3::Arg t0, Vector3::Arg t1, Vector3::Arg t2)
+        // Positions. Names inside the parentheses are the constructor parameters.
+        : v1(v0)
+        , v2(v2)
+        , v3(v1)
+        // Attributes, permuted the same way as the positions.
+        , t1(t0)
+        , t2(t2)
+        , t3(t1)
+    {
+        // make sure every triangle is front facing.
+        flipBackface();
+
+        // Compute deltas; a failure marks the triangle as not drawable.
+        valid = computeDeltas();
+
+        // Edge normals, consumed by drawAA().
+        computeUnitInwardNormals();
+    }
+
+
+    /// Compute texture space deltas.
+    /// The two edges leaving v1 are used as a basis; inverting that 2x2 system gives
+    /// the attribute change per unit step along x (dx) and along y (dy).
+    /// Returns false, leaving dx/dy untouched, when the inverse determinant is not finite.
+    bool Triangle::computeDeltas()
+    {
+        const Vector2 edgeA = v3 - v1;
+        const Vector2 edgeB = v2 - v1;
+
+        const float invDet = 1.0f / (edgeA.y * edgeB.x - edgeB.y * edgeA.x);
+        if (!isFinite(invDet)) {
+            return false; // e.g. a zero determinant: the edges do not span the plane
+        }
+
+        const Vector3 attrA = t3 - t1;
+        const Vector3 attrB = t2 - t1;
+        // Coordinates of the unit x and unit y vectors in the edge basis.
+        const float ax = - edgeB.y * invDet;
+        const float bx = edgeA.y * invDet;
+        const float ay = edgeB.x * invDet;
+        const float by = - edgeA.x * invDet;
+
+        dx = attrA * ax + attrB * bx;
+        dy = attrA * ay + attrB * by;
+
+        return true;
+    }
+
+    // Derive n1, n2, n3: each edge vector turned by a quarter turn and scaled to unit length.
+    void Triangle::computeUnitInwardNormals()
+    {
+        const Vector2 e12 = v1 - v2; n1 = Vector2(-e12.y, e12.x) * (1.0f/sqrtf(e12.y*e12.y + e12.x*e12.x));
+        const Vector2 e23 = v2 - v3; n2 = Vector2(-e23.y, e23.x) * (1.0f/sqrtf(e23.y*e23.y + e23.x*e23.x));
+        const Vector2 e31 = v3 - v1; n3 = Vector2(-e31.y, e31.x) * (1.0f/sqrtf(e31.y*e31.y + e31.x*e31.x));
+    }
+
+    void Triangle::flipBackface()
+    {
+        const float winding = (v3.x-v1.x)*(v2.y-v1.y) - (v3.y-v1.y)*(v2.x-v1.x);
+        if (winding < 0) { // back facing: exchange corners 1 and 2 together with their attributes
+            const Vector2 pos = v2; v2 = v1; v1 = pos; // swap pos
+            const Vector3 tex = t2; t2 = t1; t1 = tex; // swap tex
+        }
+    }
+
+    bool Triangle::draw(const Vector2 & extents, bool enableScissors, SamplingCallback cb, void * param)
+    {
+        // Nearest mode: 8x8 block half-space rasterizer. Corners are converted to 28.4 fixed-point coordinates.
+        const int Y1 = iround(16.0f * v1.y);
+        const int Y2 = iround(16.0f * v2.y);
+        const int Y3 = iround(16.0f * v3.y);
+
+        const int X1 = iround(16.0f * v1.x);
+        const int X2 = iround(16.0f * v2.x);
+        const int X3 = iround(16.0f * v3.x);
+
+        // Deltas
+        const int DX12 = X1 - X2;
+        const int DX23 = X2 - X3;
+        const int DX31 = X3 - X1;
+
+        const int DY12 = Y1 - Y2;
+        const int DY23 = Y2 - Y3;
+        const int DY31 = Y3 - Y1;
+
+        // Fixed-point deltas
+        const int FDX12 = DX12 << 4;
+        const int FDX23 = DX23 << 4;
+        const int FDX31 = DX31 << 4;
+
+        const int FDY12 = DY12 << 4;
+        const int FDY23 = DY23 << 4;
+        const int FDY31 = DY31 << 4;
+
+        int minx, miny, maxx, maxy;
+        if (enableScissors) {
+            int frustumX0 =  0 << 4;
+            int frustumY0 =  0 << 4;
+            int frustumX1 =  (int)extents.x << 4;
+            int frustumY1 =  (int)extents.y << 4;
+
+            // Bounding rectangle, clamped to [0, extents] and converted from fixed point to texels (rounding up)
+            minx = (nv::max(min3(X1, X2, X3), frustumX0) + 0xF) >> 4;
+            miny = (nv::max(min3(Y1, Y2, Y3), frustumY0) + 0xF) >> 4;
+            maxx = (nv::min(max3(X1, X2, X3), frustumX1) + 0xF) >> 4;
+            maxy = (nv::min(max3(Y1, Y2, Y3), frustumY1) + 0xF) >> 4;
+        }
+        else {
+            // Bounding rectangle
+            minx = (min3(X1, X2, X3) + 0xF) >> 4;
+            miny = (min3(Y1, Y2, Y3) + 0xF) >> 4;
+            maxx = (max3(X1, X2, X3) + 0xF) >> 4;
+            maxy = (max3(Y1, Y2, Y3) + 0xF) >> 4;
+        }
+
+        // Block size, standard 8x8 (must be power of two)
+        const int q = 8;
+
+        // @@ This won't work when minx,miny are negative. This code path is not used. Leaving as is for now.
+        nvCheck(minx >= 0);
+        nvCheck(miny >= 0);
+
+        // Start in corner of 8x8 block
+        minx &= ~(q - 1);
+        miny &= ~(q - 1);
+
+        // Half-edge constants
+        int C1 = DY12 * X1 - DX12 * Y1;
+        int C2 = DY23 * X2 - DX23 * Y2;
+        int C3 = DY31 * X3 - DX31 * Y3;
+
+        // Correct for fill convention
+        if(DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
+        if(DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
+        if(DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
+
+        // Loop through blocks
+        for(int y = miny; y < maxy; y += q)
+        {
+            for(int x = minx; x < maxx; x += q)
+            {
+                // Corners of block (28.4 fixed point; only valid for the integer edge functions below)
+                int x0 = x << 4;
+                int x1 = (x + q - 1) << 4;
+                int y0 = y << 4;
+                int y1 = (y + q - 1) << 4;
+
+                // Evaluate half-space functions at the four block corners; one bit per corner
+                bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
+                bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
+                bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
+                bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
+                int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
+
+                bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
+                bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
+                bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
+                bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
+                int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
+
+                bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
+                bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
+                bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
+                bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
+                int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
+
+                // Skip block when outside an edge
+                if(a == 0x0 || b == 0x0 || c == 0x0) continue;		
+
+                // Accept whole block when totally covered
+                if(a == 0xF && b == 0xF && c == 0xF)
+                {
+                    Vector3 texRow = t1 + dy*(y - v1.y) + dx*(x - v1.x); // texel units: dx/dy are per-texel gradients, so the 28.4 values x0/y0 must not be used here
+
+                    for(int iy = y; iy < y + q; iy++)
+                    {
+                        Vector3 tex = texRow;
+                        for(int ix = x; ix < x + q; ix++)
+                        {
+                            //Vector3 tex = t1 + dx * (ix - v1.x) + dy * (iy - v1.y);
+                            if (!cb(param, ix, iy, tex, dx, dy, 1.0)) {
+                                // early out.
+                                return false;
+                            }
+                            tex += dx;
+                        }
+                        texRow += dy;
+                    }
+                }
+                else // Partially covered block
+                {
+                    int CY1 = C1 + DX12 * y0 - DY12 * x0;
+                    int CY2 = C2 + DX23 * y0 - DY23 * x0;
+                    int CY3 = C3 + DX31 * y0 - DY31 * x0;
+                    Vector3 texRow = t1 + dy*(y - v1.y) + dx*(x - v1.x); // texel units, same origin as the fully covered case
+
+                    for(int iy = y; iy < y + q; iy++)
+                    {
+                        int CX1 = CY1;
+                        int CX2 = CY2;
+                        int CX3 = CY3;
+                        Vector3 tex = texRow;
+
+                        for(int ix = x; ix < x + q; ix++)
+                        {
+                            if(CX1 > 0 && CX2 > 0 && CX3 > 0)
+                            {
+                                if (!cb(param, ix, iy, tex, dx, dy, 1.0))
+                                {
+                                    // early out.
+                                    return false;
+                                }
+                            }
+
+                            CX1 -= FDY12;
+                            CX2 -= FDY23;
+                            CX3 -= FDY31;
+                            tex += dx;
+                        }
+
+                        CY1 += FDX12;
+                        CY2 += FDX23;
+                        CY3 += FDX31;
+                        texRow += dy;
+                    }
+                }
+            }
+        }
+
+        return true; // every covered texel was visited and the callback never asked to stop
+    }
+
+
+#define PX_INSIDE    (1.0f/sqrt(2.0f))
+#define PX_OUTSIDE  (-1.0f/sqrt(2.0f))
+// PX_* is half the diagonal of a unit texel, BK_* half the diagonal of a BK_SIZE block. Parenthesized so they expand safely inside larger expressions.
+#define BK_SIZE 8
+#define BK_INSIDE   (sqrt(BK_SIZE*BK_SIZE/2.0f))
+#define BK_OUTSIDE (-sqrt(BK_SIZE*BK_SIZE/2.0f))
+
+    // Antialiased rasterization with per-texel coverage. extents has to be multiple of BK_SIZE!! Returns false as soon as the callback returns false.
+    bool Triangle::drawAA(const Vector2 & extents, bool enableScissors, SamplingCallback cb, void * param)
+    {
+        float minx, miny, maxx, maxy;
+        if (enableScissors) {
+            // Bounding rectangle, clamped to [0, extents-1]
+            minx = floorf(max(min3(v1.x, v2.x, v3.x), 0.0f));
+            miny = floorf(max(min3(v1.y, v2.y, v3.y), 0.0f));
+            maxx = ceilf( min(max3(v1.x, v2.x, v3.x), extents.x-1.0f));
+            maxy = ceilf( min(max3(v1.y, v2.y, v3.y), extents.y-1.0f));
+        }
+        else {
+            // Bounding rectangle
+            minx = floorf(min3(v1.x, v2.x, v3.x));
+            miny = floorf(min3(v1.y, v2.y, v3.y));
+            maxx = ceilf( max3(v1.x, v2.x, v3.x));
+            maxy = ceilf( max3(v1.y, v2.y, v3.y));
+        }
+
+        // There's no reason to align the blocks to the viewport, instead we align them to the origin of the triangle bounds.
+        minx = floorf(minx);
+        miny = floorf(miny);
+        //minx = (float)(((int)minx) & (~((int)BK_SIZE - 1))); // align to blocksize (we don't need to worry about blocks partially out of viewport)
+        //miny = (float)(((int)miny) & (~((int)BK_SIZE - 1)));
+
+        minx += 0.5; miny +=0.5;  // sampling at texel centers!
+        maxx += 0.5; maxy +=0.5; 
+
+        // Half-edge constants: Ci + ni . p is the signed distance from p to edge i along the unit normal ni
+        float C1 = n1.x * (-v1.x) + n1.y * (-v1.y);
+        float C2 = n2.x * (-v2.x) + n2.y * (-v2.y);
+        float C3 = n3.x * (-v3.x) + n3.y * (-v3.y);
+
+        // Loop through blocks; (x0, y0) is the center of the first texel of each block
+        for(float y0 = miny; y0 <= maxy; y0 += BK_SIZE)
+        {
+            for(float x0 = minx; x0 <= maxx; x0 += BK_SIZE)
+            {
+                // Center of block (not a corner): offset by half of the BK_SIZE-1 texel steps
+                float xc = (x0 + (BK_SIZE-1)/2.0f);
+                float yc = (y0 + (BK_SIZE-1)/2.0f);
+
+                // Evaluate half-space functions at the block center
+                float aC = C1 + n1.x * xc + n1.y * yc;
+                float bC = C2 + n2.x * xc + n2.y * yc;
+                float cC = C3 + n3.x * xc + n3.y * yc;
+
+                // Skip block when outside an edge (center further out than half the block diagonal)
+                if( (aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE) ) continue;
+
+                // Accept whole block when totally covered (center further in than half the block diagonal for every edge)
+                if( (aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE) )
+                {
+                    Vector3 texRow = t1 + dy*(y0 - v1.y) + dx*(x0 - v1.x);
+
+                    for (float y = y0; y < y0 + BK_SIZE; y++)
+                    {
+                        Vector3 tex = texRow;
+                        for(float x = x0; x < x0 + BK_SIZE; x++)
+                        {
+                            if (!cb(param, (int)x, (int)y, tex, dx, dy, 1.0f))
+                            {
+                                return false;
+                            }
+                            tex += dx;
+                        }
+                        texRow += dy;
+                    }
+                }
+                else // Partially covered block
+                {
+                    float CY1 = C1 + n1.x * x0 + n1.y * y0;
+                    float CY2 = C2 + n2.x * x0 + n2.y * y0;
+                    float CY3 = C3 + n3.x * x0 + n3.y * y0;
+                    Vector3 texRow = t1 + dy*(y0 - v1.y) + dx*(x0 - v1.x);	                  	
+
+                    for(float y = y0; y < y0 + BK_SIZE; y++) // @@ This is not clipping to scissor rectangle correctly.
+                    {
+                        float CX1 = CY1;
+                        float CX2 = CY2;
+                        float CX3 = CY3;
+                        Vector3 tex = texRow;
+
+                        for (float x = x0; x < x0 + BK_SIZE; x++)   // @@ This is not clipping to scissor rectangle correctly.
+                        {
+                            if (CX1 >= PX_INSIDE && CX2 >= PX_INSIDE && CX3 >= PX_INSIDE) 
+                            {
+                                // pixel completely covered; note that this local 'tex' shadows the incrementally updated one and is recomputed from scratch
+                                Vector3 tex = t1 + dx * (x - v1.x) + dy * (y - v1.y);
+                                if (!cb(param, (int)x, (int)y, tex, dx, dy, 1.0f))
+                                {
+                                    return false;
+                                }
+                            }
+                            else if ((CX1 >= PX_OUTSIDE) && (CX2 >= PX_OUTSIDE) && (CX3 >= PX_OUTSIDE))
+                            {
+                                // triangle partially covers pixel. do clipping: the triangle is moved so the texel center is the origin, then cut to the unit box.
+                                ClippedTriangle ct(v1-Vector2(x,y), v2-Vector2(x,y), v3-Vector2(x,y));
+                                ct.clipAABox(-0.5, -0.5, 0.5, 0.5);
+                                Vector2 centroid = ct.centroid();
+                                float area = ct.area(); // area inside the unit texel box, passed to the callback as coverage
+                                if (area > 0.0f)
+                                {
+                                    Vector3 texCent = tex - dx*centroid.x - dy*centroid.y; // NOTE(review): the offset is subtracted; presumably this pairs with ClippedTriangle dividing by the unsigned area -- confirm
+                                    //nvCheck(texCent.x >= -0.1f && texCent.x <= 1.1f); // @@ Centroid is not very exact...
+                                    //nvCheck(texCent.y >= -0.1f && texCent.y <= 1.1f);
+                                    //nvCheck(texCent.z >= -0.1f && texCent.z <= 1.1f);
+                                    //Vector3 texCent2 = t1 + dx * (x - v1.x) + dy * (y - v1.y);
+                                    if (!cb(param, (int)x, (int)y, texCent, dx, dy, area))
+                                    {
+                                        return false;
+                                    }
+                                }
+                            }
+
+                            CX1 += n1.x;
+                            CX2 += n2.x;
+                            CX3 += n3.x;
+                            tex += dx;
+                        }
+
+                        CY1 += n1.y;
+                        CY2 += n2.y;
+                        CY3 += n3.y;
+                        texRow += dy;
+                    }
+                }
+            }
+        }
+
+        return true;
+    }
+
+} // namespace
+
+
+/// Rasterize one triangle. The callback receives barycentric coordinates: (1,0,0) at v[0], (0,1,0) at v[1], (0,0,1) at v[2].
+bool nv::Raster::drawTriangle(Mode mode, Vector2::Arg extents, bool enableScissors, const Vector2 v[3], SamplingCallback cb, void * param)
+{
+    // @@ It would be nice to have a conservative drawing mode that enlarges the triangle extents by one texel and is able to handle degenerate triangles.
+    // @@ Maybe the simplest thing to do would be raster triangle edges.
+    Triangle tri(v[0], v[1], v[2], Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(0, 0, 1));
+    if (!tri.valid) {
+        return true; // nothing is drawn for a triangle whose deltas could not be computed
+    }
+
+    if (mode == Mode_Nearest) {
+        return tri.draw(extents, enableScissors, cb, param);
+    }
+    if (mode == Mode_Antialiased) {
+        return tri.drawAA(extents, enableScissors, cb, param);
+    }
+
+    return true; // any other mode value draws nothing
+}
+
+inline static float triangleArea(Vector2::Arg v1, Vector2::Arg v2, Vector2::Arg v3) // signed area of (v1, v2, v3); the sign flips with the winding
+{
+    return 0.5f * (v3.x * v1.y + v1.x * v2.y + v2.x * v3.y - v2.x * v1.y - v3.x * v2.y - v1.x * v3.y);
+}
+
+/// Process the given quad. The callback receives (u, v, 0) with corners (0,0), (1,0), (1,1), (0,1) for v[0]..v[3].
+bool nv::Raster::drawQuad(Mode mode, Vector2::Arg extents, bool enableScissors, const Vector2 v[4], SamplingCallback cb, void * param)
+{
+    // Divide the quad into two non overlapping triangles; the area signs decide which diagonal is used.
+    const bool firstPositive = triangleArea(v[0], v[1], v[2]) > 0.0f;
+    const bool secondPositive = triangleArea(v[0], v[2], v[3]) > 0.0f;
+    const bool antialiased = (mode == Mode_Antialiased);
+
+    if (firstPositive != secondPositive)
+    {
+        // Signs disagree: split along the v[1]-v[3] diagonal.
+        Triangle tri0(v[0], v[1], v[3], Vector3(0,0,0), Vector3(1,0,0), Vector3(0,1,0));
+        Triangle tri1(v[1], v[2], v[3], Vector3(1,0,0), Vector3(1,1,0), Vector3(0,1,0));
+
+        if (!tri0.valid || !tri1.valid) {
+            return true; // nothing is drawn unless both halves are usable
+        }
+        if (antialiased) {
+            return tri0.drawAA(extents, enableScissors, cb, param) && tri1.drawAA(extents, enableScissors, cb, param);
+        }
+        return tri0.draw(extents, enableScissors, cb, param) && tri1.draw(extents, enableScissors, cb, param);
+    }
+
+    // Signs agree: split along the v[0]-v[2] diagonal.
+    Triangle tri0(v[0], v[1], v[2], Vector3(0,0,0), Vector3(1,0,0), Vector3(1,1,0));
+    Triangle tri1(v[0], v[2], v[3], Vector3(0,0,0), Vector3(1,1,0), Vector3(0,1,0));
+
+    if (!tri0.valid || !tri1.valid) {
+        return true; // nothing is drawn unless both halves are usable
+    }
+    if (antialiased) {
+        return tri0.drawAA(extents, enableScissors, cb, param) && tri1.drawAA(extents, enableScissors, cb, param);
+    }
+    return tri0.draw(extents, enableScissors, cb, param) && tri1.draw(extents, enableScissors, cb, param);
+}
+
+
+static bool drawPoint(const Vector2 & p, const Vector2 v[2], LineSamplingCallback cb, void * param) {
+
+    // Texel that p rounds to, and the center of that texel.
+    const int px = ftoi_round(p.x);
+    const int py = ftoi_round(p.y);
+    const Vector2 center = Vector2(float(px) + 0.5f, float(py) + 0.5f);
+
+    const Vector2 seg = v[1] - v[0];
+    const float segLen2 = nv::lengthSquared(seg);  // squared length, avoids a sqrt
+
+    // Parameter of the point of the segment closest to the texel center.
+    // It stays at 0 for a zero-length segment (v0 == v1 case).
+    float t = 0;
+    if (segLen2 != 0.0) {
+        // Consider the line extending the segment, parameterized as v0 + t (v1 - v0).
+        // The texel center projects onto it at t = [(c-v0) . (v1-v0)] / |v1-v0|^2
+        t = dot(center - v[0], seg) / segLen2;
+        if (t < 0.0) {
+            t = 0;                      // Beyond the 'v0' end of the segment
+        }
+        else if (t > 1.0) {
+            t = 1;                      // Beyond the 'v1' end of the segment
+        }
+    }
+
+    const Vector2 closest = v[0] + t * seg; // Projection falls on the segment
+
+    const float dist = distance(center, closest);
+
+    // The callback receives 1 - distance after saturate(), not the raw distance.
+
+    return cb(param, px, py, t, saturate(1-dist));
+}
+
+
+void nv::Raster::drawLine(bool antialias, Vector2::Arg extents, bool enableScissors, const Vector2 v[2], LineSamplingCallback cb, void * param)
+{
+    nvCheck(antialias == true);         // @@ Not implemented.
+    //nvCheck(enableScissors == false); // @@ Not implemented.
+
+    // Very crappy DDA implementation: walk the major axis in unit steps and, at every step,
+    // sample the current point plus one texel on each side across the minor axis.
+    // extents and enableScissors are not used, and the callback's return value is ignored.
+
+    const float dx = v[1].x - v[0].x;
+    const float dy = v[1].y - v[0].y;
+
+    // Degenerate line.
+    if (dx == 0 && dy == 0) return;
+
+    const float adx = fabsf(dx);
+    const float ady = fabsf(dy);
+
+    int n;              // number of unit steps along the major axis
+    Vector2 dp, dpdy;   // advance per step, and the one-texel offset across the minor axis
+    if (adx >= ady) {
+        n = iround(adx);
+        dp = Vector2(dx / adx, dy / adx);
+        nvDebugCheck(fabsf(dp.y) <= 1.0f);
+        dpdy = Vector2(0.0f, 1.0f);
+    }
+    else {
+        n = iround(ady);
+        dp = Vector2(dx / ady, dy / ady);
+        nvDebugCheck(fabsf(dp.x) <= 1.0f);
+        dpdy = Vector2(1.0f, 0.0f);
+    }
+
+    Vector2 p = v[0];
+    for (int i = 0; i <= n; i++) {
+        drawPoint(p, v, cb, param);
+        drawPoint(p + dpdy, v, cb, param);
+        drawPoint(p - dpdy, v, cb, param);
+        p += dp;
+    }
+}
+
+
+// Draw vertical or horizontal segments. For degenerate triangles.
+/*bool nv::Raster::drawSegment(Vector2::Arg extents, bool enableScissors, const Vector2 v[2], LineSamplingCallback cb, void * param)
+{
+    nvCheck(enableScissors == false);
+
+    
+    if (v[0].x == v[1].x) {         // Vertical segment.
+        
+    }
+    else if (v[0].y == v[1].y) {    // Horizontal segment.
+        int y = ftoi_round(v[0].y);
+        int x0 = ftoi_floor(v[0].x);
+        int x1 = ftoi_floor(v[0].x);
+
+        for (int x = x0; x <= x1; x++) {
+
+            cb(param, x, y, t, 
+        }
+    }
+
+    return false; // Not a valid segment.
+}
+*/
diff --git a/thirdparty/thekla_atlas/nvmesh/raster/Raster.h b/thirdparty/thekla_atlas/nvmesh/raster/Raster.h
new file mode 100644
index 0000000000..05af2ddb00
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/raster/Raster.h
@@ -0,0 +1,49 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#pragma once
+#ifndef NV_MESH_RASTER_H
+#define NV_MESH_RASTER_H
+
+/** @file Raster.h
+ * @brief Rasterization library.
+ *
+ * This is just a standard scanline rasterizer that I took from one of my old
+ * projects. The perspective correction wasn't necessary so I just removed it.
+**/
+
+#include "nvmath/Vector.h"
+#include "nvmesh/nvmesh.h"
+
+namespace nv
+{
+
+    namespace Raster 
+    {
+        enum Mode {
+            Mode_Nearest,       // one callback per covered texel, coverage always 1
+            Mode_Antialiased,   // partially covered texels are reported with fractional coverage
+            //Mode_Conservative
+        };
+
+
+        /// A callback to sample the environment. Return false to terminate rasterization. bar is the interpolated attribute, dx/dy its per-texel gradients.
+        typedef bool (NV_CDECL * SamplingCallback)(void * param, int x, int y, Vector3::Arg bar, Vector3::Arg dx, Vector3::Arg dy, float coverage);
+
+        // Process the given triangle. Returns false if rasterization was interrupted by the callback. Triangles whose gradients cannot be computed are skipped and reported as true.
+        NVMESH_API bool drawTriangle(Mode mode, Vector2::Arg extents, bool enableScissors, const Vector2 v[3], SamplingCallback cb, void * param);
+
+        // Process the given quad. Returns false if rasterization was interrupted by the callback. Nothing is drawn unless both halves of the quad are valid triangles.
+        NVMESH_API bool drawQuad(Mode mode, Vector2::Arg extents, bool enableScissors, const Vector2 v[4], SamplingCallback cb, void * param);
+
+        typedef bool (NV_CDECL * LineSamplingCallback)(void * param, int x, int y, float t, float d);    // t is the position along the segment clamped to [0, 1]; drawLine passes d = saturate(1 - distance from the texel center to the segment), not the raw distance.
+
+        // Process the given line. antialias must be true; extents and enableScissors are currently unused and the callback's return value is ignored.
+        NVMESH_API void drawLine(bool antialias, Vector2::Arg extents, bool enableScissors, const Vector2 v[2], LineSamplingCallback cb, void * param);
+
+        // Draw vertical or horizontal segments. For degenerate triangles.
+        //NVMESH_API void drawSegment(Vector2::Arg extents, bool enableScissors, const Vector2 v[2], SamplingCallback cb, void * param);
+    }
+}
+
+
+#endif // NV_MESH_RASTER_H
diff --git a/thirdparty/thekla_atlas/nvmesh/weld/Snap.cpp b/thirdparty/thekla_atlas/nvmesh/weld/Snap.cpp
new file mode 100644
index 0000000000..b6bff4d83d
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/weld/Snap.cpp
@@ -0,0 +1,100 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include <nvcore/RadixSort.h>
+
+#include <nvmesh/weld/Snap.h>
+#include <nvmesh/TriMesh.h>
+#include <nvmesh/geometry/Bounds.h>
+
+using namespace nv;
+
+namespace {
+
+	// Merge a vertex pair in place: positions are always averaged, texcoords and normals only when every component is within its threshold.
+	void Snap(TriMesh::Vertex & a, TriMesh::Vertex & b, float texThreshold, float norThreshold)
+	{
+		const bool texClose = equal(a.tex.x, b.tex.x, texThreshold) && equal(a.tex.y, b.tex.y, texThreshold);
+		const bool norClose = equal(a.nor.x, b.nor.x, norThreshold) && equal(a.nor.y, b.nor.y, norThreshold) && equal(a.nor.z, b.nor.z, norThreshold);
+		b.pos = a.pos = (a.pos + b.pos) * 0.5f;
+		if (texClose) {
+			a.tex = b.tex = (a.tex + b.tex) * 0.5f;
+		}
+		if (norClose) {
+			a.nor = b.nor = (a.nor + b.nor) * 0.5f;
+		}
+	};
+
+} // namespace
+
+// Snap together vertices whose positions lie within posThreshold of each other.
+// Vertices are sorted along the longest bounding-box axis and swept; every accepted pair is merged with Snap().
+// Returns the number of pairs snapped; a mesh with fewer than two vertices yields 0.
+uint nv::SnapVertices(TriMesh * mesh, float posThreshold, float texThreshold, float norThreshold)
+{
+	nvDebug("--- Snapping vertices.\n");
+
+	// Nothing to snap below two vertices. This also keeps the unsigned `vertexCount-1` in the sweep from wrapping around on an empty mesh.
+	const uint vertexCount = mesh->vertexCount();
+	if (vertexCount < 2) {
+		return 0;
+	}
+
+	// Determine largest axis.
+	Box box = MeshBounds::box(mesh);
+	Vector3 extents = box.extents();
+
+	int axis = 2;
+	if( extents.x > extents.y ) {
+		if( extents.x > extents.z ) {
+			axis = 0;
+		}
+	}
+	else if(extents.y > extents.z) {
+		axis = 1;
+	}
+
+	// @@ Use diagonal instead!
+	// Sort vertices according to the largest axis.
+	Array<float> distArray;
+	distArray.resize(vertexCount);
+
+	for(uint v = 0; v < vertexCount; v++) {
+		if (axis == 0) distArray[v] = mesh->vertexAt(v).pos.x;
+		else if (axis == 1) distArray[v] = mesh->vertexAt(v).pos.y;
+		else distArray[v] = mesh->vertexAt(v).pos.z;
+	}
+
+	RadixSort radix;
+	const uint * xrefs = radix.sort(distArray.buffer(), distArray.count()).ranks();
+	nvCheck(xrefs != NULL);
+
+	// Sweep the sorted order: xrefs[i] is the index of the i-th vertex along the chosen axis.
+	uint snapCount = 0;
+	for(uint v = 0; v < vertexCount-1; v++) {
+		for(uint n = v+1; n < vertexCount; n++) {
+			nvDebugCheck( distArray[xrefs[v]] <= distArray[xrefs[n]] );
+
+			// Once the gap along the sort axis exceeds the threshold, no later vertex can be close enough.
+			if (fabs(distArray[xrefs[n]] - distArray[xrefs[v]]) > posThreshold) {
+				break;
+			}
+
+			TriMesh::Vertex & v0 = mesh->vertexAt(xrefs[v]);
+			TriMesh::Vertex & v1 = mesh->vertexAt(xrefs[n]);
+
+			const float dist = length(v0.pos - v1.pos);
+
+			if (dist <= posThreshold) {
+				Snap(v0, v1, texThreshold, norThreshold);
+				snapCount++;
+			}
+		}
+	}
+
+	// @@ todo: debug, make sure that the distance between vertices is now >= threshold
+
+	nvDebug("---   %u vertices snapped\n", snapCount);
+
+	return snapCount;
+}
+
diff --git a/thirdparty/thekla_atlas/nvmesh/weld/Snap.h b/thirdparty/thekla_atlas/nvmesh/weld/Snap.h
new file mode 100644
index 0000000000..8e0566cda3
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/weld/Snap.h
@@ -0,0 +1,18 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MESH_SNAP_H
+#define NV_MESH_SNAP_H
+
+#include <nvmesh/nvmesh.h>
+#include <nvmath/nvmath.h>
+
+namespace nv
+{
+	class TriMesh;
+	// Merges vertices of the mesh that lie within posThreshold of each other; returns the number of pairs snapped.
+	NVMESH_API uint SnapVertices(TriMesh * mesh, float posThreshold=NV_EPSILON, float texThreshold=1.0f/1024, float norThreshold=NV_NORMAL_EPSILON);
+
+} // nv namespace
+
+
+#endif // NV_MESH_SNAP_H
diff --git a/thirdparty/thekla_atlas/nvmesh/weld/VertexWeld.cpp b/thirdparty/thekla_atlas/nvmesh/weld/VertexWeld.cpp
new file mode 100644
index 0000000000..2ba4dcae18
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/weld/VertexWeld.cpp
@@ -0,0 +1,205 @@
+// Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com>
+
+#include <nvmesh/TriMesh.h>
+#include <nvmesh/QuadTriMesh.h>
+
+#include <nvmesh/weld/VertexWeld.h>
+#include <nvmesh/weld/Weld.h>
+
+using namespace nv;
+
+// Weld trimesh vertices: runs Weld over the vertex array and rewrites every face index through the resulting xrefs table.
+void nv::WeldVertices(TriMesh * mesh)
+{
+	nvDebug("--- Welding vertices.\n");
+
+	nvCheck(mesh != NULL);
+
+	uint count = mesh->vertexCount();
+	Array<uint> xrefs;
+	Weld<TriMesh::Vertex> weld;
+	uint newCount = weld(mesh->vertices(), xrefs);
+
+	// Both counts are unsigned, so the difference is printed with %u.
+	nvDebug("---   %u vertices welded\n", count - newCount);
+
+	// Remap faces.
+	const uint faceCount = mesh->faceCount();
+	for(uint f = 0; f < faceCount; f++)
+	{
+		TriMesh::Face & face = mesh->faceAt(f);
+		face.v[0] = xrefs[face.v[0]];
+		face.v[1] = xrefs[face.v[1]];
+		face.v[2] = xrefs[face.v[2]];
+	}
+}
+
+
+// Weld quad/tri mesh vertices: runs Weld over the vertex array and rewrites every face index through the resulting xrefs table.
+void nv::WeldVertices(QuadTriMesh * mesh)
+{
+	nvDebug("--- Welding vertices.\n");
+
+	nvCheck(mesh != NULL);
+
+	uint  count = mesh->vertexCount();
+	Array<uint> xrefs;
+	Weld<TriMesh::Vertex> weld;
+	uint newCount = weld(mesh->vertices(), xrefs);
+
+	nvDebug("---   %u vertices welded\n", count - newCount); // unsigned difference, hence %u
+
+	// Remap faces.
+	const uint faceCount = mesh->faceCount();
+	for(uint f = 0; f < faceCount; f++)
+	{
+		QuadTriMesh::Face & face = mesh->faceAt(f);
+		face.v[0] = xrefs[face.v[0]];
+		face.v[1] = xrefs[face.v[1]];
+		face.v[2] = xrefs[face.v[2]];
+		// The fourth index is only remapped for quad faces.
+		if (face.isQuadFace())
+		{
+			face.v[3] = xrefs[face.v[3]];
+		}
+	}
+}
+
+
+
+// Old PiMesh-based implementation; it is disabled by the #if 0 below and kept for reference only.
+
+#if 0
+
+namespace {
+
+struct VertexInfo {
+	uint id;			///< Original vertex id.
+	uint normal_face_group;
+	uint tangent_face_group;
+	uint material;
+	uint chart;
+};
+
+
+/// VertexInfo hash functor.
+struct VertexHash : public IHashFunctor<VertexInfo> {
+	VertexHash(PiMeshPtr m) : mesh(m) {
+		uint c = mesh->FindChannel(VS_POS);
+		piCheck(c != PI_NULL_INDEX);
+		channel = mesh->GetChannel(c);
+		piCheck(channel != NULL);
+	}
+
+	uint32 operator () (const VertexInfo & v) const {
+		return channel->data[v.id].GetHash();
+	}
+	
+private:
+	PiMeshPtr mesh;
+	PiMesh::Channel * channel;
+};
+
+
+/// VertexInfo comparator.
+struct VertexEqual : public IBinaryPredicate<VertexInfo> {
+	VertexEqual(PiMeshPtr m) : mesh(m) {}
+	
+	bool operator () (const VertexInfo & a, const VertexInfo & b) const {
+
+		bool equal = a.normal_face_group == b.normal_face_group && 
+			a.tangent_face_group == b.tangent_face_group &&
+			a.material == b.material && 
+			a.chart == b.chart;
+		
+		// Split vertex shared by different face types.
+		if( !equal ) {
+			return false;
+		}
+		
+		// They were the same vertex.
+		if( a.id == b.id ) {
+			return true;
+		}
+		
+		// Vertex equal if all the channels are equal.
+		return mesh->IsVertexEqual(a.id, b.id);
+	}
+
+private:	
+	PiMeshPtr mesh;
+};
+
+} // namespace
+
+
+/// Weld the vertices.
+void PiMeshVertexWeld::WeldVertices(const PiMeshSmoothGroup * mesh_smooth_group, 
+	const PiMeshMaterial * mesh_material, const PiMeshAtlas * mesh_atlas ) 
+{
+	piDebug( "--- Welding vertices:\n" );
+
+	piDebug( "---   Expand mesh vertices.\n" );
+	PiArray<VertexInfo> vertex_array;
+
+	const uint face_num = mesh->GetFaceNum();
+	const uint vertex_max = face_num * 3;
+	vertex_array.Resize( vertex_max );
+
+	for(uint i = 0; i < vertex_max; i++) {
+
+		uint f = i/3;
+	
+		const PiMesh::Face & face = mesh->GetFace(f);
+		vertex_array[i].id = face.v[i%3];
+
+		// Reset face attributes.
+		vertex_array[i].normal_face_group = PI_NULL_INDEX;
+		vertex_array[i].tangent_face_group = PI_NULL_INDEX;
+		vertex_array[i].material = PI_NULL_INDEX;
+		vertex_array[i].chart = PI_NULL_INDEX;
+		
+		// Set available attributes.
+		if( mesh_smooth_group != NULL ) {
+			if( mesh_smooth_group->HasNormalFaceGroups() ) {
+				vertex_array[i].normal_face_group = mesh_smooth_group->GetNormalFaceGroup( f );
+			}
+			if( mesh_smooth_group->HasTangentFaceGroups() ) {
+				vertex_array[i].tangent_face_group = mesh_smooth_group->GetTangentFaceGroup( f );
+			}
+		}
+		if( mesh_material != NULL ) {
+			vertex_array[i].material = mesh_material->GetFaceMaterial( f );
+		}
+		if( mesh_atlas != NULL && mesh_atlas->HasCharts() ) {
+			vertex_array[i].chart = mesh_atlas->GetFaceChart( f );
+		}
+	}
+	piDebug( "---   %d vertices.\n", vertex_max );
+
+	piDebug( "---   Collapse vertices.\n" );
+
+	uint * xrefs = new uint[vertex_max];
+	VertexHash hash(mesh);
+	VertexEqual equal(mesh);
+	const uint vertex_num = Weld( vertex_array, xrefs, hash, equal );
+	piCheck(vertex_num <= vertex_max);
+	piDebug( "---   %d vertices.\n", vertex_num );	
+	
+	// Remap face indices.
+	piDebug( "---   Remapping face indices.\n" );
+	mesh->RemapFaceIndices(vertex_max, xrefs);
+
+
+	// Overwrite xrefs to map new vertices to old vertices.
+	for(uint v = 0; v < vertex_num; v++) {
+		xrefs[v] = vertex_array[v].id;
+	}
+	
+	// Update vertex order.
+	mesh->ReorderVertices(vertex_num, xrefs);
+
+	delete [] xrefs;
+}
+
+#endif // 0
diff --git a/thirdparty/thekla_atlas/nvmesh/weld/VertexWeld.h b/thirdparty/thekla_atlas/nvmesh/weld/VertexWeld.h
new file mode 100644
index 0000000000..1dc2e4ba4d
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/weld/VertexWeld.h
@@ -0,0 +1,19 @@
+// Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com>
+
+#ifndef NV_MESH_VERTEXWELD_H
+#define NV_MESH_VERTEXWELD_H
+
+#include <nvmesh/nvmesh.h>
+
+namespace nv
+{
+	class TriMesh;
+	class QuadMesh;		// not used below; kept so existing includers relying on it still compile
+	class QuadTriMesh;	// forward declaration needed by the second WeldVertices overload
+	NVMESH_API void WeldVertices(TriMesh * mesh);
+	NVMESH_API void WeldVertices(QuadTriMesh * mesh);
+
+} // nv namespace
+
+
+#endif // NV_MESH_VERTEXWELD_H
diff --git a/thirdparty/thekla_atlas/nvmesh/weld/Weld.h b/thirdparty/thekla_atlas/nvmesh/weld/Weld.h
new file mode 100644
index 0000000000..e615539461
--- /dev/null
+++ b/thirdparty/thekla_atlas/nvmesh/weld/Weld.h
@@ -0,0 +1,171 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MESH_WELD_H
+#define NV_MESH_WELD_H
+
+#include "nvcore/Array.h"
+#include "nvcore/Hash.h"
+#include "nvcore/Utils.h" // nextPowerOfTwo
+
+#include <string.h> // for memset, memcmp, memcpy
+
+// Weld function to remove array duplicates in linear time using hashing.
+
+namespace nv
+{
+
+/// Generic welding routine. This function welds the elements of the array p
+/// and returns the cross references in the xrefs array. To compare the elements
+/// it uses the given hash and equal functors.
+/// 
+/// This code is based on the ideas of Ville Miettinen and Pierre Terdiman.
+template <class T, class H=Hash<T>, class E=Equal<T> >
+struct Weld
+{
+	// Compacts p in place to its distinct elements and returns their count; on return xrefs[i] is the new index of old element i.
+	uint operator()(Array<T> & p, Array<uint> & xrefs)
+	{
+		const uint N = p.size();							// # of input vertices.
+		uint outputCount = 0;								// # of output vertices
+		uint hashSize = nextPowerOfTwo(N);					// size of the hash table (hash values are masked with hashSize-1 below)
+		uint * hashTable = new uint[hashSize + N];			// hash table (bucket heads) + linked list, in a single allocation
+		uint * next = hashTable + hashSize;					// use bottom part as linked list: next[k] chains output element k
+
+		xrefs.resize(N);
+		memset( hashTable, NIL, hashSize*sizeof(uint) );	// init hash table (NIL = 0xFFFFFFFF so memset works)
+
+		H hash;
+		E equal;
+		for (uint i = 0; i < N; i++)
+		{
+			const T & e = p[i];
+			uint32 hashValue = hash(e) & (hashSize-1);
+			uint offset = hashTable[hashValue];
+
+			// traverse linked list of already emitted elements that share this bucket
+			while( offset != NIL && !equal(p[offset], e) )
+			{
+				offset = next[offset];
+			}
+
+			xrefs[i] = offset;	// index of the matching output element, or NIL (fixed up just below)
+
+			// no match found - copy vertex & add to hash
+			if( offset == NIL )
+			{
+				// save xref
+				xrefs[i] = outputCount;
+
+				// copy element (outputCount <= i, so only already visited slots are overwritten)
+				p[outputCount] = e;
+
+				// link to hash table: the previous bucket head becomes this element's successor
+				next[outputCount] = hashTable[hashValue];
+
+				// update hash heads and increase output counter
+				hashTable[hashValue] = outputCount++;
+			}
+		}
+
+		// cleanup
+		delete [] hashTable;
+
+		p.resize(outputCount);
+		
+		// number of output vertices
+		return outputCount;
+	}
+};
+
+
+/// Reorder the given array according to the indices given in xrefs.
+template <class T>
+void reorderArray(Array<T> & array, const Array<uint> & xrefs)
+{
+	const uint count = xrefs.count();
+	Array<T> new_array;
+    new_array.resize(count);
+
+	for(uint i = 0; i < count; i++) {
+		new_array[i] = array[xrefs[i]];
+	}
+
+	swap(array, new_array);
+}
+
+/// Reverse the given array so that new indices point to old indices.
+inline void reverseXRefs(Array<uint> & xrefs, uint count)
+{
+	Array<uint> new_xrefs;
+    new_xrefs.resize(count);
+	
+	for(uint i = 0; i < xrefs.count(); i++) {
+		new_xrefs[xrefs[i]] = i;
+	}
+	
+	swap(xrefs, new_xrefs);
+}
+
+
+
+// Welds N fixed-size vertices (vertexSize bytes each) stored contiguously at ptr; vertices are compared bytewise.
+struct WeldN
+{
+    uint vertexSize;
+
+    WeldN(uint n) : vertexSize(n) {}
+
+	// Compacts the distinct vertices to the front of ptr and returns their count; xrefs is resized to N and maps old -> new index.
+	uint operator()(uint8 * ptr, uint N, Array<uint> & xrefs)
+	{
+		uint outputCount = 0;								// # of output vertices
+		uint hashSize = nextPowerOfTwo(N);					// size of the hash table (hash values are masked with hashSize-1 below)
+		uint * hashTable = new uint[hashSize + N];			// hash table (bucket heads) + linked list, in a single allocation
+		uint * next = hashTable + hashSize;					// use bottom part as linked list: next[k] chains output vertex k
+
+		xrefs.resize(N);
+		memset( hashTable, NIL, hashSize*sizeof(uint) );	// init hash table (NIL = 0xFFFFFFFF so memset works)
+
+		for (uint i = 0; i < N; i++)
+		{
+			const uint8 * vertex = ptr + i * vertexSize;
+			uint32 hashValue = sdbmHash(vertex, vertexSize) & (hashSize-1);
+			uint offset = hashTable[hashValue];
+
+			// traverse linked list of already emitted vertices that share this bucket
+			while (offset != NIL && memcmp(ptr + offset * vertexSize, vertex, vertexSize) != 0)
+			{
+				offset = next[offset];
+			}
+
+			xrefs[i] = offset;	// index of the matching output vertex, or NIL (fixed up just below)
+
+			// no match found - copy vertex & add to hash
+			if (offset == NIL)
+			{
+				// save xref
+				xrefs[i] = outputCount;
+
+				// copy element; memmove because source and destination are the same bytes whenever outputCount == i
+				memmove(ptr + outputCount * vertexSize, vertex, vertexSize);
+
+				// link to hash table: the previous bucket head becomes this vertex's successor
+				next[outputCount] = hashTable[hashValue];
+
+				// update hash heads and increase output counter
+				hashTable[hashValue] = outputCount++;
+			}
+		}
+
+		// cleanup
+		delete [] hashTable;
+
+		// number of output vertices
+		return outputCount;
+	}
+};
+
+
+} // nv namespace
+
+#endif // NV_MESH_WELD_H
diff --git a/thirdparty/thekla_atlas/poshlib/posh.c b/thirdparty/thekla_atlas/poshlib/posh.c
new file mode 100644
index 0000000000..bd3fcc66ea
--- /dev/null
+++ b/thirdparty/thekla_atlas/poshlib/posh.c
@@ -0,0 +1,1006 @@
+/*
+LICENSE:
+
+Copyright (c) 2004, Brian Hook
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * The names of this package'ss contributors contributors may not
+      be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/** 
+ @file    posh.c
+ @author  Brian Hook
+ @date    2002
+ @brief   Portable Open Source Harness primary source file
+*/
+#include "posh.h"
+
+#if !defined FORCE_DOXYGEN
+
+#if !defined POSH_NO_FLOAT
+#  define POSH_FLOAT_STRING "enabled"
+#else
+#  define POSH_FLOAT_STRING "disabled"
+#endif
+
+#if defined POSH_64BIT_INTEGER
+#  define POSH_64BIT_INTEGER_STRING "yes"
+#else
+#  define POSH_64BIT_INTEGER_STRING "no"
+#endif
+
+#if defined POSH_64BIT_POINTER
+#  define POSH_POINTER_STRING "64-bits"
+#else
+#  define POSH_POINTER_STRING "32-bits"
+#endif
+
+#if defined POSH_LITTLE_ENDIAN
+#  define IS_BIG_ENDIAN    0
+
+#  define NATIVE16  POSH_LittleU16
+#  define NATIVE32  POSH_LittleU32
+#  define NATIVE64  POSH_LittleU64
+#  define FOREIGN16 POSH_BigU16
+#  define FOREIGN32 POSH_BigU32
+#  define FOREIGN64 POSH_BigU64
+#else
+#  define IS_BIG_ENDIAN    1
+
+#  define NATIVE16  POSH_BigU16
+#  define NATIVE32  POSH_BigU32
+#  define NATIVE64  POSH_BigU64
+#  define FOREIGN16 POSH_LittleU16
+#  define FOREIGN32 POSH_LittleU32
+#  define FOREIGN64 POSH_LittleU64
+#endif /* POSH_LITTLE_ENDIAN */
+
+static 
+int 
+s_testBigEndian( void )
+{
+   union 
+   {
+      posh_byte_t c[ 4 ];
+      posh_u32_t  i;
+   } u;
+
+   u.i= 1;
+
+   if ( u.c[ 0 ] == 1 )
+   {
+      return 0;
+   }
+   return 1;
+}
+
+static
+const char *
+s_testSerialization( void )
+{
+   posh_byte_t serbuf[ 8 ];
+   posh_u16_t  tmp16;
+   posh_u32_t  tmp32;
+
+   /* 16-bit serialization */
+   POSH_WriteU16ToLittle( serbuf, 0xABCD );
+   if ( ( tmp16 = POSH_ReadU16FromLittle( serbuf ) ) != 0xABCD )
+   {
+      return "*ERROR: failed little-endian 16-bit serialization test";
+   }
+
+   POSH_WriteU16ToBig( serbuf, 0xABCD );
+   if ( ( tmp16 = POSH_ReadU16FromBig( serbuf ) ) != 0xABCD )
+   {
+      return "*ERROR: failed big-endian 16-bit serialization test";
+   }
+
+   /* 32-bit serialization */
+   POSH_WriteU32ToLittle( serbuf, 0xABCD1234L );
+   if ( ( tmp32 = POSH_ReadU32FromLittle( serbuf ) ) != 0xABCD1234 )
+   {
+      return "*ERROR: failed little-endian 32-bit serialization test";
+   }
+
+   POSH_WriteU32ToBig( serbuf, 0xABCD1234L );
+   if ( ( tmp32 = POSH_ReadU32FromBig( serbuf ) ) != 0xABCD1234 )
+   {
+      return "*ERROR: failed big-endian 32-bit serialization test";
+   }
+
+#if defined POSH_64BIT_INTEGER
+   {
+#define REF64 POSH_U64(0xFEDCBA9876543210)
+
+      posh_u64_t tmp64;
+
+      POSH_WriteU64ToLittle( serbuf, REF64 );
+
+      if ( ( tmp64 = POSH_ReadU64FromLittle( serbuf ) ) != REF64 )
+      {
+         return "*ERROR: failed little-endian 64-bit serialization test";
+      }
+
+      POSH_WriteU64ToBig( serbuf, REF64 );
+
+      if ( ( tmp64 = POSH_ReadU64FromBig( serbuf ) ) != REF64 )
+      {
+         return "*ERROR: failed big-endian 64-bit serialization test";
+      }
+   }
+#endif
+
+   return 0;
+}
+
+#if !defined POSH_NO_FLOAT
+static
+const char *
+s_testFloatingPoint( void )
+{
+   float fRef = 10.0f/30.0f;
+   double dRef = 10.0/30.0;
+   posh_byte_t dbuf[ 8 ];
+   float fTmp;
+   double dTmp;
+
+   fTmp = POSH_FloatFromLittleBits( POSH_LittleFloatBits( fRef ) );
+
+   if ( fTmp != fRef )
+   {
+      return "*ERROR: POSH little endian floating point conversion failed.  Please report this to poshlib@poshlib.org!\n";
+   }
+
+   fTmp = POSH_FloatFromBigBits( POSH_BigFloatBits( fRef ) );
+   if ( fTmp != fRef )
+   {
+      return "*ERROR: POSH big endian floating point conversion failed.  Please report this to poshlib@poshlib.org!\n";
+   }
+
+   POSH_DoubleBits( dRef, dbuf );
+
+   dTmp = POSH_DoubleFromBits( dbuf );
+
+   if ( dTmp != dRef )
+   {
+      return "*ERROR: POSH double precision floating point serialization failed.  Please report this to poshlib@poshlib.org!\n";
+   }
+
+   return 0;
+}
+#endif /* !defined POSH_NO_FLOAT */
+
+static
+const char *
+s_testEndianess( void )
+{
+   /* check endianess */
+   if ( s_testBigEndian() != IS_BIG_ENDIAN )
+   {
+      return "*ERROR: POSH compile time endianess does not match run-time endianess verification.  Please report this to poshlib@poshlib.org!\n";
+   }
+
+   /* make sure our endian swap routines work */
+   if ( ( NATIVE32( 0x11223344L ) != 0x11223344L ) || 
+        ( FOREIGN32( 0x11223344L ) != 0x44332211L ) ||
+        ( NATIVE16( 0x1234 ) != 0x1234 ) ||
+        ( FOREIGN16( 0x1234 ) != 0x3412 ) )
+   {
+      return "*ERROR: POSH endianess macro selection failed.  Please report this to poshlib@poshlib.org!\n";
+   }
+
+   /* test serialization routines */
+
+   return 0;
+}
+#endif /* !defined FORCE_DOXYGEN */
+
+/**
+  Returns a string describing this platform's basic attributes.  
+
+  POSH_GetArchString() reports on an architecture's statically determined
+  attributes.  In addition, it will perform run-time verification checks
+  to make sure the various platform specific functions work.  If an error
+  occurs, please contact me at poshlib@poshlib.org so we can try to resolve
+  what the specific failure case is.
+  @returns a string describing this platform on success, or a string in the 
+           form "*ERROR: [text]" on failure.  You can simply check to see if
+           the first character returned is '*' to verify an error condition.
+*/
+const char *
+POSH_GetArchString( void )
+{
+   const char *err;
+   const char *s = "OS:.............."POSH_OS_STRING"\n"
+                   "CPU:............."POSH_CPU_STRING"\n"
+                   "endian:.........."POSH_ENDIAN_STRING"\n"
+                   "ptr size:........"POSH_POINTER_STRING"\n"
+                   "64-bit ints......"POSH_64BIT_INTEGER_STRING"\n"
+                   "floating point..."POSH_FLOAT_STRING"\n"
+                   "compiler........."POSH_COMPILER_STRING"\n";
+
+   /* test endianess */
+   err = s_testEndianess();
+
+   if ( err != 0 )
+   {
+      return err;
+   }
+
+   /* test serialization */
+   err = s_testSerialization();
+
+   if ( err != 0 )
+   {
+      return err;
+   }
+
+#if !defined POSH_NO_FLOAT
+   /* check that our floating point support is correct */
+   err = s_testFloatingPoint();
+
+   if ( err != 0 )
+   {
+      return err;
+   }
+
+#endif
+
+   return s;
+}
+
+/* ---------------------------------------------------------------------------*/
+/*                           BYTE SWAPPING SUPPORT                            */
+/* ---------------------------------------------------------------------------*/
+/** 
+ * Byte swaps a 16-bit unsigned value
+ *
+   @ingroup ByteSwapFunctions
+   @param v [in] unsigned 16-bit input value to swap
+   @returns a byte swapped version of v
+ */
+posh_u16_t
+POSH_SwapU16( posh_u16_t v )
+{
+   posh_u16_t swapped;
+
+   swapped  = v << 8;
+   swapped |= v >> 8;
+
+   return swapped;
+}
+
+/** 
+ * Byte swaps a 16-bit signed value
+ *
+   @ingroup ByteSwapFunctions
+   @param v [in] signed 16-bit input value to swap
+   @returns a byte swapped version of v
+   @remarks This just calls back to the unsigned version, since byte swapping 
+            is independent of sign.  However, we still provide this function to
+            avoid signed/unsigned mismatch compiler warnings.
+ */
+posh_i16_t
+POSH_SwapI16( posh_i16_t v )
+{
+   return ( posh_i16_t ) POSH_SwapU16( v );
+}
+
+/** 
+ * Byte swaps a 32-bit unsigned value
+ *
+   @ingroup ByteSwapFunctions
+   @param v [in] unsigned 32-bit input value to swap
+   @returns a byte swapped version of v
+ */
+posh_u32_t
+POSH_SwapU32( posh_u32_t v )
+{
+   posh_u32_t swapped;
+
+   swapped  = ( v & 0xFF ) << 24;
+   swapped |= ( v & 0xFF00 ) << 8;
+   swapped |= ( v >> 8 ) & 0xFF00;
+   swapped |= ( v >> 24 );
+
+   return swapped;
+}
+
+/** 
+ * Byte swaps a 32-bit signed value
+ *
+   @ingroup ByteSwapFunctions
+   @param v [in] signed 32-bit input value to swap
+   @returns a byte swapped version of v
+   @remarks This just calls back to the unsigned version, since byte swapping 
+            is independent of sign.  However, we still provide this function to
+            avoid signed/unsigned mismatch compiler warnings.
+ */
+posh_i32_t
+POSH_SwapI32( posh_i32_t v )
+{
+   return ( posh_i32_t ) POSH_SwapU32( ( posh_u32_t ) v );
+}
+
+#if defined POSH_64BIT_INTEGER
+/**
+ * Byte swaps a 64-bit unsigned value
+
+   @param v [in] a 64-bit input value to swap
+   @ingroup SixtyFourBit
+   @returns a byte swapped version of v
+*/
+posh_u64_t 
+POSH_SwapU64( posh_u64_t v )
+{
+   posh_byte_t tmp;
+   union {
+      posh_byte_t bytes[ 8 ];
+      posh_u64_t  u64;
+   } u;
+
+   u.u64 = v;
+
+   tmp = u.bytes[ 0 ]; u.bytes[ 0 ] = u.bytes[ 7 ]; u.bytes[ 7 ] = tmp;
+   tmp = u.bytes[ 1 ]; u.bytes[ 1 ] = u.bytes[ 6 ]; u.bytes[ 6 ] = tmp;
+   tmp = u.bytes[ 2 ]; u.bytes[ 2 ] = u.bytes[ 5 ]; u.bytes[ 5 ] = tmp;
+   tmp = u.bytes[ 3 ]; u.bytes[ 3 ] = u.bytes[ 4 ]; u.bytes[ 4 ] = tmp;
+
+   return u.u64;
+}
+
+/**
+ * Byte swaps a 64-bit signed value
+
+   @param v [in] a 64-bit input value to swap
+   @ingroup SixtyFourBit
+   @returns a byte swapped version of v
+*/
+posh_i64_t 
+POSH_SwapI64( posh_i64_t v )
+{
+   return ( posh_i64_t ) POSH_SwapU64( ( posh_u64_t ) v );
+}
+
+#endif /* defined POSH_64BIT_INTEGER */
+
+/* ---------------------------------------------------------------------------*/
+/*                           IN-MEMORY SERIALIZATION                          */
+/* ---------------------------------------------------------------------------*/
+
+/**
+ * Writes an unsigned 16-bit value to a little endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL.  Alignment doesn't matter.
+ @param value [in] host-endian unsigned 16-bit value
+ @returns a pointer to the location two bytes after dst
+ @remarks does no validation of the inputs
+*/
+posh_u16_t *
+POSH_WriteU16ToLittle( void *dst, posh_u16_t value )
+{
+   posh_u16_t  *p16 = ( posh_u16_t * ) dst;
+   posh_byte_t *p   = ( posh_byte_t * ) dst;
+
+   p[ 0 ] = value & 0xFF;
+   p[ 1 ] = ( value & 0xFF00) >> 8;
+
+   return p16 + 1;
+}
+
+/**
+ * Writes a signed 16-bit value to a little endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian signed 16-bit value
+ @returns a pointer to the location two bytes after dst
+ @remarks does no validation of the inputs.  This simply calls
+          POSH_WriteU16ToLittle() with appropriate casting.
+*/
+posh_i16_t *
+POSH_WriteI16ToLittle( void *dst, posh_i16_t value )
+{
+   return ( posh_i16_t * ) POSH_WriteU16ToLittle( dst, ( posh_u16_t ) value );
+}
+
+/**
+ * Writes an unsigned 32-bit value to a little endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian unsigned 32-bit value
+ @returns a pointer to the location four bytes after dst
+ @remarks does no validation of the inputs.
+*/
+posh_u32_t *
+POSH_WriteU32ToLittle( void *dst, posh_u32_t value )
+{
+   posh_u32_t  *p32   = ( posh_u32_t * ) dst;
+   posh_byte_t *p     = ( posh_byte_t * ) dst;
+
+   p[ 0 ] = ( value & 0xFF );
+   p[ 1 ] = ( value & 0xFF00 ) >> 8;
+   p[ 2 ] = ( value & 0xFF0000 ) >> 16;
+   p[ 3 ] = ( value & 0xFF000000 ) >> 24;
+
+   return p32 + 1;
+}
+
+/**
+ * Writes a signed 32-bit value to a little endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian signed 32-bit value
+ @returns a pointer to the location four bytes after dst
+ @remarks does no validation of the inputs.  This simply calls
+          POSH_WriteU32ToLittle() with appropriate casting.
+*/
+posh_i32_t *
+POSH_WriteI32ToLittle( void *dst, posh_i32_t value )
+{
+   return ( posh_i32_t * ) POSH_WriteU32ToLittle( dst, ( posh_u32_t ) value );
+}
+
+/**
+ * Writes an unsigned 16-bit value to a big endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian unsigned 16-bit value
+ @returns a pointer to the location two bytes after dst
+ @remarks does no validation of the inputs
+*/
+posh_u16_t *
+POSH_WriteU16ToBig( void *dst, posh_u16_t value )
+{
+   posh_u16_t *p16 = ( posh_u16_t * ) dst;
+   posh_byte_t *p  = ( posh_byte_t * ) dst;
+
+   p[ 1 ] = ( value & 0xFF );
+   p[ 0 ] = ( value & 0xFF00 ) >> 8;
+
+   return p16 + 1;
+}
+
+/**
+ * Writes a signed 16-bit value to a big endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian signed 16-bit value
+ @returns a pointer to the location two bytes after dst
+ @remarks does no validation of the inputs.  This simply calls
+          POSH_WriteU16ToBig() with appropriate casting.
+*/
+posh_i16_t *
+POSH_WriteI16ToBig( void *dst, posh_i16_t value )
+{
+   return ( posh_i16_t * ) POSH_WriteU16ToBig( dst, ( posh_u16_t ) value );
+}
+
+/**
+ * Writes an unsigned 32-bit value to a big endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian unsigned 32-bit value
+ @returns a pointer to the location four bytes after dst
+ @remarks does no validation of the inputs.
+*/
+posh_u32_t *
+POSH_WriteU32ToBig( void *dst, posh_u32_t value )
+{
+   posh_u32_t *p32 = ( posh_u32_t * ) dst;
+   posh_byte_t *p  = ( posh_byte_t * ) dst;
+
+   p[ 3 ] = ( value & 0xFF );
+   p[ 2 ] = ( value & 0xFF00 ) >> 8;
+   p[ 1 ] = ( value & 0xFF0000 ) >> 16;
+   p[ 0 ] = ( value & 0xFF000000 ) >> 24;
+
+   return p32 + 1;
+}
+
+/**
+ * Writes a signed 32-bit value to a big endian buffer
+
+ @ingroup MemoryBuffer
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian signed 32-bit value
+ @returns a pointer to the location four bytes after dst
+ @remarks does no validation of the inputs.  This simply calls
+          POSH_WriteU32ToBig() with appropriate casting.
+*/
+posh_i32_t *
+POSH_WriteI32ToBig( void *dst, posh_i32_t value )
+{
+   return ( posh_i32_t * ) POSH_WriteU32ToBig( dst, ( posh_u32_t ) value );
+}
+
+#if defined POSH_64BIT_INTEGER
+/**
+ * Writes an unsigned 64-bit value to a little-endian buffer
+
+ @ingroup SixtyFourBit
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian unsigned 64-bit value
+ @returns a pointer to the location eight bytes after dst
+ @remarks does no validation of the inputs.
+*/
+posh_u64_t *
+POSH_WriteU64ToLittle( void *dst, posh_u64_t value )
+{
+   posh_u64_t *p64 = ( posh_u64_t * ) dst;
+   posh_byte_t *p  = ( posh_byte_t * ) dst;
+   int i;
+
+   for ( i = 0; i < 8; i++, value >>= 8 )
+   {
+       p[ i ] = ( posh_byte_t ) ( value & 0xFF );
+   }
+
+   return p64 + 1;
+}
+
+/**
+ * Writes a signed 64-bit value to a little-endian buffer
+
+ @ingroup SixtyFourBit
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian signed 64-bit value
+ @returns a pointer to the location eight bytes after dst
+ @remarks does no validation of the inputs.
+*/
+posh_i64_t *
+POSH_WriteI64ToLittle( void *dst, posh_i64_t value )
+{
+   return ( posh_i64_t * ) POSH_WriteU64ToLittle( dst, ( posh_u64_t ) value );
+}
+
+/**
+ * Writes an unsigned 64-bit value to a big-endian buffer
+
+ @ingroup SixtyFourBit
+ @param dst [out] pointer to the destination buffer, may not be NULL.  Written one byte at a time.
+ @param value [in] host-endian unsigned 64-bit value
+ @returns a pointer to the location eight bytes after dst
+ @remarks does no validation of the inputs.
+*/
+posh_u64_t *
+POSH_WriteU64ToBig( void *dst, posh_u64_t value )
+{
+   posh_u64_t *p64 = ( posh_u64_t * ) dst;
+   posh_byte_t *p  = ( posh_byte_t * ) dst;
+   int i;
+
+   for ( i = 0; i < 8; i++, value >>= 8 ) /* least significant byte first, stored from the end backwards */
+   {
+       p[ 7-i ] = ( posh_byte_t ) ( value & 0xFF );
+   }
+
+   return p64 + 1; /* one 64-bit element, i.e. eight bytes, past dst (was p64 + 8 == 64 bytes) */
+}
+
+/**
+ * Writes a signed 64-bit value to a big-endian buffer
+
+ @ingroup SixtyFourBit
+ @param dst [out] pointer to the destination buffer, may not be NULL
+ @param value [in] host-endian signed 64-bit value
+ @returns a pointer to the location eight bytes after dst
+ @remarks does no validation of the inputs.
+*/
+posh_i64_t *
+POSH_WriteI64ToBig( void *dst, posh_i64_t value )
+{
+   return ( posh_i64_t * ) POSH_WriteU64ToBig( dst, ( posh_u64_t ) value );
+}
+
+#endif /* POSH_64BIT_INTEGER */
+
+/* ---------------------------------------------------------------------------*/
+/*                         IN-MEMORY DESERIALIZATION                          */
+/* ---------------------------------------------------------------------------*/
+
+/** 
+ * Reads an unsigned 16-bit value from a little-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian unsigned 16-bit value
+*/
+posh_u16_t  
+POSH_ReadU16FromLittle( const void *src )
+{
+    posh_u16_t   v = 0;
+    posh_byte_t *p = ( posh_byte_t * ) src;
+
+    v |= p[ 0 ];
+    v |= ( ( posh_u16_t ) p[ 1 ] ) << 8;
+
+    return v;
+}
+
+/** 
+ * Reads a signed 16-bit value from a little-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian signed 16-bit value
+*/
+posh_i16_t  
+POSH_ReadI16FromLittle( const void *src )
+{
+   return ( posh_i16_t ) POSH_ReadU16FromLittle( src );
+}
+
+/** 
+ * Reads an unsigned 32-bit value from a little-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian unsigned 32-bit value
+*/
+posh_u32_t  
+POSH_ReadU32FromLittle( const void *src )
+{
+    posh_u32_t v = 0;
+    posh_byte_t *p = ( posh_byte_t * ) src;
+
+    v |= p[ 0 ];
+    v |= ( ( posh_u32_t ) p[ 1 ] ) << 8;
+    v |= ( ( posh_u32_t ) p[ 2 ] ) << 16;
+    v |= ( ( posh_u32_t ) p[ 3 ] ) << 24;
+
+    return v;
+}
+
+/** 
+ * Reads a signed 32-bit value from a little-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian signed 32-bit value
+*/
+posh_i32_t  
+POSH_ReadI32FromLittle( const void *src )
+{
+   return ( posh_i32_t ) POSH_ReadU32FromLittle( src );
+}
+
+
+/** 
+ * Reads an unsigned 16-bit value from a big-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian unsigned 16-bit value
+*/
+posh_u16_t  
+POSH_ReadU16FromBig( const void *src )
+{
+    posh_u16_t   v = 0;
+    posh_byte_t *p = ( posh_byte_t * ) src;
+
+    v |= p[ 1 ];
+    v |= ( ( posh_u16_t ) p[ 0 ] ) << 8;
+
+    return v;
+}
+
+/** 
+ * Reads a signed 16-bit value from a big-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian signed 16-bit value
+*/
+posh_i16_t  
+POSH_ReadI16FromBig( const void *src )
+{
+   return ( posh_i16_t ) POSH_ReadU16FromBig( src );
+}
+
+/** 
+ * Reads an unsigned 32-bit value from a big-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer
+ @returns host-endian unsigned 32-bit value
+*/
+posh_u32_t  
+POSH_ReadU32FromBig( const void *src )
+{
+    posh_u32_t   v = 0;
+    posh_byte_t *p = ( posh_byte_t * ) src;
+
+    v |= p[ 3 ];
+    v |= ( ( posh_u32_t ) p[ 2 ] ) << 8;
+    v |= ( ( posh_u32_t ) p[ 1 ] ) << 16;
+    v |= ( ( posh_u32_t ) p[ 0 ] ) << 24;
+
+    return v;
+}
+
+/** 
+ * Reads a signed 32-bit value from a big-endian buffer
+ @ingroup MemoryBuffer
+ @param src [in] source buffer, read byte-wise through POSH_ReadU32FromBig()
+ @returns host-endian signed 32-bit value
+*/
+posh_i32_t  
+POSH_ReadI32FromBig( const void *src )
+{
+   return ( posh_i32_t ) POSH_ReadU32FromBig( src ); /* avoids dereferencing src as a possibly unaligned posh_i32_t */
+}
+
+#if defined POSH_64BIT_INTEGER
+
+/** 
+ * Reads an unsigned 64-bit value from a little-endian buffer
+ @param src [in] source buffer
+ @returns host-endian unsigned 64-bit value
+*/
+posh_u64_t  
+POSH_ReadU64FromLittle( const void *src )
+{
+    posh_u64_t v = 0;
+    posh_byte_t *p = ( posh_byte_t * ) src;
+    int i;
+
+    for ( i = 0; i < 8; i++ )
+    {
+        v |= ( ( posh_u64_t ) p[ i ] ) << (i*8);
+    }
+
+    return v;
+}
+
+/** 
+ * Reads a signed 64-bit value from a little-endian buffer
+ @param src [in] source buffer
+ @returns host-endian signed 64-bit value
+*/
+posh_i64_t  
+POSH_ReadI64FromLittle( const void *src )
+{
+   return ( posh_i64_t ) POSH_ReadU64FromLittle( src );
+}
+
+/** 
+ * Reads an unsigned 64-bit value from a big-endian buffer
+ @param src [in] source buffer
+ @returns host-endian unsigned 64-bit value
+*/
+posh_u64_t
+POSH_ReadU64FromBig( const void *src )
+{
+    posh_u64_t v = 0;
+    posh_byte_t *p = ( posh_byte_t * ) src;
+    int i;
+
+    for ( i = 0; i < 8; i++ )
+    {
+        v |= ( ( posh_u64_t ) p[ 7-i ] ) << (i*8);
+    }
+
+    return v;
+}
+
+/** 
+ * Reads a signed 64-bit value from a big-endian buffer
+ @param src [in] source buffer
+ @returns host-endian signed 64-bit value
+*/
+posh_i64_t
+POSH_ReadI64FromBig( const void *src )
+{
+   return ( posh_i64_t ) POSH_ReadU64FromBig( src );
+}
+
+#endif /* POSH_64BIT_INTEGER */
+
+/* ---------------------------------------------------------------------------*/
+/*                           FLOATING POINT SUPPORT                           */
+/* ---------------------------------------------------------------------------*/
+
+#if !defined POSH_NO_FLOAT
+
+/** @ingroup FloatingPoint
+    @param[in] f floating point value
+    @returns a little-endian bit representation of f
+ */
+posh_u32_t
+POSH_LittleFloatBits( float f )
+{
+   union
+   {
+      float f32;
+      posh_u32_t u32;
+   } u;
+
+   u.f32 = f;
+
+   return POSH_LittleU32( u.u32 );
+}
+
+/** 
+ * Extracts raw big-endian bits from a 32-bit floating point value
+ *
+   @ingroup FloatingPoint
+   @param   f [in] floating point value
+   @returns a big-endian bit representation of f
+ */
+posh_u32_t
+POSH_BigFloatBits( float f )
+{
+   union
+   {
+      float f32;
+      posh_u32_t u32;
+   } u;
+
+   u.f32 = f;
+
+   return POSH_BigU32( u.u32 );
+}
+
+/** 
+ * Extracts raw, little-endian bit representation from a 64-bit double.
+ *
+   @param d [in] 64-bit double precision value
+   @param dst [out] 8-byte storage buffer
+   @ingroup FloatingPoint
+   @remarks returns nothing; the raw bits used to represent the value 'd' are written to dst, in the form dst[0]=LSB
+ */
+void
+POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] )
+{
+   union
+   {
+      double d64;
+      posh_byte_t bytes[ 8 ];
+   } u;
+
+   u.d64 = d;
+
+#if defined POSH_LITTLE_ENDIAN
+   dst[ 0 ] = u.bytes[ 0 ];
+   dst[ 1 ] = u.bytes[ 1 ];
+   dst[ 2 ] = u.bytes[ 2 ];
+   dst[ 3 ] = u.bytes[ 3 ];
+   dst[ 4 ] = u.bytes[ 4 ];
+   dst[ 5 ] = u.bytes[ 5 ];
+   dst[ 6 ] = u.bytes[ 6 ];
+   dst[ 7 ] = u.bytes[ 7 ];
+#else
+   dst[ 0 ] = u.bytes[ 7 ];
+   dst[ 1 ] = u.bytes[ 6 ];
+   dst[ 2 ] = u.bytes[ 5 ];
+   dst[ 3 ] = u.bytes[ 4 ];
+   dst[ 4 ] = u.bytes[ 3 ];
+   dst[ 5 ] = u.bytes[ 2 ];
+   dst[ 6 ] = u.bytes[ 1 ];
+   dst[ 7 ] = u.bytes[ 0 ];
+#endif
+}
+
+/** 
+ * Creates a double-precision, 64-bit floating point value from a set of raw, 
+ * little-endian bits
+
+   @ingroup FloatingPoint
+   @param src [in] little-endian byte representation of 64-bit double precision 
+                  floating point value
+   @returns double precision floating point representation of the raw bits
+   @remarks No error checking is performed, so there are no guarantees that the 
+            result is a valid number, nor is there any check to ensure that src is 
+            non-NULL.  BE CAREFUL USING THIS.
+ */
+double
+POSH_DoubleFromBits( const posh_byte_t src[ 8 ] )
+{
+   union
+   {
+      double d64;
+      posh_byte_t bytes[ 8 ];
+   } u;
+
+#if defined POSH_LITTLE_ENDIAN
+   u.bytes[ 0 ] = src[ 0 ];
+   u.bytes[ 1 ] = src[ 1 ];
+   u.bytes[ 2 ] = src[ 2 ];
+   u.bytes[ 3 ] = src[ 3 ];
+   u.bytes[ 4 ] = src[ 4 ];
+   u.bytes[ 5 ] = src[ 5 ];
+   u.bytes[ 6 ] = src[ 6 ];
+   u.bytes[ 7 ] = src[ 7 ];
+#else
+   u.bytes[ 0 ] = src[ 7 ];
+   u.bytes[ 1 ] = src[ 6 ];
+   u.bytes[ 2 ] = src[ 5 ];
+   u.bytes[ 3 ] = src[ 4 ];
+   u.bytes[ 4 ] = src[ 3 ];
+   u.bytes[ 5 ] = src[ 2 ];
+   u.bytes[ 6 ] = src[ 1 ];
+   u.bytes[ 7 ] = src[ 0 ];
+#endif
+
+   return u.d64;
+}
+
+/** 
+ * Creates a floating point number from little endian bits
+ *
+   @ingroup FloatingPoint
+   @param   bits [in] raw floating point bits in little-endian form
+   @returns a floating point number based on the given bit representation
+   @remarks No error checking is performed, so there are no guarantees that the 
+            result is a valid number.  BE CAREFUL USING THIS.
+ */
+float       
+POSH_FloatFromLittleBits( posh_u32_t bits )
+{
+   union
+   {
+      float f32;
+      posh_u32_t u32;
+   } u;
+
+   u.u32 = bits;
+#if defined POSH_BIG_ENDIAN
+   u.u32 = POSH_SwapU32( u.u32 );
+#endif
+
+   return u.f32;
+}
+
+/** 
+ * Creates a floating point number from big-endian bits
+ *
+   @ingroup FloatingPoint
+   @param   bits [in] raw floating point bits in big-endian form
+   @returns a floating point number based on the given bit representation
+   @remarks No error checking is performed, so there are no guarantees that the 
+            result is a valid number.  BE CAREFUL USING THIS.
+ */
+float
+POSH_FloatFromBigBits( posh_u32_t bits )
+{
+   union
+   {
+      float f32;
+      posh_u32_t u32;
+   } u;
+
+   u.u32 = bits;
+#if defined POSH_LITTLE_ENDIAN
+   u.u32 = POSH_SwapU32( u.u32 );
+#endif
+
+   return u.f32;
+}
+
+#endif /* !defined POSH_NO_FLOAT */
diff --git a/thirdparty/thekla_atlas/poshlib/posh.h b/thirdparty/thekla_atlas/poshlib/posh.h
new file mode 100644
index 0000000000..c3efe26a2d
--- /dev/null
+++ b/thirdparty/thekla_atlas/poshlib/posh.h
@@ -0,0 +1,1030 @@
+/**
+@file posh.h
+@author Brian Hook
+@version 1.3.001
+
+Header file for POSH, the Portable Open Source Harness project.
+
+NOTE: Unlike most header files, this one is designed to be included
+multiple times, which is why it does not have the @#ifndef/@#define
+preamble.
+
+POSH relies on environment specified preprocessor symbols in order
+to infer as much as possible about the target OS/architecture and
+the host compiler capabilities.
+
+NOTE: POSH is simple and focused. It attempts to provide basic
+functionality and information, but it does NOT attempt to emulate
+missing functionality.  I am also not willing to make POSH dirty
+and hackish to support truly ancient and/or outmoded and/or bizarre
+technologies such as non-ANSI compilers, systems with non-IEEE
+floating point formats, segmented 16-bit operating systems, etc.
+
+Please refer to the accompanying HTML documentation or visit
+http://www.poshlib.org for more information on how to use POSH.
+
+LICENSE:
+
+Copyright (c) 2004, Brian Hook
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+
+    * The names of this package'ss contributors contributors may not
+      be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+REVISION:
+
+I've been lax about revision histories, so this starts at, um, 1.3.001.
+Sorry for any inconveniences.
+
+1.3.001 - 2/23/2006 - Incorporated fix for bug reported by Bill Cary,
+                      where I was not detecting Visual Studio
+                      compilation on x86-64 systems.  Added check for
+                      _M_X64 which should fix that.
+
+*/
+/*
+I have yet to find an authoritative reference on preprocessor
+symbols, but so far this is what I've gleaned:
+
+GNU GCC/G++:
+   - __GNUC__: GNU C version
+   - __GNUG__: GNU C++ compiler
+   - __sun__ : on Sun platforms
+   - __svr4__: on Solaris and other SysV R4 platforms
+   - __mips__: on MIPS processor platforms
+   - __sparc_v9__: on Sparc 64-bit CPUs
+   - __sparcv9: 64-bit Solaris
+   - __MIPSEL__: mips processor, compiled for little endian
+   - __MIPSEB__: mips processor, compiled for big endian
+   - _R5900: MIPS/Sony/Toshiba R5900 (PS2)
+   - mc68000: 68K
+   - m68000: 68K
+   - m68k: 68K
+   - __palmos__: PalmOS
+
+Intel C/C++ Compiler:
+   - __ECC      : compiler version, IA64 only
+   - __EDG__
+   - __ELF__
+   - __GXX_ABI_VERSION
+   - __i386     : IA-32 only
+   - __i386__   : IA-32 only
+   - i386       : IA-32 only
+   - __ia64     : IA-64 only
+   - __ia64__   : IA-64 only
+   - ia64       : IA-64 only
+   - __ICC      : IA-32 only
+   - __INTEL_COMPILER : IA-32 or IA-64, newer versions only
+
+Apple's C/C++ Compiler for OS X:
+   - __APPLE_CC__
+   - __APPLE__
+   - __BIG_ENDIAN__
+   - __APPLE__
+   - __ppc__
+   - __MACH__
+
+DJGPP:
+   - __MSDOS__
+   - __unix__
+   - __unix
+   - __GNUC__
+   - __GO32
+   - DJGPP
+   - __i386, __i386__, i386
+
+Cray's C compiler:
+   - _ADDR64: if 64-bit pointers
+   - _UNICOS: 
+   - __unix:
+
+SGI's CC compiler predefines the following (and more) with -ansi:
+   - __sgi
+   - __unix
+   - __host_mips
+   - _SYSTYPE_SVR4
+   - __mips
+   - _MIPSEB
+   - anyone know if there is a predefined symbol for the compiler?!
+
+MinGW:
+   - as GnuC but also defines _WIN32, __WIN32, WIN32, _X86_, __i386, __i386__, and several others
+   - __MINGW32__
+
+Cygwin:
+   - as Gnu C, but also
+   - __unix__
+   - __CYGWIN32__
+
+Microsoft Visual Studio predefines the following:
+   - _MSC_VER
+   - _WIN32: on Win32
+   - _M_IX86 (on x86 systems)
+   - _M_X64: on x86-64 systems
+   - _M_ALPHA (on DEC AXP systems)
+   - _SH3: WinCE, Hitachi SH-3
+   - _MIPS: WinCE, MIPS
+   - _ARM: WinCE, ARM
+
+Sun's C Compiler:
+   - sun and _sun
+   - unix and _unix
+   - sparc and _sparc (SPARC systems only)
+   - i386 and _i386 (x86 systems only)
+   - __SVR4 (Solaris only)
+   - __sparcv9: 64-bit solaris
+   - __SUNPRO_C
+   - _LP64: defined in 64-bit LP64 mode, but only if <sys/types.h> is included
+
+Borland C/C++ predefines the following:
+   - __BORLANDC__:
+
+DEC/Compaq C/C++ on Alpha:
+   - __alpha
+   - __arch64__
+   - __unix__ (on Tru64 Unix)
+   - __osf__
+   - __DECC
+   - __DECCXX (C++ compilation)
+   - __DECC_VER
+   - __DECCXX_VER
+
+IBM's AIX compiler:
+   - __64BIT__ if 64-bit mode
+   - _AIX
+   - __IBMC__: C compiler version
+   - __IBMCPP__: C++ compiler version
+   - _LONG_LONG: compiler allows long long
+
+Watcom:
+   - __WATCOMC__
+   - __DOS__ : if targeting DOS
+   - __386__ : if 32-bit support
+   - __WIN32__ : if targeting 32-bit Windows
+
+HP-UX C/C++ Compiler:
+   - __hpux
+   - __unix
+   - __hppa (on PA-RISC)
+   - __LP64__: if compiled in 64-bit mode
+
+Metrowerks:
+   - __MWERKS__
+   - __powerpc__
+   - _powerc
+   - __MC68K__
+   - macintosh when compiling for MacOS
+   - __INTEL__ for x86 targets
+   - __POWERPC__
+
+*/
+
+/*
+** ----------------------------------------------------------------------------
+** Include <limits.h> optionally
+** ----------------------------------------------------------------------------
+*/
+#ifdef POSH_USE_LIMITS_H
+#  include <limits.h>
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** Determine compilation environment
+** ----------------------------------------------------------------------------
+*/
+#if defined __ECC || defined __ICC || defined __INTEL_COMPILER
+#  define POSH_COMPILER_STRING "Intel C/C++"
+#  define POSH_COMPILER_INTEL 1
+#endif
+
+#if ( defined __host_mips || defined __sgi ) && !defined __GNUC__
+#  define POSH_COMPILER_STRING    "MIPSpro C/C++"
+#  define POSH_COMPILER_MIPSPRO 1 
+#endif
+
+#if defined __hpux && !defined __GNUC__
+#  define POSH_COMPILER_STRING "HP-UX CC"
+#  define POSH_COMPILER_HPCC 1 
+#endif
+
+#if defined __GNUC__ && !defined __clang__
+#  define POSH_COMPILER_STRING "Gnu GCC"
+#  define POSH_COMPILER_GCC 1
+#endif
+
+#if defined __clang__
+#  define POSH_COMPILER_STRING "Clang"
+#  define POSH_COMPILER_CLANG 1
+#endif
+
+#if defined __APPLE_CC__
+   /* we don't define the compiler string here, let it be GNU */
+#  define POSH_COMPILER_APPLECC 1
+#endif
+
+#if defined __IBMC__ || defined __IBMCPP__
+#  define POSH_COMPILER_STRING "IBM C/C++"
+#  define POSH_COMPILER_IBM 1
+#endif
+
+#if defined _MSC_VER
+#  define POSH_COMPILER_STRING "Microsoft Visual C++"
+#  define POSH_COMPILER_MSVC 1
+#endif
+
+#if defined __SUNPRO_C
+#  define POSH_COMPILER_STRING "Sun Pro" 
+#  define POSH_COMPILER_SUN 1
+#endif
+
+#if defined __BORLANDC__
+#  define POSH_COMPILER_STRING "Borland C/C++"
+#  define POSH_COMPILER_BORLAND 1
+#endif
+
+#if defined __MWERKS__
+#  define POSH_COMPILER_STRING     "MetroWerks CodeWarrior"
+#  define POSH_COMPILER_METROWERKS 1
+#endif
+
+#if defined __DECC || defined __DECCXX
+#  define POSH_COMPILER_STRING "Compaq/DEC C/C++"
+#  define POSH_COMPILER_DEC 1
+#endif
+
+#if defined __WATCOMC__
+#  define POSH_COMPILER_STRING "Watcom C/C++"
+#  define POSH_COMPILER_WATCOM 1
+#endif
+
+#if !defined POSH_COMPILER_STRING
+#  define POSH_COMPILER_STRING "Unknown compiler"
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** Determine target operating system
+** ----------------------------------------------------------------------------
+*/
+#if defined linux || defined __linux__
+#  define POSH_OS_LINUX 1 
+#  define POSH_OS_STRING "Linux"
+#endif
+
+#if defined __FreeBSD__
+#  define POSH_OS_FREEBSD 1 
+#  define POSH_OS_STRING "FreeBSD"
+#endif
+
+#if defined __CYGWIN32__
+#  define POSH_OS_CYGWIN32 1
+#  define POSH_OS_STRING "Cygwin"
+#endif
+
+#if defined GEKKO
+#  define POSH_OS_GAMECUBE
+#  define __powerpc__
+#  define POSH_OS_STRING "GameCube"
+#endif
+
+#if defined __MINGW32__
+#  define POSH_OS_MINGW 1
+#  define POSH_OS_STRING "MinGW"
+#endif
+
+#if defined GO32 && defined DJGPP && defined __MSDOS__
+#  define POSH_OS_GO32 1
+#  define POSH_OS_STRING "GO32/MS-DOS"
+#endif
+
+/* NOTE: make sure you use /bt=DOS if compiling for 32-bit DOS,
+   otherwise Watcom assumes host=target */
+#if defined __WATCOMC__  && defined __386__ && defined __DOS__
+#  define POSH_OS_DOS32 1
+#  define POSH_OS_STRING "DOS/32-bit"
+#endif
+
+#if defined _UNICOS
+#  define POSH_OS_UNICOS 1
+#  define POSH_OS_STRING "UNICOS"
+#endif
+
+//ACS if we're in xcode, look at the target conditionals to figure out if this is ios or osx
+#if defined __APPLE__
+#  include "TargetConditionals.h"
+#endif
+#if TARGET_OS_IPHONE
+#    define POSH_OS_IOS 1
+#    define POSH_OS_STRING "iOS"
+#else
+#  if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
+#    define POSH_OS_OSX 1
+#    define POSH_OS_STRING "MacOS X"
+#  endif
+#endif
+
+#if defined __sun__ || defined sun || defined __sun || defined __solaris__
+#  if defined __SVR4 || defined __svr4__ || defined __solaris__
+#     define POSH_OS_STRING "Solaris"
+#     define POSH_OS_SOLARIS 1
+#  endif
+#  if !defined POSH_OS_STRING
+#     define POSH_OS_STRING "SunOS"
+#     define POSH_OS_SUNOS 1
+#  endif
+#endif
+
+#if defined __sgi__ || defined sgi || defined __sgi
+#  define POSH_OS_IRIX 1
+#  define POSH_OS_STRING "Irix"
+#endif
+
+#if defined __hpux__ || defined __hpux
+#  define POSH_OS_HPUX 1
+#  define POSH_OS_STRING "HP-UX"
+#endif
+
+#if defined _AIX
+#  define POSH_OS_AIX 1
+#  define POSH_OS_STRING "AIX"
+#endif
+
+#if ( defined __alpha && defined __osf__ )
+#  define POSH_OS_TRU64 1
+#  define POSH_OS_STRING "Tru64"
+#endif
+
+#if defined __BEOS__ || defined __beos__
+#  define POSH_OS_BEOS 1
+#  define POSH_OS_STRING "BeOS"
+#endif
+
+#if defined amiga || defined amigados || defined AMIGA || defined _AMIGA
+#  define POSH_OS_AMIGA 1
+#  define POSH_OS_STRING "Amiga"
+#endif
+
+#if defined __unix__
+#  define POSH_OS_UNIX 1 
+#  if !defined POSH_OS_STRING
+#     define POSH_OS_STRING "Unix-like(generic)"
+#  endif
+#endif
+
+#if defined _WIN32_WCE
+#  define POSH_OS_WINCE 1
+#  define POSH_OS_STRING "Windows CE"
+#endif
+
+#if defined _XBOX || defined _XBOX_VER
+#  define POSH_OS_XBOX 1
+#  define POSH_OS_STRING "XBOX"
+#endif
+
+#if defined __ORBIS__
+#   define POSH_OS_ORBIS
+#endif
+
+#if defined _WIN32 || defined WIN32 || defined __NT__ || defined __WIN32__
+#  if !defined POSH_OS_XBOX
+#  define POSH_OS_WIN32 1
+#     if defined _WIN64
+#        define POSH_OS_WIN64 1
+#        define POSH_OS_STRING "Win64"
+#     else
+#        if !defined POSH_OS_STRING
+#           define POSH_OS_STRING "Win32"
+#        endif
+#     endif
+#  endif
+#endif
+
+#if defined __palmos__
+#  define POSH_OS_PALM 1
+#  define POSH_OS_STRING "PalmOS"
+#endif
+
+#if defined THINK_C || defined macintosh
+#  define POSH_OS_MACOS 1
+#  define POSH_OS_STRING "MacOS"
+#endif
+
+/*
+** -----------------------------------------------------------------------------
+** Determine target CPU
+** -----------------------------------------------------------------------------
+*/
+
+#if defined GEKKO
+#  define POSH_CPU_PPC750 1
+#  define POSH_CPU_STRING "IBM PowerPC 750 (NGC)"
+#endif
+
+#if defined mc68000 || defined m68k || defined __MC68K__ || defined m68000
+#  define POSH_CPU_68K 1
+#  define POSH_CPU_STRING "MC68000"
+#endif
+
+#if defined __PPC__ || defined __POWERPC__  || defined powerpc || defined _POWER || defined __ppc__ || defined __powerpc__ || defined _M_PPC
+#  define POSH_CPU_PPC 1
+#  if !defined POSH_CPU_STRING
+#    if defined __powerpc64__
+#       define POSH_CPU_STRING "PowerPC64"
+#    else
+#       define POSH_CPU_STRING "PowerPC"
+#    endif
+#  endif
+#endif
+
+#if defined _CRAYT3E || defined _CRAYMPP
+#  define POSH_CPU_CRAYT3E 1 /* target processor is a DEC Alpha 21164 used in a Cray T3E*/
+#  define POSH_CPU_STRING "Cray T3E (Alpha 21164)"
+#endif
+
+#if defined CRAY || defined _CRAY && !defined _CRAYT3E
+#  error Non-AXP Cray systems not supported
+#endif
+
+#if defined _SH3
+#  define POSH_CPU_SH3 1
+#  define POSH_CPU_STRING "Hitachi SH-3"
+#endif
+
+#if defined __sh4__ || defined __SH4__
+#  define POSH_CPU_SH3 1
+#  define POSH_CPU_SH4 1
+#  define POSH_CPU_STRING "Hitachi SH-4"
+#endif
+
+#if defined __sparc__ || defined __sparc
+#  if defined __arch64__ || defined __sparcv9 || defined __sparc_v9__
+#     define POSH_CPU_SPARC64 1 
+#     define POSH_CPU_STRING "Sparc/64"
+#  else
+#     define POSH_CPU_STRING "Sparc/32"
+#  endif
+#  define POSH_CPU_SPARC 1
+#endif
+
+#if defined ARM || defined __arm__ || defined _ARM || defined _M_ARM || defined __aarch64__ || defined _M_ARM64
+#  define POSH_CPU_STRONGARM 1 /* set for every ARM target matched above, including 64-bit (__aarch64__, _M_ARM64) */
+#  define POSH_CPU_STRING "ARM"
+#endif
+
+#if defined mips || defined __mips__ || defined __MIPS__ || defined _MIPS
+#  define POSH_CPU_MIPS 1 
+#  if defined _R5900
+#    define POSH_CPU_STRING "MIPS R5900 (PS2)"
+#  else
+#    define POSH_CPU_STRING "MIPS"
+#  endif
+#endif
+
+#if defined __ia64 || defined _M_IA64 || defined __ia64__ 
+#  define POSH_CPU_IA64 1
+#  define POSH_CPU_STRING "IA64"
+#endif
+
+#if defined __X86__ || defined __i386__ || defined i386 || defined _M_IX86 || defined __386__ || defined __x86_64__ || defined _M_X64
+#  define POSH_CPU_X86 1
+#  if defined __x86_64__ || defined _M_X64
+#     define POSH_CPU_X86_64 1 
+#  endif
+#  if defined POSH_CPU_X86_64
+#     define POSH_CPU_STRING "AMD x86-64"
+#  else
+#     define POSH_CPU_STRING "Intel 386+"
+#  endif
+#endif
+
+#if defined __alpha || defined alpha || defined _M_ALPHA || defined __alpha__
+#  define POSH_CPU_AXP 1
+#  define POSH_CPU_STRING "AXP"
+#endif
+
+#if defined __hppa || defined hppa
+#  define POSH_CPU_HPPA 1
+#  define POSH_CPU_STRING "PA-RISC"
+#endif
+
+#if !defined POSH_CPU_STRING
+#  error POSH cannot determine target CPU
+#  define POSH_CPU_STRING "Unknown" /* this is here for Doxygen's benefit */
+#endif
+
+/*
+** -----------------------------------------------------------------------------
+** Attempt to autodetect building for embedded on Sony PS2
+** -----------------------------------------------------------------------------
+*/
+#if !defined POSH_OS_STRING
+#  if !defined FORCE_DOXYGEN
+#    define POSH_OS_EMBEDDED 1 
+#  endif
+#  if defined _R5900
+#     define POSH_OS_STRING "Sony PS2(embedded)"
+#  else
+#     define POSH_OS_STRING "Embedded/Unknown"
+#  endif
+#endif
+
+/*
+** ---------------------------------------------------------------------------
+** Handle cdecl, stdcall, fastcall, etc.
+** ---------------------------------------------------------------------------
+*/
+#if defined POSH_CPU_X86 && !defined POSH_CPU_X86_64
+#  if defined __GNUC__
+#     define POSH_CDECL __attribute__((cdecl))
+#     define POSH_STDCALL __attribute__((stdcall))
+#     define POSH_FASTCALL __attribute__((fastcall))
+#  elif ( defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ || defined __MWERKS__ )
+#     define POSH_CDECL    __cdecl
+#     define POSH_STDCALL  __stdcall
+#     define POSH_FASTCALL __fastcall
+#  endif
+#else
+#  define POSH_CDECL    
+#  define POSH_STDCALL  
+#  define POSH_FASTCALL 
+#endif
+
+/*
+** ---------------------------------------------------------------------------
+** Define POSH_IMPORTEXPORT signature based on POSH_DLL and POSH_BUILDING_LIB
+** ---------------------------------------------------------------------------
+*/
+
+/*
+** We undefine this so that multiple inclusions will work
+*/
+#if defined POSH_IMPORTEXPORT
+#  undef POSH_IMPORTEXPORT
+#endif
+
+#if defined POSH_DLL
+#   if defined POSH_OS_WIN32
+#      if defined _MSC_VER 
+#         if ( _MSC_VER >= 800 )
+#            if defined POSH_BUILDING_LIB
+#               define POSH_IMPORTEXPORT __declspec( dllexport )
+#            else
+#               define POSH_IMPORTEXPORT __declspec( dllimport )
+#            endif
+#         else
+#            if defined POSH_BUILDING_LIB
+#               define POSH_IMPORTEXPORT __export
+#            else
+#               define POSH_IMPORTEXPORT 
+#            endif
+#         endif
+#      endif  /* defined _MSC_VER */
+#      if defined __BORLANDC__
+#         if ( __BORLANDC__ >= 0x500 )
+#            if defined POSH_BUILDING_LIB 
+#               define POSH_IMPORTEXPORT __declspec( dllexport )
+#            else
+#               define POSH_IMPORTEXPORT __declspec( dllimport )
+#            endif
+#         else
+#            if defined POSH_BUILDING_LIB
+#               define POSH_IMPORTEXPORT __export
+#            else
+#               define POSH_IMPORTEXPORT 
+#            endif
+#         endif
+#      endif /* defined __BORLANDC__ */
+       /* for all other compilers, we're just making a blanket assumption */
+#      if defined __GNUC__ || defined __WATCOMC__ || defined __MWERKS__
+#         if defined POSH_BUILDING_LIB
+#            define POSH_IMPORTEXPORT __declspec( dllexport )
+#         else
+#            define POSH_IMPORTEXPORT __declspec( dllimport )
+#         endif
+#      endif /* all other compilers */
+#      if !defined POSH_IMPORTEXPORT
+#         error Building DLLs not supported on this compiler (poshlib@poshlib.org if you know how)
+#      endif
+#   endif /* defined POSH_OS_WIN32 */
+#endif
+
+/* On pretty much everything else, we can thankfully just ignore this */
+#if !defined POSH_IMPORTEXPORT
+#  define POSH_IMPORTEXPORT
+#endif
+
+#if defined FORCE_DOXYGEN
+#  define POSH_DLL    
+#  define POSH_BUILDING_LIB
+#  undef POSH_DLL
+#  undef POSH_BUILDING_LIB
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** (Re)define POSH_PUBLIC_API export signature 
+** ----------------------------------------------------------------------------
+*/
+#ifdef POSH_PUBLIC_API
+#  undef POSH_PUBLIC_API
+#endif
+
+#if ( ( defined _MSC_VER ) && ( _MSC_VER < 800 ) ) || ( defined __BORLANDC__ && ( __BORLANDC__ < 0x500 ) )
+#  define POSH_PUBLIC_API(rtype) extern rtype POSH_IMPORTEXPORT 
+#else
+#  define POSH_PUBLIC_API(rtype) extern POSH_IMPORTEXPORT rtype
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** Try to infer endianness.  Basically we just go through the CPUs we know are
+** little endian, and assume anything that isn't one of those is big endian.
+** As a sanity check, we also do this with operating systems we know are
+** little endian, such as Windows.  Some processors are bi-endian, such as 
+** the MIPS series, so we have to be careful about those.
+** ----------------------------------------------------------------------------
+*/
+#if defined POSH_CPU_X86 || defined POSH_CPU_AXP || defined POSH_CPU_STRONGARM || defined POSH_OS_WIN32 || defined POSH_OS_WINCE || defined __MIPSEL__
+#  define POSH_ENDIAN_STRING "little"
+#  define POSH_LITTLE_ENDIAN 1
+#else
+#  define POSH_ENDIAN_STRING "big"
+#  define POSH_BIG_ENDIAN 1
+#endif
+
+#if defined FORCE_DOXYGEN
+#  define POSH_LITTLE_ENDIAN
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** Cross-platform compile time assertion macro
+** ----------------------------------------------------------------------------
+*/
+#define POSH_COMPILE_TIME_ASSERT(name, x) typedef int _POSH_dummy_ ## name[(x) ? 1 : -1 ]
+
+/*
+** ----------------------------------------------------------------------------
+** 64-bit Integer
+**
+** We don't require 64-bit support, nor do we emulate its functionality, we
+** simply export it if it's available.  Since we can't count on <limits.h>
+** for 64-bit support, we ignore the POSH_USE_LIMITS_H directive.
+** ----------------------------------------------------------------------------
+*/
+#if defined ( __LP64__ ) || defined ( __powerpc64__ ) || defined POSH_CPU_SPARC64
+#  define POSH_64BIT_INTEGER 1
+typedef long posh_i64_t;          /* long is taken as the 64-bit type here; the size asserts further down verify it */
+typedef unsigned long posh_u64_t;
+#  define POSH_I64( x ) ((posh_i64_t)(x))   /* argument parenthesized so the cast covers a whole expression */
+#  define POSH_U64( x ) ((posh_u64_t)(x))
+#  define POSH_I64_PRINTF_PREFIX "l"
+#elif defined _MSC_VER || defined __BORLANDC__ || defined __WATCOMC__ || ( defined __alpha && defined __DECC )
+#  define POSH_64BIT_INTEGER 1
+typedef __int64 posh_i64_t;
+typedef unsigned __int64 posh_u64_t;
+#  define POSH_I64( x ) ((posh_i64_t)(x))   /* argument parenthesized so the cast covers a whole expression */
+#  define POSH_U64( x ) ((posh_u64_t)(x))
+#  define POSH_I64_PRINTF_PREFIX "I64"
+#elif defined __GNUC__ || defined __MWERKS__ || defined __SUNPRO_C || defined __SUNPRO_CC || defined __APPLE_CC__ || defined POSH_OS_IRIX || defined _LONG_LONG || defined _CRAYC
+#  define POSH_64BIT_INTEGER 1
+typedef long long posh_i64_t;
+typedef unsigned long long posh_u64_t;
+#  define POSH_U64( x ) ((posh_u64_t)(x##LL))   /* LL is pasted onto x, so x must be an integer literal */
+#  define POSH_I64( x ) ((posh_i64_t)(x##LL))
+#  define POSH_I64_PRINTF_PREFIX "ll"
+#endif
+
+/* hack */
+/*#ifdef __MINGW32__
+#undef POSH_I64
+#undef POSH_U64
+#undef POSH_I64_PRINTF_PREFIX
+#define POSH_I64( x ) ((posh_i64_t)x)
+#define POSH_U64( x ) ((posh_u64_t)x)
+#define POSH_I64_PRINTF_PREFIX "I64"
+#endif*/
+
+#ifdef FORCE_DOXYGEN
+typedef long long posh_i64_t;
+typedef unsigned long posh_u64_t;
+#  define POSH_64BIT_INTEGER
+#  define POSH_I64_PRINTF_PREFIX
+#  define POSH_I64(x)
+#  define POSH_U64(x)
+#endif
+
+/** Minimum value for a 64-bit signed integer */
+#define POSH_I64_MIN  POSH_I64(0x8000000000000000)
+/** Maximum value for a 64-bit signed integer */
+#define POSH_I64_MAX  POSH_I64(0x7FFFFFFFFFFFFFFF)
+/** Minimum value for a 64-bit unsigned integer */
+#define POSH_U64_MIN  POSH_U64(0)
+/** Maximum value for a 64-bit unsigned integer */
+#define POSH_U64_MAX  POSH_U64(0xFFFFFFFFFFFFFFFF)
+
+/* ----------------------------------------------------------------------------
+** Basic Sized Types
+**
+** These types are expected to be EXACTLY sized so you can use them for
+** serialization.
+** ----------------------------------------------------------------------------
+*/
+#define POSH_FALSE 0 
+#define POSH_TRUE  1 
+
+typedef int            posh_bool_t;
+typedef unsigned char  posh_byte_t;
+
+/* NOTE: These assume that CHAR_BIT is 8!! */
+typedef unsigned char  posh_u8_t;
+typedef signed char    posh_i8_t;
+
+#if defined POSH_USE_LIMITS_H
+#  if CHAR_BIT > 8
+#    error This machine uses 9-bit characters.  This is a warning, you can comment this out now.
+#  endif /* CHAR_BIT > 8 */
+
+/* 16-bit: short is accepted only when USHRT_MAX shows it is exactly 16 bits wide */
+#  if ( USHRT_MAX == 65535 ) 
+   typedef unsigned short posh_u16_t;
+   typedef short          posh_i16_t;
+#  else
+   /* Yes, in theory there could still be a 16-bit character type and shorts are
+      32-bits in size...if you find such an architecture, let me know =P */
+#    error No 16-bit type found
+#  endif
+
+/* 32-bit: prefer int, fall back to long when INT_MAX is not the 32-bit maximum */
+#  if ( INT_MAX == 2147483647 )
+  typedef unsigned       posh_u32_t;
+  typedef int            posh_i32_t;
+#  elif ( LONG_MAX == 2147483647 )
+  typedef unsigned long  posh_u32_t;
+  typedef long           posh_i32_t;
+#  else
+#    error No 32-bit type found
+#  endif
+
+#else /* POSH_USE_LIMITS_H */
+/* Without <limits.h> the widths are assumed; the size asserts further down catch a wrong guess */
+  typedef unsigned short posh_u16_t;
+  typedef short          posh_i16_t;
+
+#  if !defined POSH_OS_PALM
+  typedef unsigned       posh_u32_t;
+  typedef int            posh_i32_t;
+#  else
+  typedef unsigned long  posh_u32_t;
+  typedef long           posh_i32_t;
+#  endif
+#endif
+
+/** Minimum value for a byte */
+#define POSH_BYTE_MIN    0
+/** Maximum value for an 8-bit unsigned value */
+#define POSH_BYTE_MAX    255
+/** Minimum value for a 16-bit signed value */
+#define POSH_I16_MIN     ( ( posh_i16_t ) 0x8000 )
+/** Maximum value for a 16-bit signed value */
+#define POSH_I16_MAX     ( ( posh_i16_t ) 0x7FFF ) 
+/** Minimum value for a 16-bit unsigned value */
+#define POSH_U16_MIN     0
+/** Maximum value for a 16-bit unsigned value */
+#define POSH_U16_MAX     ( ( posh_u16_t ) 0xFFFF )
+/** Minimum value for a 32-bit signed value */
+#define POSH_I32_MIN     ( ( posh_i32_t ) 0x80000000 )
+/** Maximum value for a 32-bit signed value */
+#define POSH_I32_MAX     ( ( posh_i32_t ) 0x7FFFFFFF )
+/** Minimum value for a 32-bit unsigned value */
+#define POSH_U32_MIN     0
+/** Maximum value for a 32-bit unsigned value */
+#define POSH_U32_MAX     ( ( posh_u32_t ) 0xFFFFFFFF )
+
+/*
+** ----------------------------------------------------------------------------
+** Sanity checks on expected sizes
+** ----------------------------------------------------------------------------
+*/
+#if !defined FORCE_DOXYGEN
+
+POSH_COMPILE_TIME_ASSERT(posh_byte_t, sizeof(posh_byte_t) == 1);
+POSH_COMPILE_TIME_ASSERT(posh_u8_t, sizeof(posh_u8_t) == 1);
+POSH_COMPILE_TIME_ASSERT(posh_i8_t, sizeof(posh_i8_t) == 1);
+POSH_COMPILE_TIME_ASSERT(posh_u16_t, sizeof(posh_u16_t) == 2);
+POSH_COMPILE_TIME_ASSERT(posh_i16_t, sizeof(posh_i16_t) == 2);
+POSH_COMPILE_TIME_ASSERT(posh_u32_t, sizeof(posh_u32_t) == 4);
+POSH_COMPILE_TIME_ASSERT(posh_i32_t, sizeof(posh_i32_t) == 4);
+
+#if !defined POSH_NO_FLOAT
+   POSH_COMPILE_TIME_ASSERT(posh_testfloat_t, sizeof(float)==4 );
+   POSH_COMPILE_TIME_ASSERT(posh_testdouble_t, sizeof(double)==8);
+#endif
+
+#if defined POSH_64BIT_INTEGER
+   POSH_COMPILE_TIME_ASSERT(posh_u64_t, sizeof(posh_u64_t) == 8);
+   POSH_COMPILE_TIME_ASSERT(posh_i64_t, sizeof(posh_i64_t) == 8);
+#endif
+
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** 64-bit pointer support
+** ----------------------------------------------------------------------------
+*/
+#if defined POSH_CPU_AXP && ( defined POSH_OS_TRU64 || defined POSH_OS_LINUX )
+#  define POSH_64BIT_POINTER 1
+#endif
+
+#if defined POSH_CPU_X86_64 && defined POSH_OS_LINUX
+#  define POSH_64BIT_POINTER 1
+#endif
+
+#if defined POSH_CPU_SPARC64 || defined POSH_OS_WIN64 || defined __64BIT__ || defined __LP64 || defined _LP64 || defined __LP64__ || defined _ADDR64 || defined _CRAYC
+#   define POSH_64BIT_POINTER 1
+#endif
+
+#if defined POSH_64BIT_POINTER
+   POSH_COMPILE_TIME_ASSERT( posh_64bit_pointer, sizeof( void * ) == 8 );
+#elif !defined FORCE_DOXYGEN
+/* if this assertion is hit then you're on a system that either has 64-bit
+   addressing and we didn't catch it, or you're on a system with 16-bit
+   pointers.  In the latter case, POSH doesn't actually care, we're just
+   triggering this assertion to make sure you're aware of the situation,
+   so feel free to delete it.
+
+   If this assertion is triggered on a known 32 or 64-bit platform, 
+   please let us know (poshlib@poshlib.org) */
+   POSH_COMPILE_TIME_ASSERT( posh_32bit_pointer, sizeof( void * ) == 4 );
+#endif
+
+#if defined FORCE_DOXYGEN
+#  define POSH_64BIT_POINTER
+#endif
+
+/*
+** ----------------------------------------------------------------------------
+** POSH Utility Functions
+**
+** These are optional POSH utility functions that are not required if you don't
+** need anything except static checking of your host and target environment.
+** 
+** These functions are NOT wrapped with POSH_PUBLIC_API because I didn't want
+** to enforce their export if your own library is only using them internally.
+** ----------------------------------------------------------------------------
+*/
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+const char *POSH_GetArchString( void );
+
+#if !defined POSH_NO_FLOAT
+
+posh_u32_t  POSH_LittleFloatBits( float f );
+posh_u32_t  POSH_BigFloatBits( float f );
+float       POSH_FloatFromLittleBits( posh_u32_t bits );
+float       POSH_FloatFromBigBits( posh_u32_t bits );
+
+void        POSH_DoubleBits( double d, posh_byte_t dst[ 8 ] );
+double      POSH_DoubleFromBits( const posh_byte_t src[ 8 ] );
+
+/* unimplemented
+float      *POSH_WriteFloatToLittle( void *dst, float f );
+float      *POSH_WriteFloatToBig( void *dst, float f );
+float       POSH_ReadFloatFromLittle( const void *src );
+float       POSH_ReadFloatFromBig( const void *src );
+
+double     *POSH_WriteDoubleToLittle( void *dst, double d );
+double     *POSH_WriteDoubleToBig( void *dst, double d );
+double      POSH_ReadDoubleFromLittle( const void *src );
+double      POSH_ReadDoubleFromBig( const void *src );
+*/
+#endif /* !defined POSH_NO_FLOAT */
+
+#if defined FORCE_DOXYGEN
+#  define POSH_NO_FLOAT
+#  undef  POSH_NO_FLOAT
+#endif
+
+extern posh_u16_t  POSH_SwapU16( posh_u16_t u );
+extern posh_i16_t  POSH_SwapI16( posh_i16_t u );
+extern posh_u32_t  POSH_SwapU32( posh_u32_t u );
+extern posh_i32_t  POSH_SwapI32( posh_i32_t u );
+
+#if defined POSH_64BIT_INTEGER
+
+extern posh_u64_t  POSH_SwapU64( posh_u64_t u );
+extern posh_i64_t  POSH_SwapI64( posh_i64_t u );
+
+#endif /*POSH_64BIT_INTEGER */
+
+extern posh_u16_t *POSH_WriteU16ToLittle( void *dst, posh_u16_t value );
+extern posh_i16_t *POSH_WriteI16ToLittle( void *dst, posh_i16_t value );
+extern posh_u32_t *POSH_WriteU32ToLittle( void *dst, posh_u32_t value );
+extern posh_i32_t *POSH_WriteI32ToLittle( void *dst, posh_i32_t value );
+
+extern posh_u16_t *POSH_WriteU16ToBig( void *dst, posh_u16_t value );
+extern posh_i16_t *POSH_WriteI16ToBig( void *dst, posh_i16_t value );
+extern posh_u32_t *POSH_WriteU32ToBig( void *dst, posh_u32_t value );
+extern posh_i32_t *POSH_WriteI32ToBig( void *dst, posh_i32_t value );
+
+extern posh_u16_t  POSH_ReadU16FromLittle( const void *src );
+extern posh_i16_t  POSH_ReadI16FromLittle( const void *src );
+extern posh_u32_t  POSH_ReadU32FromLittle( const void *src );
+extern posh_i32_t  POSH_ReadI32FromLittle( const void *src );
+
+extern posh_u16_t  POSH_ReadU16FromBig( const void *src );
+extern posh_i16_t  POSH_ReadI16FromBig( const void *src );
+extern posh_u32_t  POSH_ReadU32FromBig( const void *src );
+extern posh_i32_t  POSH_ReadI32FromBig( const void *src );
+
+#if defined POSH_64BIT_INTEGER
+extern posh_u64_t *POSH_WriteU64ToLittle( void *dst, posh_u64_t value );
+extern posh_i64_t *POSH_WriteI64ToLittle( void *dst, posh_i64_t value );
+extern posh_u64_t *POSH_WriteU64ToBig( void *dst, posh_u64_t value );
+extern posh_i64_t *POSH_WriteI64ToBig( void *dst, posh_i64_t value );
+
+extern posh_u64_t  POSH_ReadU64FromLittle( const void *src );
+extern posh_i64_t  POSH_ReadI64FromLittle( const void *src );
+extern posh_u64_t  POSH_ReadU64FromBig( const void *src );
+extern posh_i64_t  POSH_ReadI64FromBig( const void *src );
+#endif /* POSH_64BIT_INTEGER */
+
+#if defined POSH_LITTLE_ENDIAN
+
+#  define POSH_LittleU16(x) (x)
+#  define POSH_LittleU32(x) (x)
+#  define POSH_LittleI16(x) (x)
+#  define POSH_LittleI32(x) (x)
+#  if defined POSH_64BIT_INTEGER
+#    define POSH_LittleU64(x) (x)
+#    define POSH_LittleI64(x) (x)
+#  endif /* defined POSH_64BIT_INTEGER */
+
+#  define POSH_BigU16(x) POSH_SwapU16(x)
+#  define POSH_BigU32(x) POSH_SwapU32(x)
+#  define POSH_BigI16(x) POSH_SwapI16(x)
+#  define POSH_BigI32(x) POSH_SwapI32(x)
+#  if defined POSH_64BIT_INTEGER
+#    define POSH_BigU64(x) POSH_SwapU64(x)
+#    define POSH_BigI64(x) POSH_SwapI64(x)
+#  endif /* defined POSH_64BIT_INTEGER */
+
+#else
+
+#  define POSH_BigU16(x) (x)
+#  define POSH_BigU32(x) (x)
+#  define POSH_BigI16(x) (x)
+#  define POSH_BigI32(x) (x)
+
+#  if defined POSH_64BIT_INTEGER
+#    define POSH_BigU64(x) (x)
+#    define POSH_BigI64(x) (x)
+#  endif /* POSH_64BIT_INTEGER */
+
+#  define POSH_LittleU16(x) POSH_SwapU16(x)
+#  define POSH_LittleU32(x) POSH_SwapU32(x)
+#  define POSH_LittleI16(x) POSH_SwapI16(x)
+#  define POSH_LittleI32(x) POSH_SwapI32(x)
+
+#  if defined POSH_64BIT_INTEGER
+#    define POSH_LittleU64(x) POSH_SwapU64(x)
+#    define POSH_LittleI64(x) POSH_SwapI64(x)
+#  endif /* POSH_64BIT_INTEGER */
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+
diff --git a/thirdparty/thekla_atlas/thekla/thekla_atlas.cpp b/thirdparty/thekla_atlas/thekla/thekla_atlas.cpp
new file mode 100644
index 0000000000..d6f0accf54
--- /dev/null
+++ b/thirdparty/thekla_atlas/thekla/thekla_atlas.cpp
@@ -0,0 +1,271 @@
+
+#include "thekla_atlas.h"
+
+#include <cfloat>
+
+#include "nvmesh/halfedge/Edge.h"
+#include "nvmesh/halfedge/Mesh.h"
+#include "nvmesh/halfedge/Face.h"
+#include "nvmesh/halfedge/Vertex.h"
+#include "nvmesh/param/Atlas.h"
+
+#include "nvmath/Vector.inl"
+#include "nvmath/ftoi.h"
+
+#include "nvcore/Array.inl"
+
+
+using namespace Thekla;
+using namespace nv;
+
+
+inline Atlas_Output_Mesh * set_error(Atlas_Error * error, Atlas_Error code) {
+    // Report `code` through the optional out-parameter; the NULL result lets callers write `return set_error(...)`.
+    if (error != NULL) { *error = code; }
+    return NULL;
+}
+
+
+static void input_to_mesh(const Atlas_Input_Mesh * input, HalfEdge::Mesh * mesh, Atlas_Error * error) {
+    // Copies the vertices and triangles of `input` into the half-edge `mesh`.
+    // Triangles for which mesh->addFace() returns NULL are skipped and counted; when at least one
+    // was skipped and `error` is non-null, *error is set to Atlas_Error_Invalid_Mesh_Non_Manifold.
+    // In every other case *error is left untouched.
+
+    const int vertex_total = input->vertex_count;
+
+    // colocal_map[i] receives first_colocal of input vertex i and is handed to the mesh below.
+    Array<uint> colocal_map;
+    colocal_map.reserve(vertex_total);
+
+    for (int vi = 0; vi < vertex_total; vi++) {
+        const Atlas_Input_Vertex & src = input->vertex_array[vi];
+
+        HalfEdge::Vertex * dst = mesh->addVertex(Vector3(src.position[0], src.position[1], src.position[2]));
+        dst->nor.set(src.normal[0], src.normal[1], src.normal[2]);
+        dst->tex.set(src.uv[0], src.uv[1]);
+
+        colocal_map.append(src.first_colocal);
+    }
+
+    mesh->linkColocalsWithCanonicalMap(colocal_map);
+
+    // Add the triangles, keeping a tally of the ones the mesh refuses.
+    const int face_total = input->face_count;
+    int rejected_faces = 0;
+
+    for (int fi = 0; fi < face_total; fi++) {
+        const Atlas_Input_Face & tri = input->face_array[fi];
+        const int a = tri.vertex_index[0];
+        const int b = tri.vertex_index[1];
+        const int c = tri.vertex_index[2];
+
+        HalfEdge::Face * face = mesh->addFace(a, b, c);
+        if (face == NULL) {
+            rejected_faces++;
+            continue;
+        }
+        face->material = tri.material_index;
+    }
+
+    mesh->linkBoundary();
+
+    if (error != NULL && rejected_faces != 0) *error = Atlas_Error_Invalid_Mesh_Non_Manifold;
+}
+
+static Atlas_Output_Mesh * mesh_atlas_to_output(const HalfEdge::Mesh * mesh, const Atlas & atlas, Atlas_Error * error) {
+    // Flattens the charts of atlas.meshAt(0) into a newly allocated Atlas_Output_Mesh: one output
+    // vertex per chart vertex and three indices per face of `mesh`. Always sets *error to
+    // Atlas_Error_Success, so `error` must be non-null.
+
+    Atlas_Output_Mesh * result = new Atlas_Output_Mesh;
+    const MeshCharts * charts = atlas.meshAt(0);
+
+    // Allocate vertices.
+    const int total_vertices = charts->vertexCount();
+    result->vertex_count = total_vertices;
+    result->vertex_array = new Atlas_Output_Vertex[total_vertices];
+
+    // Running maxima of ceil(u) and ceil(v); they become the atlas width and height.
+    int max_u = 0;
+    int max_v = 0;
+
+    // Output vertices: chart vertex cv of chart ci lands at vertexCountBeforeChartAt(ci) + cv.
+    const int total_charts = charts->chartCount();
+    for (int ci = 0; ci < total_charts; ci++) {
+        const Chart * chart = charts->chartAt(ci);
+        const uint first_vertex = charts->vertexCountBeforeChartAt(ci);
+        const uint chart_vertices = chart->vertexCount();
+
+        for (uint cv = 0; cv < chart_vertices; cv++) {
+            Atlas_Output_Vertex & out = result->vertex_array[first_vertex + cv];
+            // Remember which input vertex this chart vertex came from.
+            const uint source_vertex = chart->mapChartVertexToOriginalVertex(cv);
+            out.xref = source_vertex;
+            const Vector2 texcoord = chart->chartMesh()->vertexAt(cv)->tex;
+            out.uv[0] = texcoord.x;
+            out.uv[1] = texcoord.y;
+            max_u = max(max_u, ftoi_ceil(texcoord.x));
+            max_v = max(max_v, ftoi_ceil(texcoord.y));
+        }
+    }
+
+    // Allocate indices: three per face.
+    const int total_faces = mesh->faceCount();
+    result->index_count = total_faces * 3;
+    result->index_array = new int[total_faces * 3];
+
+    for (int f = 0; f < total_faces; f++) {
+        const uint chart_index = charts->faceChartAt(f);
+        const uint local_face = charts->faceIndexWithinChartAt(f);
+        const uint first_vertex = charts->vertexCountBeforeChartAt(chart_index);
+
+        const Chart * chart = charts->chartAt(chart_index);
+        nvDebugCheck(chart->faceAt(local_face) == f);
+
+        // Follow the face's edge loop; chart-local vertex ids are rebased by the chart's offset.
+        const HalfEdge::Edge * edge = chart->chartMesh()->faceAt(local_face)->edge;
+        for (int corner = 0; corner < 3; corner++, edge = edge->next) {
+            result->index_array[3*f + corner] = first_vertex + edge->vertex->id;
+        }
+    }
+
+    result->atlas_width = max_u;
+    result->atlas_height = max_v;
+    *error = Atlas_Error_Success;
+    return result;
+}
+
+
+void Thekla::atlas_set_default_options(Atlas_Options * options) {
+    // Fills *options with the library defaults; a NULL `options` is ignored.
+    if (options == NULL) return;
+
+    options->charter = Atlas_Charter_Default;
+    options->mapper = Atlas_Mapper_Default;
+    options->packer = Atlas_Packer_Default;
+
+    // These are the default values we use on The Witness.
+    options->charter_options.witness.proxy_fit_metric_weight = 2.0f;
+    options->charter_options.witness.roundness_metric_weight = 0.01f;
+    options->charter_options.witness.straightness_metric_weight = 6.0f;
+    options->charter_options.witness.normal_seam_metric_weight = 4.0f;
+    options->charter_options.witness.texture_seam_metric_weight = 0.5f;
+    options->charter_options.witness.max_chart_area = FLT_MAX;
+    options->charter_options.witness.max_boundary_length = FLT_MAX;
+
+    options->packer_options.witness.packing_quality = 0;
+    options->packer_options.witness.texel_area = 8;
+    options->packer_options.witness.block_align = true;
+    options->packer_options.witness.conservative = false;
+}
+
+
+Atlas_Output_Mesh * Thekla::atlas_generate(const Atlas_Input_Mesh * input, const Atlas_Options * options, Atlas_Error * error) {
+    // Charts, parameterizes and packs `input` according to `options`.
+    //
+    // Returns a newly allocated mesh (release it with atlas_free()) and sets *error to
+    // Atlas_Error_Success, or returns NULL with *error holding a failure code. All three
+    // arguments are required; a NULL `error` simply yields NULL.
+
+    // Validate args.
+    if (input == NULL || options == NULL || error == NULL) return set_error(error, Atlas_Error_Invalid_Args);
+
+    // Start from a known state: the caller's variable may be uninitialized or still hold the
+    // result of an earlier call, and it is read back after input_to_mesh() below.
+    *error = Atlas_Error_Success;
+
+    // Validate options. Only the Witness charter, the LSCM mapper and the Witness packer are
+    // handled below; every other value (Atlas_Charter_Extract included) is rejected here.
+    // @@ Validate the charter and packer option values!
+    if (options->charter != Atlas_Charter_Witness ||
+        options->mapper != Atlas_Mapper_LSCM ||
+        options->packer != Atlas_Packer_Witness)
+    {
+        return set_error(error, Atlas_Error_Invalid_Options);
+    }
+
+    // Validate input mesh: sane counts, arrays present, and every index inside the vertex array.
+    const int vertex_count = input->vertex_count;
+    const int face_count = input->face_count;
+    if (vertex_count < 0 || face_count < 0 ||
+        (vertex_count > 0 && input->vertex_array == NULL) ||
+        (face_count > 0 && input->face_array == NULL))
+    {
+        return set_error(error, Atlas_Error_Invalid_Mesh);
+    }
+
+    // NOTE(review): first_colocal is stored in the canonical map that input_to_mesh() passes to
+    // the mesh; it is assumed to be a vertex index like the face indices below -- confirm.
+    for (int i = 0; i < vertex_count; i++) {
+        const int colocal = input->vertex_array[i].first_colocal;
+        if (colocal < 0 || colocal >= vertex_count) {
+            return set_error(error, Atlas_Error_Invalid_Mesh);
+        }
+    }
+
+    for (int i = 0; i < face_count; i++) {
+        for (int k = 0; k < 3; k++) {
+            const int v = input->face_array[i].vertex_index[k];
+            if (v < 0 || v >= vertex_count) {
+                return set_error(error, Atlas_Error_Invalid_Mesh);
+            }
+        }
+    }
+
+    // Build half edge mesh.
+    AutoPtr<HalfEdge::Mesh> mesh(new HalfEdge::Mesh);
+
+    // Faces the mesh rejects are flagged as Atlas_Error_Invalid_Mesh_Non_Manifold; that does not stop the build.
+    input_to_mesh(input, mesh.ptr(), error);
+
+    if (*error == Atlas_Error_Invalid_Mesh) {
+        return NULL;
+    }
+
+    Atlas atlas;
+
+    // Charter.
+    SegmentationSettings segmentation_settings;
+    segmentation_settings.proxyFitMetricWeight = options->charter_options.witness.proxy_fit_metric_weight;
+    segmentation_settings.roundnessMetricWeight = options->charter_options.witness.roundness_metric_weight;
+    segmentation_settings.straightnessMetricWeight = options->charter_options.witness.straightness_metric_weight;
+    segmentation_settings.normalSeamMetricWeight = options->charter_options.witness.normal_seam_metric_weight;
+    segmentation_settings.textureSeamMetricWeight = options->charter_options.witness.texture_seam_metric_weight;
+    segmentation_settings.maxChartArea = options->charter_options.witness.max_chart_area;
+    segmentation_settings.maxBoundaryLength = options->charter_options.witness.max_boundary_length;
+
+    Array<uint> uncharted_materials;
+    atlas.computeCharts(mesh.ptr(), segmentation_settings, uncharted_materials);
+
+    // A failed stage is reported explicitly so that NULL never comes back with Atlas_Error_Success.
+    if (atlas.hasFailed()) return set_error(error, Atlas_Error_Invalid_Mesh);
+
+    // Mapper.
+    atlas.parameterizeCharts();
+
+    if (atlas.hasFailed()) return set_error(error, Atlas_Error_Invalid_Mesh);
+
+    // Packer.
+    const int packing_quality = options->packer_options.witness.packing_quality;
+    const float texel_area = options->packer_options.witness.texel_area;
+    const int block_align = options->packer_options.witness.block_align;
+    const int conservative = options->packer_options.witness.conservative;
+
+    /*float utilization =*/ atlas.packCharts(packing_quality, texel_area, block_align, conservative);
+
+    if (atlas.hasFailed()) return set_error(error, Atlas_Error_Invalid_Mesh);
+
+    // Build output mesh.
+    return mesh_atlas_to_output(mesh.ptr(), atlas, error);
+}
+
+
+void Thekla::atlas_free(Atlas_Output_Mesh * output) {
+    // Releases an output mesh together with its vertex and index arrays; a NULL `output` is ignored.
+    if (output == NULL) return;
+    delete [] output->index_array;
+    delete [] output->vertex_array;
+    delete output;
+}
+
diff --git a/thirdparty/thekla_atlas/thekla/thekla_atlas.h b/thirdparty/thekla_atlas/thekla/thekla_atlas.h
new file mode 100644
index 0000000000..1d0716e781
--- /dev/null
+++ b/thirdparty/thekla_atlas/thekla/thekla_atlas.h
@@ -0,0 +1,116 @@
+#pragma once
+// Thekla Atlas Generator
+// Usage: fill an Atlas_Input_Mesh and Atlas_Options, call atlas_generate(), release the result with atlas_free().
+namespace Thekla {
+
+enum Atlas_Charter {
+    Atlas_Charter_Witness,  // Options: charter_options.witness
+    Atlas_Charter_Extract,  // Options: --- (currently rejected by atlas_generate)
+    Atlas_Charter_Default = Atlas_Charter_Witness
+};
+
+enum Atlas_Mapper {
+    Atlas_Mapper_LSCM,      // Options: ---
+    Atlas_Mapper_Default = Atlas_Mapper_LSCM
+};
+
+enum Atlas_Packer {
+    Atlas_Packer_Witness,   // Options: packer_options.witness
+    Atlas_Packer_Default = Atlas_Packer_Witness
+};
+// Stage selection plus per-stage settings. atlas_set_default_options() fills in the defaults quoted below.
+struct Atlas_Options {
+    Atlas_Charter charter;
+    union {
+        struct {
+            float proxy_fit_metric_weight;      // Default: 2.0
+            float roundness_metric_weight;      // Default: 0.01
+            float straightness_metric_weight;   // Default: 6.0
+            float normal_seam_metric_weight;    // Default: 4.0
+            float texture_seam_metric_weight;   // Default: 0.5
+            float max_chart_area;               // Default: FLT_MAX
+            float max_boundary_length;          // Default: FLT_MAX
+        } witness;
+        struct {
+        } extract;
+    } charter_options;
+
+    Atlas_Mapper mapper;
+    union {
+    } mapper_options;
+
+    Atlas_Packer packer;
+    union {
+        struct {
+            int packing_quality;    // Default: 0.
+            float texel_area;       // This is not really texel area, but 1 / texel width? Default: 8.
+            bool block_align;       // Align charts to 4x4 blocks. Default: true.
+            bool conservative;      // Pack charts with extra padding. Default: false.
+        } witness;
+    } packer_options;
+};
+
+struct Atlas_Input_Vertex {
+    float position[3];
+    float normal[3];
+    float uv[2];
+    int first_colocal;  // Forwarded to the mesh's canonical (colocal) map; presumably the index of the first vertex at this position -- confirm.
+};
+
+struct Atlas_Input_Face {
+    int vertex_index[3];    // Each must lie in [0, vertex_count), otherwise atlas_generate reports Atlas_Error_Invalid_Mesh.
+    int material_index;     // Copied to the material of the corresponding half-edge face.
+};
+
+struct Atlas_Input_Mesh {
+    int vertex_count;                   // Number of entries in vertex_array.
+    int face_count;                     // Number of entries in face_array.
+    Atlas_Input_Vertex * vertex_array;
+    Atlas_Input_Face * face_array;
+};
+
+struct Atlas_Output_Vertex {
+    float uv[2];
+    int xref;   // Index of input vertex from which this output vertex originated.
+};
+// Result of atlas_generate(). Allocated by the library; release it with atlas_free().
+struct Atlas_Output_Mesh {
+    int atlas_width;    // Largest ceil(uv[0]) over all output vertices.
+    int atlas_height;   // Largest ceil(uv[1]) over all output vertices.
+    int vertex_count;   // Number of entries in vertex_array.
+    int index_count;    // Number of entries in index_array (three per face).
+    Atlas_Output_Vertex * vertex_array;
+    int * index_array;  // Indices into vertex_array.
+};
+
+enum Atlas_Error {
+    Atlas_Error_Success,
+    Atlas_Error_Invalid_Args,               // A required pointer argument was NULL.
+    Atlas_Error_Invalid_Options,            // Unsupported charter, mapper or packer selection.
+    Atlas_Error_Invalid_Mesh,               // The input mesh failed validation (e.g. a face index outside the vertex array).
+    Atlas_Error_Invalid_Mesh_Non_Manifold,  // At least one face could not be added to the half-edge mesh.
+    Atlas_Error_Not_Implemented
+};
+// Fills *options with the default settings. A NULL pointer is ignored.
+void atlas_set_default_options(Atlas_Options * options);
+// Generates the atlas. All three pointers must be non-NULL. Returns NULL on failure; free a non-NULL result with atlas_free().
+Atlas_Output_Mesh * atlas_generate(const Atlas_Input_Mesh * input, const Atlas_Options * options, Atlas_Error * error);
+// Releases a mesh returned by atlas_generate(). A NULL pointer is ignored.
+void atlas_free(Atlas_Output_Mesh * output);
+
+
+/*
+
+Should we represent the input mesh with an opaque structure that simply holds pointers to the user data? That would allow us to avoid having to copy attributes to an intermediate representation.
+
+struct Atlas_Input_Mesh;
+
+void mesh_set_vertex_position(Atlas_Input_Mesh * mesh, float * ptr, int stride);
+void mesh_set_vertex_normal(Atlas_Input_Mesh * mesh, float * ptr, int stride);
+void mesh_set_vertex_uv(Atlas_Input_Mesh * mesh, float * ptr, int stride);
+
+void mesh_set_index(Atlas_Input_Mesh * mesh, int * ptr);
+*/
+
+} // Thekla namespace
+