23 files changed, 3652 insertions, 1822 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 99f917dbc1..3f2fc6d8f9 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -513,7 +513,7 @@ File extracted from upstream release tarball:
 ## xatlas
 
 - Upstream: https://github.com/jpcy/xatlas
-- Version: git (f65a664, 2019)
+- Version: git (b4b5426, 2019)
 - License: MIT
 
 Files extracted from upstream source:
diff --git a/thirdparty/assimp/assimp/config.h b/thirdparty/assimp/assimp/config.h
index 382a698268..d0e4817349 100644
--- a/thirdparty/assimp/assimp/config.h
+++ b/thirdparty/assimp/assimp/config.h
@@ -983,6 +983,13 @@ enum aiComponent {
 #define AI_CONFIG_GLOBAL_SCALE_FACTOR_DEFAULT 1.0f
 #endif // !! AI_DEBONE_THRESHOLD
 
+#define AI_CONFIG_APP_SCALE_KEY "APP_SCALE_FACTOR"
+
+#if (!defined AI_CONFIG_APP_SCALE_KEY)
+#   define AI_CONFIG_APP_SCALE_KEY 1.0
+#endif // AI_CONFIG_APP_SCALE_KEY
+
+
 // ---------- All the Build/Compile-time defines ------------
 
 /** @brief Specifies if double precision is supported inside assimp
diff --git a/thirdparty/assimp/code/Common/BaseImporter.cpp b/thirdparty/assimp/code/Common/BaseImporter.cpp
index 0a5694aa0e..de5018a250 100644
--- a/thirdparty/assimp/code/Common/BaseImporter.cpp
+++ b/thirdparty/assimp/code/Common/BaseImporter.cpp
@@ -76,9 +76,25 @@ BaseImporter::~BaseImporter() {
     // nothing to do here
 }
 
+void BaseImporter::UpdateImporterScale( Importer* pImp )
+{
+    ai_assert(pImp != nullptr);
+    ai_assert(importerScale != 0.0);
+    ai_assert(fileScale != 0.0);
+
+    double activeScale = importerScale * fileScale;
+
+    // Set active scaling
+    pImp->SetPropertyFloat( AI_CONFIG_APP_SCALE_KEY, activeScale);
+
+    ASSIMP_LOG_DEBUG_F("UpdateImporterScale scale set: %f", activeScale );
+}
+
 // ------------------------------------------------------------------------------------------------
 // Imports the given file and returns the imported data.
-aiScene* BaseImporter::ReadFile(const Importer* pImp, const std::string& pFile, IOSystem* pIOHandler) {
+aiScene* BaseImporter::ReadFile(Importer* pImp, const std::string& pFile, IOSystem* pIOHandler) {
+
+
     m_progress = pImp->GetProgressHandler();
     if (nullptr == m_progress) {
         return nullptr;
@@ -100,6 +116,11 @@ aiScene* BaseImporter::ReadFile(const Importer* pImp, const std::string& pFile,
     {
         InternReadFile( pFile, sc.get(), &filter);
 
+        // Calculate import scale hook - required because pImp not available anywhere else
+        // passes scale into ScaleProcess
+        UpdateImporterScale(pImp);
+
+
     } catch( const std::exception& err )    {
         // extract error description
         m_ErrorText = err.what();
@@ -112,7 +133,7 @@ aiScene* BaseImporter::ReadFile(const Importer* pImp, const std::string& pFile,
 }
 
 // ------------------------------------------------------------------------------------------------
-void BaseImporter::SetupProperties(const Importer* /*pImp*/)
+void BaseImporter::SetupProperties(const Importer* pImp)
 {
     // the default implementation does nothing
 }
@@ -588,6 +609,8 @@ aiScene* BatchLoader::GetImport( unsigned int which )
     return nullptr;
 }
 
+
+
 // ------------------------------------------------------------------------------------------------
 void BatchLoader::LoadAll()
 {
diff --git a/thirdparty/assimp/code/FBX/FBXConverter.cpp b/thirdparty/assimp/code/FBX/FBXConverter.cpp
index 9f940d3226..9bd970098e 100644
--- a/thirdparty/assimp/code/FBX/FBXConverter.cpp
+++ b/thirdparty/assimp/code/FBX/FBXConverter.cpp
@@ -66,6 +66,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <vector>
 #include <sstream>
 #include <iomanip>
+#include <cstdint>
 
 
 namespace Assimp {
@@ -90,7 +91,6 @@ namespace Assimp {
         , anim_fps()
         , out(out)
         , doc(doc)
-        , mRemoveEmptyBones( removeEmptyBones )
         , mCurrentUnit(FbxUnit::cm) {
             // animations need to be converted first since this will
             // populate the node_anim_chain_bits map, which is needed
@@ -119,7 +119,6 @@ namespace Assimp {
 
             ConvertGlobalSettings();
             TransferDataToScene();
-            ConvertToUnitScale(unit);
 
             // if we didn't read any meshes set the AI_SCENE_FLAGS_INCOMPLETE
             // to make sure the scene passes assimp's validation. FBX files
@@ -685,30 +684,37 @@ namespace Assimp {
             bool ok;
 
             aiMatrix4x4 chain[TransformationComp_MAXIMUM];
+
+            ai_assert(TransformationComp_MAXIMUM < 32);
+            std::uint32_t chainBits = 0;
+            // A node won't need a node chain if it only has these.
+            const std::uint32_t chainMaskSimple = (1 << TransformationComp_Translation) + (1 << TransformationComp_Scaling) + (1 << TransformationComp_Rotation);
+            // A node will need a node chain if it has any of these.
+            const std::uint32_t chainMaskComplex = ((1 << (TransformationComp_MAXIMUM)) - 1) - chainMaskSimple;
+
             std::fill_n(chain, static_cast<unsigned int>(TransformationComp_MAXIMUM), aiMatrix4x4());
 
             // generate transformation matrices for all the different transformation components
             const float zero_epsilon = 1e-6f;
             const aiVector3D all_ones(1.0f, 1.0f, 1.0f);
-            bool is_complex = false;
 
             const aiVector3D& PreRotation = PropertyGet<aiVector3D>(props, "PreRotation", ok);
             if (ok && PreRotation.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_PreRotation);
 
                 GetRotationMatrix(Model::RotOrder::RotOrder_EulerXYZ, PreRotation, chain[TransformationComp_PreRotation]);
             }
 
             const aiVector3D& PostRotation = PropertyGet<aiVector3D>(props, "PostRotation", ok);
             if (ok && PostRotation.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_PostRotation);
 
                 GetRotationMatrix(Model::RotOrder::RotOrder_EulerXYZ, PostRotation, chain[TransformationComp_PostRotation]);
             }
 
             const aiVector3D& RotationPivot = PropertyGet<aiVector3D>(props, "RotationPivot", ok);
             if (ok && RotationPivot.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_RotationPivot) | (1 << TransformationComp_RotationPivotInverse);
 
                 aiMatrix4x4::Translation(RotationPivot, chain[TransformationComp_RotationPivot]);
                 aiMatrix4x4::Translation(-RotationPivot, chain[TransformationComp_RotationPivotInverse]);
@@ -716,21 +722,21 @@ namespace Assimp {
 
             const aiVector3D& RotationOffset = PropertyGet<aiVector3D>(props, "RotationOffset", ok);
             if (ok && RotationOffset.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_RotationOffset);
 
                 aiMatrix4x4::Translation(RotationOffset, chain[TransformationComp_RotationOffset]);
             }
 
             const aiVector3D& ScalingOffset = PropertyGet<aiVector3D>(props, "ScalingOffset", ok);
             if (ok && ScalingOffset.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_ScalingOffset);
 
                 aiMatrix4x4::Translation(ScalingOffset, chain[TransformationComp_ScalingOffset]);
             }
 
             const aiVector3D& ScalingPivot = PropertyGet<aiVector3D>(props, "ScalingPivot", ok);
             if (ok && ScalingPivot.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_ScalingPivot) | (1 << TransformationComp_ScalingPivotInverse);
 
                 aiMatrix4x4::Translation(ScalingPivot, chain[TransformationComp_ScalingPivot]);
                 aiMatrix4x4::Translation(-ScalingPivot, chain[TransformationComp_ScalingPivotInverse]);
@@ -738,22 +744,28 @@ namespace Assimp {
 
             const aiVector3D& Translation = PropertyGet<aiVector3D>(props, "Lcl Translation", ok);
             if (ok && Translation.SquareLength() > zero_epsilon) {
+                chainBits = chainBits | (1 << TransformationComp_Translation);
+
                 aiMatrix4x4::Translation(Translation, chain[TransformationComp_Translation]);
             }
 
             const aiVector3D& Scaling = PropertyGet<aiVector3D>(props, "Lcl Scaling", ok);
             if (ok && (Scaling - all_ones).SquareLength() > zero_epsilon) {
+                chainBits = chainBits | (1 << TransformationComp_Scaling);
+
                 aiMatrix4x4::Scaling(Scaling, chain[TransformationComp_Scaling]);
             }
 
             const aiVector3D& Rotation = PropertyGet<aiVector3D>(props, "Lcl Rotation", ok);
             if (ok && Rotation.SquareLength() > zero_epsilon) {
+                chainBits = chainBits | (1 << TransformationComp_Rotation);
+
                 GetRotationMatrix(rot, Rotation, chain[TransformationComp_Rotation]);
             }
 
             const aiVector3D& GeometricScaling = PropertyGet<aiVector3D>(props, "GeometricScaling", ok);
             if (ok && (GeometricScaling - all_ones).SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_GeometricScaling);
                 aiMatrix4x4::Scaling(GeometricScaling, chain[TransformationComp_GeometricScaling]);
                 aiVector3D GeometricScalingInverse = GeometricScaling;
                 bool canscale = true;
@@ -768,13 +780,14 @@ namespace Assimp {
                     }
                 }
                 if (canscale) {
+                    chainBits = chainBits | (1 << TransformationComp_GeometricScalingInverse);
                     aiMatrix4x4::Scaling(GeometricScalingInverse, chain[TransformationComp_GeometricScalingInverse]);
                 }
             }
 
             const aiVector3D& GeometricRotation = PropertyGet<aiVector3D>(props, "GeometricRotation", ok);
             if (ok && GeometricRotation.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_GeometricRotation) | (1 << TransformationComp_GeometricRotationInverse);
                 GetRotationMatrix(rot, GeometricRotation, chain[TransformationComp_GeometricRotation]);
                 GetRotationMatrix(rot, GeometricRotation, chain[TransformationComp_GeometricRotationInverse]);
                 chain[TransformationComp_GeometricRotationInverse].Inverse();
@@ -782,7 +795,7 @@ namespace Assimp {
 
             const aiVector3D& GeometricTranslation = PropertyGet<aiVector3D>(props, "GeometricTranslation", ok);
             if (ok && GeometricTranslation.SquareLength() > zero_epsilon) {
-                is_complex = true;
+                chainBits = chainBits | (1 << TransformationComp_GeometricTranslation) | (1 << TransformationComp_GeometricTranslationInverse);
                 aiMatrix4x4::Translation(GeometricTranslation, chain[TransformationComp_GeometricTranslation]);
                 aiMatrix4x4::Translation(-GeometricTranslation, chain[TransformationComp_GeometricTranslationInverse]);
             }
@@ -790,12 +803,12 @@ namespace Assimp {
             // is_complex needs to be consistent with NeedsComplexTransformationChain()
             // or the interplay between this code and the animation converter would
             // not be guaranteed.
-            ai_assert(NeedsComplexTransformationChain(model) == is_complex);
+            ai_assert(NeedsComplexTransformationChain(model) == ((chainBits & chainMaskComplex) != 0));
 
             // now, if we have more than just Translation, Scaling and Rotation,
             // we need to generate a full node chain to accommodate for assimp's
             // lack to express pivots and offsets.
-            if (is_complex && doc.Settings().preservePivots) {
+            if ((chainBits & chainMaskComplex) && doc.Settings().preservePivots) {
                 FBXImporter::LogInfo("generating full transformation chain for node: " + name);
 
                 // query the anim_chain_bits dictionary to find out which chain elements
@@ -808,7 +821,7 @@ namespace Assimp {
                 for (size_t i = 0; i < TransformationComp_MAXIMUM; ++i, bit <<= 1) {
                     const TransformationComp comp = static_cast<TransformationComp>(i);
 
-                    if (chain[i].IsIdentity() && (anim_chain_bitmask & bit) == 0) {
+                    if ((chainBits & bit) == 0 && (anim_chain_bitmask & bit) == 0) {
                         continue;
                     }
 
@@ -1462,14 +1475,8 @@ namespace Assimp {
 
                     const WeightIndexArray& indices = cluster->GetIndices();
 
-                    if (indices.empty() && mRemoveEmptyBones ) {
-                        continue;
-                    }
-
                     const MatIndexArray& mats = geo.GetMaterialIndices();
 
-                    bool ok = false;
-
                     const size_t no_index_sentinel = std::numeric_limits<size_t>::max();
 
                     count_out_indices.clear();
@@ -1509,8 +1516,7 @@ namespace Assimp {
                                     out_indices.push_back(std::distance(outputVertStartIndices->begin(), it));
                                 }
 
-                                ++count_out_indices.back();
-                                ok = true;
+                                ++count_out_indices.back();                               
                             }
                         }
                     }
@@ -1518,10 +1524,8 @@ namespace Assimp {
                     // if we found at least one, generate the output bones
                     // XXX this could be heavily simplified by collecting the bone
                     // data in a single step.
-                    if (ok && mRemoveEmptyBones) {
-                        ConvertCluster(bones, model, *cluster, out_indices, index_out_indices,
+                    ConvertCluster(bones, model, *cluster, out_indices, index_out_indices,
                             count_out_indices, node_global_transform);
-                    }
                 }
             }
             catch (std::exception&) {
@@ -3532,46 +3536,6 @@ void FBXConverter::SetShadingPropertiesRaw(aiMaterial* out_mat, const PropertyTa
             out->mMetaData->Set(14, "CustomFrameRate", doc.GlobalSettings().CustomFrameRate());
         }
 
-        void FBXConverter::ConvertToUnitScale( FbxUnit unit ) {
-            if (mCurrentUnit == unit) {
-                return;
-            }
-
-            ai_real scale = 1.0;
-            if (mCurrentUnit == FbxUnit::cm) {
-                if (unit == FbxUnit::m) {
-                    scale = (ai_real)0.01;
-                } else if (unit == FbxUnit::km) {
-                    scale = (ai_real)0.00001;
-                }
-            } else if (mCurrentUnit == FbxUnit::m) {
-                if (unit == FbxUnit::cm) {
-                    scale = (ai_real)100.0;
-                } else if (unit == FbxUnit::km) {
-                    scale = (ai_real)0.001;
-                }
-            } else if (mCurrentUnit == FbxUnit::km) {
-                if (unit == FbxUnit::cm) {
-                    scale = (ai_real)100000.0;
-                } else if (unit == FbxUnit::m) {
-                    scale = (ai_real)1000.0;
-                }
-            }
-            
-            for (auto mesh : meshes) {
-                if (nullptr == mesh) {
-                    continue;
-                }
-
-                if (mesh->HasPositions()) {
-                    for (unsigned int i = 0; i < mesh->mNumVertices; ++i) {
-                        aiVector3D &pos = mesh->mVertices[i];
-                        pos *= scale;
-                    }
-                }
-            }
-        }
-
         void FBXConverter::TransferDataToScene()
         {
             ai_assert(!out->mMeshes);
diff --git a/thirdparty/assimp/code/FBX/FBXConverter.h b/thirdparty/assimp/code/FBX/FBXConverter.h
index 17a7bc56b7..b458627392 100644
--- a/thirdparty/assimp/code/FBX/FBXConverter.h
+++ b/thirdparty/assimp/code/FBX/FBXConverter.h
@@ -431,10 +431,6 @@ private:
     void ConvertGlobalSettings();
 
     // ------------------------------------------------------------------------------------------------
-    //  Will perform the conversion from a given unit to the requested unit.
-    void ConvertToUnitScale(FbxUnit unit);
-
-    // ------------------------------------------------------------------------------------------------
     // copy generated meshes, animations, lights, cameras and textures to the output scene
     void TransferDataToScene();
 
@@ -470,9 +466,6 @@ private:
 
     aiScene* const out;
     const FBX::Document& doc;
-
-    bool mRemoveEmptyBones;
-
     FbxUnit mCurrentUnit;
 };
 
diff --git a/thirdparty/assimp/code/FBX/FBXDocument.cpp b/thirdparty/assimp/code/FBX/FBXDocument.cpp
index 1af08fe6d8..506fd978dd 100644
--- a/thirdparty/assimp/code/FBX/FBXDocument.cpp
+++ b/thirdparty/assimp/code/FBX/FBXDocument.cpp
@@ -90,14 +90,6 @@ const Object* LazyObject::Get(bool dieOnError)
         return object.get();
     }
 
-    // if this is the root object, we return a dummy since there
-    // is no root object int he fbx file - it is just referenced
-    // with id 0.
-    if(id == 0L) {
-        object.reset(new Object(id, element, "Model::RootNode"));
-        return object.get();
-    }
-
     const Token& key = element.KeyToken();
     const TokenList& tokens = element.Tokens();
 
diff --git a/thirdparty/assimp/code/FBX/FBXExporter.cpp b/thirdparty/assimp/code/FBX/FBXExporter.cpp
index 153e676506..8ebc8555a2 100644
--- a/thirdparty/assimp/code/FBX/FBXExporter.cpp
+++ b/thirdparty/assimp/code/FBX/FBXExporter.cpp
@@ -1706,8 +1706,7 @@ void FBXExporter::WriteObjects ()
                     }
                     if (end) { break; }
                 }
-                limbnodes.insert(parent);
-                skeleton.insert(parent);
+                
                 // if it was the skeleton root we can finish here
                 if (end) { break; }
             }
@@ -1848,44 +1847,10 @@ void FBXExporter::WriteObjects ()
             inverse_bone_xform.Inverse();
             aiMatrix4x4 tr = inverse_bone_xform * mesh_xform;
 
-            // this should be the same as the bone's mOffsetMatrix.
-            // if it's not the same, the skeleton isn't in the bind pose.
-            float epsilon = 1e-4f; // some error is to be expected
-            float epsilon_custom = mProperties->GetPropertyFloat("BINDPOSE_EPSILON", -1);
-            if(epsilon_custom > 0)
-                epsilon = epsilon_custom;
-            bool bone_xform_okay = true;
-            if (b && ! tr.Equal(b->mOffsetMatrix, epsilon)) {
-                not_in_bind_pose.insert(b);
-                bone_xform_okay = false;
-            }
+            sdnode.AddChild("Transform", tr);
 
-            // if we have a bone we should use the mOffsetMatrix,
-            // otherwise try to just use the calculated transform.
-            if (b) {
-                sdnode.AddChild("Transform", b->mOffsetMatrix);
-            } else {
-                sdnode.AddChild("Transform", tr);
-            }
-            // note: it doesn't matter if we mix these,
-            // because if they disagree we'll throw an exception later.
-            // it could be that the skeleton is not in the bone pose
-            // but all bones are still defined,
-            // in which case this would use the mOffsetMatrix for everything
-            // and a correct skeleton would still be output.
-
-            // transformlink should be the position of the bone in world space.
-            // if the bone is in the bind pose (or nonexistent),
-            // we can just use the matrix we already calculated
-            if (bone_xform_okay) {
-                sdnode.AddChild("TransformLink", bone_xform);
-            // otherwise we can only work it out using the mesh position.
-            } else {
-                aiMatrix4x4 trl = b->mOffsetMatrix;
-                trl.Inverse();
-                trl *= mesh_xform;
-                sdnode.AddChild("TransformLink", trl);
-            }
+
+            sdnode.AddChild("TransformLink", bone_xform);
             // note: this means we ALWAYS rely on the mesh node transform
             // being unchanged from the time the skeleton was bound.
             // there's not really any way around this at the moment.
diff --git a/thirdparty/assimp/code/FBX/FBXImporter.cpp b/thirdparty/assimp/code/FBX/FBXImporter.cpp
index ec8bbd2b47..bd359dbf29 100644
--- a/thirdparty/assimp/code/FBX/FBXImporter.cpp
+++ b/thirdparty/assimp/code/FBX/FBXImporter.cpp
@@ -189,8 +189,16 @@ void FBXImporter::InternReadFile( const std::string& pFile, aiScene* pScene, IOS
         if (settings.convertToMeters) {
             unit = FbxUnit::m;
         }
+
         // convert the FBX DOM to aiScene
-        ConvertToAssimpScene(pScene,doc, settings.removeEmptyBones, unit);
+        ConvertToAssimpScene(pScene, doc, settings.removeEmptyBones, unit);
+
+        // size relative to cm
+        float size_relative_to_cm = doc.GlobalSettings().UnitScaleFactor();
+
+        // Set FBX file scale is relative to CM must be converted to M for
+        // assimp universal format (M)
+        SetFileScale( size_relative_to_cm * 0.01f);
 
         std::for_each(tokens.begin(),tokens.end(),Util::delete_fun<Token>());
     }
diff --git a/thirdparty/assimp/code/FBX/FBXMeshGeometry.cpp b/thirdparty/assimp/code/FBX/FBXMeshGeometry.cpp
index 44a0264ca0..5c9a0e309d 100644
--- a/thirdparty/assimp/code/FBX/FBXMeshGeometry.cpp
+++ b/thirdparty/assimp/code/FBX/FBXMeshGeometry.cpp
@@ -115,7 +115,6 @@ MeshGeometry::MeshGeometry(uint64_t id, const Element& element, const std::strin
 
     if(tempVerts.empty()) {
         FBXImporter::LogWarn("encountered mesh with no vertices");
-        return;
     }
 
     std::vector<int> tempFaces;
@@ -123,7 +122,6 @@ MeshGeometry::MeshGeometry(uint64_t id, const Element& element, const std::strin
 
     if(tempFaces.empty()) {
         FBXImporter::LogWarn("encountered mesh with no faces");
-        return;
     }
 
     m_vertices.reserve(tempFaces.size());
@@ -612,7 +610,10 @@ void MeshGeometry::ReadVertexDataMaterials(std::vector<int>& materials_out, cons
     const std::string& ReferenceInformationType)
 {
     const size_t face_count = m_faces.size();
-    ai_assert(face_count);
+    if(face_count <= 0)
+    {
+        return;
+    }
 
     // materials are handled separately. First of all, they are assigned per-face
     // and not per polyvert. Secondly, ReferenceInformationType=IndexToDirect
diff --git a/thirdparty/assimp/code/PostProcessing/CalcTangentsProcess.cpp b/thirdparty/assimp/code/PostProcessing/CalcTangentsProcess.cpp
index b30f39c274..a3f7dd2557 100644
--- a/thirdparty/assimp/code/PostProcessing/CalcTangentsProcess.cpp
+++ b/thirdparty/assimp/code/PostProcessing/CalcTangentsProcess.cpp
@@ -212,7 +212,7 @@ bool CalcTangentsProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
             // project tangent and bitangent into the plane formed by the vertex' normal
             aiVector3D localTangent = tangent - meshNorm[p] * (tangent * meshNorm[p]);
             aiVector3D localBitangent = bitangent - meshNorm[p] * (bitangent * meshNorm[p]);
-            localTangent.Normalize(); localBitangent.Normalize();
+            localTangent.NormalizeSafe(); localBitangent.NormalizeSafe();
 
             // reconstruct tangent/bitangent according to normal and bitangent/tangent when it's infinite or NaN.
             bool invalid_tangent = is_special_float(localTangent.x) || is_special_float(localTangent.y) || is_special_float(localTangent.z);
@@ -220,10 +220,10 @@ bool CalcTangentsProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshIndex)
             if (invalid_tangent != invalid_bitangent) {
                 if (invalid_tangent) {
                     localTangent = meshNorm[p] ^ localBitangent;
-                    localTangent.Normalize();
+                    localTangent.NormalizeSafe();
                 } else {
                     localBitangent = localTangent ^ meshNorm[p];
-                    localBitangent.Normalize();
+                    localBitangent.NormalizeSafe();
                 }
             }
 
diff --git a/thirdparty/assimp/code/PostProcessing/ScaleProcess.cpp b/thirdparty/assimp/code/PostProcessing/ScaleProcess.cpp
index 6d458c4b11..ac770c41f2 100644
--- a/thirdparty/assimp/code/PostProcessing/ScaleProcess.cpp
+++ b/thirdparty/assimp/code/PostProcessing/ScaleProcess.cpp
@@ -39,19 +39,17 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ----------------------------------------------------------------------
 */
-#ifndef ASSIMP_BUILD_NO_GLOBALSCALE_PROCESS
-
 #include "ScaleProcess.h"
 
 #include <assimp/scene.h>
 #include <assimp/postprocess.h>
+#include <assimp/BaseImporter.h>
 
 namespace Assimp {
 
 ScaleProcess::ScaleProcess()
 : BaseProcess()
 , mScale( AI_CONFIG_GLOBAL_SCALE_FACTOR_DEFAULT ) {
-    // empty
 }
 
 ScaleProcess::~ScaleProcess() {
@@ -71,10 +69,26 @@ bool ScaleProcess::IsActive( unsigned int pFlags ) const {
 }
 
 void ScaleProcess::SetupProperties( const Importer* pImp ) {
-    mScale = pImp->GetPropertyFloat( AI_CONFIG_GLOBAL_SCALE_FACTOR_KEY, 0 );
+    // User scaling
+    mScale = pImp->GetPropertyFloat( AI_CONFIG_GLOBAL_SCALE_FACTOR_KEY, 1.0f );
+
+    // File scaling * Application Scaling
+    float importerScale = pImp->GetPropertyFloat( AI_CONFIG_APP_SCALE_KEY, 1.0f );
+
+    // apply scale to the scale 
+    // helps prevent bugs with backward compatibility for anyone using normal scaling.
+    mScale *= importerScale;
 }
 
 void ScaleProcess::Execute( aiScene* pScene ) {
+    if(mScale == 1.0f)  {
+        return; // nothing to scale
+    }
+    
+    ai_assert( mScale != 0 );
+    ai_assert( nullptr != pScene );
+    ai_assert( nullptr != pScene->mRootNode );
+
     if ( nullptr == pScene ) {
         return;
     }
@@ -82,22 +96,113 @@ void ScaleProcess::Execute( aiScene* pScene ) {
     if ( nullptr == pScene->mRootNode ) {
         return;
     }
+    
+    // Process animations and update position transform to new unit system
+    for( unsigned int animationID = 0; animationID < pScene->mNumAnimations; animationID++ )
+    {
+        aiAnimation* animation = pScene->mAnimations[animationID];
+
+        for( unsigned int animationChannel = 0; animationChannel < animation->mNumChannels; animationChannel++)
+        {
+            aiNodeAnim* anim = animation->mChannels[animationChannel];
+            
+            for( unsigned int posKey = 0; posKey < anim->mNumPositionKeys; posKey++)
+            {
+                aiVectorKey& vectorKey = anim->mPositionKeys[posKey];
+                vectorKey.mValue *= mScale;
+            }
+        }
+    }
+
+    for( unsigned int meshID = 0; meshID < pScene->mNumMeshes; meshID++)
+    {
+        aiMesh *mesh = pScene->mMeshes[meshID]; 
+        
+        // Reconstruct mesh vertexes to the new unit system
+        for( unsigned int vertexID = 0; vertexID < mesh->mNumVertices; vertexID++)
+        {
+            aiVector3D& vertex = mesh->mVertices[vertexID];
+            vertex *= mScale;
+        }
+
+
+        // bone placement / scaling
+        for( unsigned int boneID = 0; boneID < mesh->mNumBones; boneID++)
+        {
+            // Reconstruct matrix by transform rather than by scale 
+            // This prevent scale values being changed which can
+            // be meaningful in some cases 
+            // like when you want the modeller to see 1:1 compatibility.
+            aiBone* bone = mesh->mBones[boneID];
+
+            aiVector3D pos, scale;
+            aiQuaternion rotation;
+
+            bone->mOffsetMatrix.Decompose( scale, rotation, pos);
+            
+            aiMatrix4x4 translation;
+            aiMatrix4x4::Translation( pos * mScale, translation );
+            
+            aiMatrix4x4 scaling;
+            aiMatrix4x4::Scaling( aiVector3D(scale), scaling );
+
+            aiMatrix4x4 RotMatrix = aiMatrix4x4 (rotation.GetMatrix());
+
+            bone->mOffsetMatrix = translation * RotMatrix * scaling;
+        }
+
+
+        // animation mesh processing
+        // convert by position rather than scale.
+        for( unsigned int animMeshID = 0; animMeshID < mesh->mNumAnimMeshes; animMeshID++)
+        {
+            aiAnimMesh * animMesh = mesh->mAnimMeshes[animMeshID];
+            
+            for( unsigned int vertexID = 0; vertexID < animMesh->mNumVertices; vertexID++)
+            {
+                aiVector3D& vertex = animMesh->mVertices[vertexID];
+                vertex *= mScale;
+            }
+        }
+    }
 
     traverseNodes( pScene->mRootNode );
 }
 
-void ScaleProcess::traverseNodes( aiNode *node ) {
+void ScaleProcess::traverseNodes( aiNode *node, unsigned int nested_node_id ) {    
     applyScaling( node );
+
+    for( size_t i = 0; i < node->mNumChildren; i++)
+    {
+        // recurse into the tree until we are done!
+        traverseNodes( node->mChildren[i], nested_node_id+1 ); 
+    }
 }
 
 void ScaleProcess::applyScaling( aiNode *currentNode ) {
     if ( nullptr != currentNode ) {
-        currentNode->mTransformation.a1 = currentNode->mTransformation.a1 * mScale;
-        currentNode->mTransformation.b2 = currentNode->mTransformation.b2 * mScale;
-        currentNode->mTransformation.c3 = currentNode->mTransformation.c3 * mScale;
+        // Reconstruct matrix by transform rather than by scale 
+        // This prevent scale values being changed which can
+        // be meaningful in some cases 
+        // like when you want the modeller to 
+        // see 1:1 compatibility.
+        
+        aiVector3D pos, scale;
+        aiQuaternion rotation;
+        currentNode->mTransformation.Decompose( scale, rotation, pos);
+        
+        aiMatrix4x4 translation;
+        aiMatrix4x4::Translation( pos * mScale, translation );
+        
+        aiMatrix4x4 scaling;
+
+        // note: we do not use mScale here, this is on purpose.
+        aiMatrix4x4::Scaling( scale, scaling );
+
+        aiMatrix4x4 RotMatrix = aiMatrix4x4 (rotation.GetMatrix());
+
+        currentNode->mTransformation = translation * RotMatrix * scaling;
     }
 }
 
 } // Namespace Assimp
-
-#endif // !! ASSIMP_BUILD_NO_GLOBALSCALE_PROCESS
diff --git a/thirdparty/assimp/code/PostProcessing/ScaleProcess.h b/thirdparty/assimp/code/PostProcessing/ScaleProcess.h
index 2567378759..468a216736 100644
--- a/thirdparty/assimp/code/PostProcessing/ScaleProcess.h
+++ b/thirdparty/assimp/code/PostProcessing/ScaleProcess.h
@@ -39,7 +39,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ----------------------------------------------------------------------
 */
-#pragma once
+#ifndef SCALE_PROCESS_H_
+#define SCALE_PROCESS_H_
 
 #include "Common/BaseProcess.h"
 
@@ -53,6 +54,11 @@ namespace Assimp {
 
 // ---------------------------------------------------------------------------
 /** ScaleProcess: Class to rescale the whole model.
+ * Now rescales animations, bones, and blend shapes properly.
+ * Please note this will not write to 'scale' transform it will rewrite mesh 
+ * and matrixes so that your scale values 
+ * from your model package are preserved, so this is completely intentional
+ * bugs should be reported as soon as they are found.
 */
 class ASSIMP_API ScaleProcess : public BaseProcess {
 public:
@@ -78,7 +84,7 @@ public:
     virtual void Execute( aiScene* pScene );
 
 private:
-    void traverseNodes( aiNode *currentNode );
+    void traverseNodes( aiNode *currentNode, unsigned int nested_node_id = 0 );
     void applyScaling( aiNode *currentNode );
 
 private:
@@ -86,3 +92,6 @@ private:
 };
 
 } // Namespace Assimp
+
+
+#endif // SCALE_PROCESS_H_
+\ No newline at end of file
diff --git a/thirdparty/assimp/contrib/utf8cpp/doc/ReleaseNotes b/thirdparty/assimp/contrib/utf8cpp/doc/ReleaseNotes
new file mode 100644
index 0000000000..364411a23d
--- /dev/null
+++ b/thirdparty/assimp/contrib/utf8cpp/doc/ReleaseNotes
@@ -0,0 +1,12 @@
+utf8 cpp library
+Release 2.3.4
+
+A minor bug fix release. Thanks to all who reported bugs. 
+
+Note: Version 2.3.3 contained a regression, and therefore was removed.
+
+Changes from version 2.3.2
+- Bug fix [39]: checked.h Line 273 and unchecked.h Line 182 have an extra ';'
+- Bug fix [36]: replace_invalid() only works with back_inserter
+
+Files included in the release: utf8.h, core.h, checked.h, unchecked.h, utf8cpp.html, ReleaseNotes
diff --git a/thirdparty/assimp/contrib/utf8cpp/doc/utf8cpp.html b/thirdparty/assimp/contrib/utf8cpp/doc/utf8cpp.html
new file mode 100644
index 0000000000..6f2aacbe7b
--- /dev/null
+++ b/thirdparty/assimp/contrib/utf8cpp/doc/utf8cpp.html
@@ -0,0 +1,1789 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html>
+  <head>
+    <meta name="generator" content=
+    "HTML Tidy for Linux/x86 (vers 1st November 2002), see www.w3.org">
+    <meta name="description" content=
+    "A simple, portable and lightweigt C++ library for easy handling of UTF-8 encoded strings">
+    <meta name="keywords" content="UTF-8 C++ portable utf8 unicode generic templates">
+    <meta name="author" content="Nemanja Trifunovic">
+    <title>
+      UTF8-CPP: UTF-8 with C++ in a Portable Way
+    </title>
+    <style type="text/css">
+    <!--
+    span.return_value {
+      color: brown;
+    }
+    span.keyword {
+      color: blue;
+    }
+    span.preprocessor {
+      color: navy;
+    }
+    span.literal {
+      color: olive;
+    }
+    span.comment {
+      color: green;
+    }
+    code {
+      font-weight: bold; 
+    }
+    ul.toc {
+      list-style-type: none;
+    }
+    p.version {
+      font-size: small;
+      font-style: italic;
+    }
+    -->
+        </style>
+  </head>
+  <body>
+    <h1>
+      UTF8-CPP: UTF-8 with C++ in a Portable Way
+    </h1>
+    <p>
+      <a href="https://sourceforge.net/projects/utfcpp">The Sourceforge project page</a>
+    </p>
+    <div id="toc">
+      <h2>
+        Table of Contents
+      </h2>
+      <ul class="toc">
+        <li>
+          <a href="#introduction">Introduction</a>
+        </li>
+        <li>
+          <a href="#examples">Examples of Use</a>
+          <ul class="toc">
+            <li>
+              <a href=#introsample>Introductionary Sample </a>
+            </li>
+            <li>
+              <a href=#validfile>Checking if a file contains valid UTF-8 text</a>
+            </li>
+            <li>
+              <a href=#fixinvalid>Ensure that a string contains valid UTF-8 text</a>
+            </li>
+          </ul>
+        <li>
+          <a href="#reference">Reference</a>
+          <ul class="toc">
+            <li>
+              <a href="#funutf8">Functions From utf8 Namespace </a>
+            </li>
+            <li>
+              <a href="#typesutf8">Types From utf8 Namespace </a>
+            </li>
+            <li>
+              <a href="#fununchecked">Functions From utf8::unchecked Namespace </a>
+            </li>
+            <li>
+              <a href="#typesunchecked">Types From utf8::unchecked Namespace </a>
+            </li>
+          </ul>
+        </li>
+        <li>
+          <a href="#points">Points of Interest</a>
+        </li>
+        <li>
+          <a href="#links">Links</a>
+        </li>
+      </ul>
+    </div>
+    <h2 id="introduction">
+      Introduction
+    </h2>
+    <p>
+      Many C++ developers miss an easy and portable way of handling Unicode encoded
+      strings. The original C++ Standard (known as C++98 or C++03) is Unicode agnostic.
+      C++11 provides some support for Unicode on core language and library level:
+      u8, u, and U character and string literals, char16_t and char32_t character types,
+      u16string and u32string library classes, and codecvt support for conversions 
+      between Unicode encoding forms.
+      In the meantime, developers use third party libraries like ICU, OS specific capabilities, or simply
+      roll out their own solutions.
+    </p>
+    <p>
+      In order to easily handle UTF-8 encoded Unicode strings, I came up with a small
+      generic library. For anybody used to work with STL algorithms and iterators, it should be
+      easy and natural to use. The code is freely available for any purpose - check out
+      the license at the beginning of the utf8.h file. If you run into
+      bugs or performance issues, please let me know and I'll do my best to address them.
+    </p>
+    <p>
+      The purpose of this article is not to offer an introduction to Unicode in general,
+      and UTF-8 in particular. If you are not familiar with Unicode, be sure to check out
+      <a href="http://www.unicode.org/">Unicode Home Page</a> or some other source of
+      information for Unicode. Also, it is not my aim to advocate the use of UTF-8
+      encoded strings in C++ programs; if you want to handle UTF-8 encoded strings from
+      C++, I am sure you have good reasons for it.
+    </p>
+    <h2 id="examples">
+      Examples of use
+    </h2>
+    <h3 id="introsample">
+      Introductionary Sample
+    </h3>
+    <p>
+      To illustrate the use of the library, let's start with a small but complete program 
+      that opens a file containing UTF-8 encoded text, reads it line by line, checks each line
+      for invalid UTF-8 byte sequences, and converts it to UTF-16 encoding and back to UTF-8:
+    </p>
+<pre>
+<span class="preprocessor">#include &lt;fstream&gt;</span>
+<span class="preprocessor">#include &lt;iostream&gt;</span>
+<span class="preprocessor">#include &lt;string&gt;</span>
+<span class="preprocessor">#include &lt;vector&gt;</span>
+<span class="preprocessor">#include "utf8.h"</span>
+<span class="keyword">using namespace</span> std;
+<span class="keyword">int</span> main(<span class="keyword">int</span> argc, <span class="keyword">char</span>** argv)
+{
+    <span class="keyword">if</span> (argc != <span class="literal">2</span>) {
+        cout &lt;&lt; <span class="literal">"\nUsage: docsample filename\n"</span>;
+        <span class="keyword">return</span> <span class="literal">0</span>;
+    }
+
+    <span class="keyword">const char</span>* test_file_path = argv[1];
+    <span class="comment">// Open the test file (contains UTF-8 encoded text)</span>
+    ifstream fs8(test_file_path);
+    <span class="keyword">if</span> (!fs8.is_open()) {
+    cout &lt;&lt; <span class=
+"literal">"Could not open "</span> &lt;&lt; test_file_path &lt;&lt; endl;
+    <span class="keyword">return</span> <span class="literal">0</span>;
+    }
+
+    <span class="keyword">unsigned</span> line_count = <span class="literal">1</span>;
+    string line;
+    <span class="comment">// Play with all the lines in the file</span>
+    <span class="keyword">while</span> (getline(fs8, line)) {
+       <span class="comment">// check for invalid utf-8 (for a simple yes/no check, there is also utf8::is_valid function)</span>
+        string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
+        <span class="keyword">if</span> (end_it != line.end()) {
+            cout &lt;&lt; <span class=
+"literal">"Invalid UTF-8 encoding detected at line "</span> &lt;&lt; line_count &lt;&lt; <span
+ class="literal">"\n"</span>;
+            cout &lt;&lt; <span class=
+"literal">"This part is fine: "</span> &lt;&lt; string(line.begin(), end_it) &lt;&lt; <span
+ class="literal">"\n"</span>;
+        }
+
+        <span class="comment">// Get the line length (at least for the valid part)</span>
+        <span class="keyword">int</span> length = utf8::distance(line.begin(), end_it);
+        cout &lt;&lt; <span class=
+"literal">"Length of line "</span> &lt;&lt; line_count &lt;&lt; <span class=
+"literal">" is "</span> &lt;&lt; length &lt;&lt;  <span class="literal">"\n"</span>;
+
+        <span class="comment">// Convert it to utf-16</span>
+        vector&lt;unsigned short&gt; utf16line;
+        utf8::utf8to16(line.begin(), end_it, back_inserter(utf16line));
+
+        <span class="comment">// And back to utf-8</span>
+        string utf8line; 
+        utf8::utf16to8(utf16line.begin(), utf16line.end(), back_inserter(utf8line));
+
+        <span class="comment">// Confirm that the conversion went OK:</span>
+        <span class="keyword">if</span> (utf8line != string(line.begin(), end_it))
+            cout &lt;&lt; <span class=
+"literal">"Error in UTF-16 conversion at line: "</span> &lt;&lt; line_count &lt;&lt; <span
+ class="literal">"\n"</span>;        
+
+        line_count++;
+    }
+    <span class="keyword">return</span> <span class="literal">0</span>;
+}
+</pre>
+    <p>
+      In the previous code sample, for each line we performed
+      a detection of invalid UTF-8 sequences with <code>find_invalid</code>; the number
+      of characters (more precisely - the number of Unicode code points, including the end
+      of line and even BOM if there is one) in each line was
+      determined with a use of <code>utf8::distance</code>; finally, we have converted
+      each line to UTF-16 encoding with <code>utf8to16</code> and back to UTF-8 with
+      <code>utf16to8</code>.
+    </p>
+    <h3 id="validfile">Checking if a file contains valid UTF-8 text</h3>
+<p>
+Here is a function that checks whether the content of a file is valid UTF-8 encoded text without
+reading the content into the memory:
+</p>
+<pre>    
+<span class="keyword">bool</span> valid_utf8_file(i<span class="keyword">const char</span>* file_name)
+{
+    ifstream ifs(file_name);
+    <span class="keyword">if</span> (!ifs)
+        <span class="keyword">return false</span>; <span class="comment">// even better, throw here</span>
+
+    istreambuf_iterator&lt;<span class="keyword">char</span>&gt; it(ifs.rdbuf());
+    istreambuf_iterator&lt;<span class="keyword">char</span>&gt; eos;
+
+    <span class="keyword">return</span> utf8::is_valid(it, eos);
+}
+</pre>
+<p>
+Because the function <code>utf8::is_valid()</code> works with input iterators, we were able
+to pass an <code>istreambuf_iterator</code> to it and read the content of the file directly 
+without loading it to the memory first.</p>
+<p>
+Note that other functions that take input iterator arguments can be used in a similar way. For
+instance, to read the content of a UTF-8 encoded text file and convert the text to UTF-16, just 
+do something like:
+</p>
+<pre>
+    utf8::utf8to16(it, eos, back_inserter(u16string));
+</pre>
+    <h3 id="fixinvalid">Ensure that a string contains valid UTF-8 text</h3>
+<p>
+If we have some text that "probably" contains UTF-8 encoded text and we want to
+replace any invalid UTF-8 sequence with a replacement character, something like 
+the following function may be used:
+</p>
+<pre>
+<span class="keyword">void</span> fix_utf8_string(std::string&amp; str)
+{
+    std::string temp;
+    utf8::replace_invalid(str.begin(), str.end(), back_inserter(temp));
+    str = temp;
+}
+</pre>
+<p>The function will replace any invalid UTF-8 sequence with a Unicode replacement character. 
+There is an overloaded function that enables the caller to supply their own replacement character.
+</p>
+    <h2 id="reference">
+      Reference
+    </h2>
+    <h3 id="funutf8">
+      Functions From utf8 Namespace
+    </h3>
+    <h4>
+      utf8::append
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence
+      to a UTF-8 string.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+octet_iterator append(uint32_t cp, octet_iterator result);
+   
+</pre>
+    <p>
+	  <code>octet_iterator</code>: an output iterator.<br>
+      <code>cp</code>: a 32 bit integer representing a code point to append to the
+      sequence.<br>
+       <code>result</code>: an output iterator to the place in the sequence where to
+      append the code point.<br>
+       <span class="return_value">Return value</span>: an iterator pointing to the place
+      after the newly appended sequence.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">unsigned char</span> u[<span class="literal">5</span>] = {<span
+class="literal">0</span>,<span class="literal">0</span>,<span class=
+"literal">0</span>,<span class="literal">0</span>,<span class="literal">0</span>};
+<span class="keyword">unsigned char</span>* end = append(<span class=
+"literal">0x0448</span>, u);
+assert (u[<span class="literal">0</span>] == <span class=
+"literal">0xd1</span> &amp;&amp; u[<span class="literal">1</span>] == <span class=
+"literal">0x88</span> &amp;&amp; u[<span class="literal">2</span>] == <span class=
+"literal">0</span> &amp;&amp; u[<span class="literal">3</span>] == <span class=
+"literal">0</span> &amp;&amp; u[<span class="literal">4</span>] == <span class=
+"literal">0</span>);
+</pre>
+    <p>
+      Note that <code>append</code> does not allocate any memory - it is the burden of
+      the caller to make sure there is enough memory allocated for the operation. To make
+      things more interesting, <code>append</code> can add anywhere between 1 and 4
+      octets to the sequence. In practice, you would most often want to use
+      <code>std::back_inserter</code> to ensure that the necessary memory is allocated.
+    </p>
+    <p>
+      In case of an invalid code point, a <code>utf8::invalid_code_point</code> exception
+      is thrown.
+    </p>
+    <h4>
+      utf8::next
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Given the iterator to the beginning of the UTF-8 sequence, it returns the code
+      point and moves the iterator to the next position.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+uint32_t next(octet_iterator&amp; it, octet_iterator end);
+   
+</pre>
+    <p>
+	  <code>octet_iterator</code>: an input iterator.<br>
+      <code>it</code>: a reference to an iterator pointing to the beginning of an UTF-8
+      encoded code point. After the function returns, it is incremented to point to the
+      beginning of the next code point.<br>
+       <code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
+      gets equal to <code>end</code> during the extraction of a code point, an
+      <code>utf8::not_enough_room</code> exception is thrown.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = next(w, twochars + <span class="literal">6</span>);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars + <span class="literal">3</span>);
+</pre>
+    <p>
+      This function is typically used to iterate through a UTF-8 encoded string.
+    </p>
+    <p>
+      In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
+      thrown.
+    </p>
+    <h4>
+      utf8::peek_next
+    </h4>
+    <p class="version">
+    Available in version 2.1 and later.
+    </p>
+    <p>
+      Given the iterator to the beginning of the UTF-8 sequence, it returns the code
+      point for the following sequence without changing the value of the iterator. 
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+uint32_t peek_next(octet_iterator it, octet_iterator end);
+   
+</pre>
+    <p>
+	  <code>octet_iterator</code>: an input iterator.<br>
+      <code>it</code>: an iterator pointing to the beginning of an UTF-8
+      encoded code point.<br>
+       <code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
+      gets equal to <code>end</code> during the extraction of a code point, an
+      <code>utf8::not_enough_room</code> exception is thrown.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = peek_next(w, twochars + <span class="literal">6</span>);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+      In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
+      thrown.
+    </p>
+    <h4>
+      utf8::prior
+    </h4>
+    <p class="version">
+    Available in version 1.02 and later.
+    </p>
+    <p>
+      Given a reference to an iterator pointing to an octet in a UTF-8 sequence, it
+      decreases the iterator until it hits the beginning of the previous UTF-8 encoded
+      code point and returns the 32 bits representation of the code point.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+uint32_t prior(octet_iterator&amp; it, octet_iterator start);
+   
+</pre>
+    <p>
+	  <code>octet_iterator</code>: a bidirectional iterator.<br>
+      <code>it</code>: a reference pointing to an octet within a UTF-8 encoded string.
+      After the function returns, it is decremented to point to the beginning of the
+      previous code point.<br>
+       <code>start</code>: an iterator to the beginning of the sequence where the search
+      for the beginning of a code point is performed. It is a
+      safety measure to prevent passing the beginning of the string in the search for a
+      UTF-8 lead octet.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      previous code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">unsigned char</span>* w = twochars + <span class=
+"literal">3</span>;
+<span class="keyword">int</span> cp = prior (w, twochars);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p> 
+      This function has two purposes: one is two iterate backwards through a UTF-8
+      encoded string. Note that it is usually a better idea to iterate forward instead,
+      since <code>utf8::next</code> is faster. The second purpose is to find a beginning
+      of a UTF-8 sequence if we have a random position within a string. Note that in that
+      case <code>utf8::prior</code> may not detect an invalid UTF-8 sequence in some scenarios:
+      for instance if there are superfluous trail octets, it will just skip them.
+    </p> 
+    <p>
+      <code>it</code> will typically point to the beginning of
+      a code point, and <code>start</code> will point to the
+      beginning of the string to ensure we don't go backwards too far. <code>it</code> is
+      decreased until it points to a lead UTF-8 octet, and then the UTF-8 sequence
+      beginning with that octet is decoded to a 32 bit representation and returned.
+    </p>
+    <p>
+      In case <code>start</code> is reached before a UTF-8 lead octet is hit, or if an
+      invalid UTF-8 sequence is started by the lead octet, an <code>invalid_utf8</code>
+      exception is thrown.
+    </p>
+    <p>In case <code>start</code> equals <code>it</code>, a <code>not_enough_room</code>
+      exception is thrown.
+    <h4>
+      utf8::previous
+    </h4>
+    <p class="version">
+    Deprecated in version 1.02 and later.
+    </p>
+    <p>
+      Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it
+      decreases the iterator until it hits the beginning of the previous UTF-8 encoded
+      code point and returns the 32 bits representation of the code point.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+uint32_t previous(octet_iterator&amp; it, octet_iterator pass_start);
+   
+</pre>
+    <p>
+	  <code>octet_iterator</code>: a random access iterator.<br>
+      <code>it</code>: a reference pointing to an octet within a UTF-8 encoded string.
+      After the function returns, it is decremented to point to the beginning of the
+      previous code point.<br>
+       <code>pass_start</code>: an iterator to the point in the sequence where the search
+      for the beginning of a code point is aborted if no result was reached. It is a
+      safety measure to prevent passing the beginning of the string in the search for a
+      UTF-8 lead octet.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      previous code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">unsigned char</span>* w = twochars + <span class=
+"literal">3</span>;
+<span class="keyword">int</span> cp = previous (w, twochars - <span class=
+"literal">1</span>);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+      <code>utf8::previous</code> is deprecated, and <code>utf8::prior</code> should
+      be used instead, although the existing code can continue using this function.
+      The problem is the parameter <code>pass_start</code> that points to the position
+      just before the beginning of the sequence. Standard containers don't have the 
+      concept of "pass start" and the function can not be used with their iterators.
+    </p>
+    <p>
+      <code>it</code> will typically point to the beginning of
+      a code point, and <code>pass_start</code> will point to the octet just before the
+      beginning of the string to ensure we don't go backwards too far. <code>it</code> is
+      decreased until it points to a lead UTF-8 octet, and then the UTF-8 sequence
+      beginning with that octet is decoded to a 32 bit representation and returned.
+    </p>
+    <p>
+      In case <code>pass_start</code> is reached before a UTF-8 lead octet is hit, or if an
+      invalid UTF-8 sequence is started by the lead octet, an <code>invalid_utf8</code>
+      exception is thrown
+    </p>
+    <h4>
+      utf8::advance
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Advances an iterator by the specified number of code points within an UTF-8
+      sequence.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, typename distance_type&gt; 
+<span class=
+"keyword">void</span> advance (octet_iterator&amp; it, distance_type n, octet_iterator end);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>distance_type</code>: an integral type convertible to <code>octet_iterator</code>'s difference type.<br>
+      <code>it</code>: a reference to an iterator pointing to the beginning of an UTF-8
+      encoded code point. After the function returns, it is incremented to point to the
+      nth following code point.<br>
+       <code>n</code>: a positive integer that shows how many code points we want to
+      advance.<br>
+       <code>end</code>: end of the UTF-8 sequence to be processed. If <code>it</code>
+      gets equal to <code>end</code> during the extraction of a code point, an
+      <code>utf8::not_enough_room</code> exception is thrown.<br>
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">unsigned char</span>* w = twochars;
+advance (w, <span class="literal">2</span>, twochars + <span class="literal">6</span>);
+assert (w == twochars + <span class="literal">5</span>);
+</pre>
+    <p>
+      This function works only "forward". In case of a negative <code>n</code>, there is
+      no effect.
+    </p>
+    <p>
+      In case of an invalid code point, a <code>utf8::invalid_code_point</code> exception
+      is thrown.
+    </p>
+    <h4>
+      utf8::distance
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Given the iterators to two UTF-8 encoded code points in a seqence, returns the
+      number of code points between them.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+<span class=
+"keyword">typename</span> std::iterator_traits&lt;octet_iterator&gt;::difference_type distance (octet_iterator first, octet_iterator last);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>first</code>: an iterator to a beginning of a UTF-8 encoded code point.<br>
+      <code>last</code>: an iterator to a "post-end" of the last UTF-8 encoded code
+      point in the sequence we are trying to determine the length. It can be the
+      beginning of a new code point, or not.<br>
+       <span class="return_value">Return value</span> the distance between the iterators,
+      in code points.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+size_t dist = utf8::distance(twochars, twochars + <span class="literal">5</span>);
+assert (dist == <span class="literal">2</span>);
+</pre>
+    <p>
+      This function is used to find the length (in code points) of a UTF-8 encoded
+      string. The reason it is called <em>distance</em>, rather than, say,
+      <em>length</em> is mainly because developers are used that <em>length</em> is an
+      O(1) function. Computing the length of an UTF-8 string is a linear operation, and
+      it looked better to model it after <code>std::distance</code> algorithm.
+    </p>
+    <p>
+      In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
+      thrown. If <code>last</code> does not point to the past-of-end of a UTF-8 seqence,
+      a <code>utf8::not_enough_room</code> exception is thrown.
+    </p>
+    <h4>
+      utf8::utf16to8
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts a UTF-16 encoded string to UTF-8.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> u16bit_iterator, <span class=
+"keyword">typename</span> octet_iterator&gt;
+octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result);
+   
+</pre>
+    <p>
+      <code>u16bit_iterator</code>: an input iterator.<br>
+      <code>octet_iterator</code>: an output iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-16 encoded
+      string to convert.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-16 encoded
+      string to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-8 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-8 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">unsigned short</span> utf16string[] = {<span class=
+"literal">0x41</span>, <span class="literal">0x0448</span>, <span class=
+"literal">0x65e5</span>, <span class="literal">0xd834</span>, <span class=
+"literal">0xdd1e</span>};
+vector&lt;<span class="keyword">unsigned char</span>&gt; utf8result;
+utf16to8(utf16string, utf16string + <span class=
+"literal">5</span>, back_inserter(utf8result));
+assert (utf8result.size() == <span class="literal">10</span>);    
+</pre>
+    <p>
+      In case of invalid UTF-16 sequence, a <code>utf8::invalid_utf16</code> exception is
+      thrown.
+    </p>
+    <h4>
+      utf8::utf8to16
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts an UTF-8 encoded string to UTF-16
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> u16bit_iterator, typename octet_iterator&gt;
+u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>u16bit_iterator</code>: an output iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 encoded
+      string to convert. &lt; br /&gt; <code>end</code>: an iterator pointing to
+      pass-the-end of the UTF-8 encoded string to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-16 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-16 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span> utf8_with_surrogates[] = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"</span>;
+vector &lt;<span class="keyword">unsigned short</span>&gt; utf16result;
+utf8to16(utf8_with_surrogates, utf8_with_surrogates + <span class=
+"literal">9</span>, back_inserter(utf16result));
+assert (utf16result.size() == <span class="literal">4</span>);
+assert (utf16result[<span class="literal">2</span>] == <span class=
+"literal">0xd834</span>);
+assert (utf16result[<span class="literal">3</span>] == <span class=
+"literal">0xdd1e</span>);
+</pre>
+    <p>
+      In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
+      thrown. If <code>end</code> does not point to the past-of-end of a UTF-8 seqence, a
+      <code>utf8::not_enough_room</code> exception is thrown.
+    </p>
+    <h4>
+      utf8::utf32to8
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts a UTF-32 encoded string to UTF-8.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, typename u32bit_iterator&gt;
+octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an output iterator.<br>
+      <code>u32bit_iterator</code>: an input iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-32 encoded
+      string to convert.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-32 encoded
+      string to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-8 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-8 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">int</span> utf32string[] = {<span class=
+"literal">0x448</span>, <span class="literal">0x65E5</span>, <span class=
+"literal">0x10346</span>, <span class="literal">0</span>};
+vector&lt;<span class="keyword">unsigned char</span>&gt; utf8result;
+utf32to8(utf32string, utf32string + <span class=
+"literal">3</span>, back_inserter(utf8result));
+assert (utf8result.size() == <span class="literal">9</span>);
+</pre>
+    <p>
+      In case of invalid UTF-32 string, a <code>utf8::invalid_code_point</code> exception
+      is thrown.
+    </p>
+    <h4>
+      utf8::utf8to32
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts a UTF-8 encoded string to UTF-32.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, <span class=
+"keyword">typename</span> u32bit_iterator&gt;
+u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>u32bit_iterator</code>: an output iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 encoded
+      string to convert.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-8 encoded string
+      to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-32 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-32 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+vector&lt;<span class="keyword">int</span>&gt; utf32result;
+utf8to32(twochars, twochars + <span class=
+"literal">5</span>, back_inserter(utf32result));
+assert (utf32result.size() == <span class="literal">2</span>);
+</pre>
+    <p>
+      In case of an invalid UTF-8 seqence, a <code>utf8::invalid_utf8</code> exception is
+      thrown. If <code>end</code> does not point to the past-of-end of a UTF-8 seqence, a
+      <code>utf8::not_enough_room</code> exception is thrown.
+    </p>
+    <h4>
+      utf8::find_invalid
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Detects an invalid sequence within a UTF-8 string.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+octet_iterator find_invalid(octet_iterator start, octet_iterator end);
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 string to
+      test for validity.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-8 string to test
+      for validity.<br>
+       <span class="return_value">Return value</span>: an iterator pointing to the first
+      invalid octet in the UTF-8 string. In case none were found, equals
+      <code>end</code>.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span> utf_invalid[] = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88\xfa"</span>;
+<span class=
+"keyword">char</span>* invalid = find_invalid(utf_invalid, utf_invalid + <span class=
+"literal">6</span>);
+assert (invalid == utf_invalid + <span class="literal">5</span>);
+</pre>
+    <p>
+      This function is typically used to make sure a UTF-8 string is valid before
+      processing it with other functions. It is especially important to call it if before
+      doing any of the <em>unchecked</em> operations on it.
+    </p>
+    <h4>
+      utf8::is_valid
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Checks whether a sequence of octets is a valid UTF-8 string.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+<span class="keyword">bool</span> is_valid(octet_iterator start, octet_iterator end);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 string to
+      test for validity.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-8 string to test
+      for validity.<br>
+       <span class="return_value">Return value</span>: <code>true</code> if the sequence
+      is a valid UTF-8 string; <code>false</code> if not.
+    </p>
+    Example of use: 
+<pre>
+<span class="keyword">char</span> utf_invalid[] = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88\xfa"</span>;
+<span class="keyword">bool</span> bvalid = is_valid(utf_invalid, utf_invalid + <span
+class="literal">6</span>);
+assert (bvalid == false);
+</pre>
+    <p>
+      <code>is_valid</code> is a shorthand for <code>find_invalid(start, end) ==
+      end;</code>. You may want to use it to make sure that a byte seqence is a valid
+      UTF-8 string without the need to know where it fails if it is not valid.
+    </p>
+    <h4>
+      utf8::replace_invalid
+    </h4>
+    <p class="version">
+    Available in version 2.0 and later.
+    </p>
+    <p>
+      Replaces all invalid UTF-8 sequences within a string with a replacement marker.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, <span class=
+"keyword">typename</span> output_iterator&gt;
+output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement);
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, <span class=
+"keyword">typename</span> output_iterator&gt;
+output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out);
+   
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>output_iterator</code>: an output iterator.<br>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 string to
+      look for invalid UTF-8 sequences.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-8 string to look
+      for invalid UTF-8 sequences.<br>
+       <code>out</code>: An output iterator to the range where the result of replacement
+      is stored.<br>
+       <code>replacement</code>: A Unicode code point for the replacement marker. The
+      version without this parameter assumes the value <code>0xfffd</code><br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the UTF-8 string with replaced invalid sequences.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span> invalid_sequence[] = <span class=
+"literal">"a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"</span>;
+vector&lt;<span class="keyword">char</span>&gt; replace_invalid_result;
+replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), back_inserter(replace_invalid_result), <span
+ class="literal">'?'</span>);
+bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end());
+assert (bvalid);
+<span class="keyword">char</span>* fixed_invalid_sequence = <span class=
+"literal">"a????z"</span>;
+assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
+</pre>
+    <p>
+      <code>replace_invalid</code> does not perform in-place replacement of invalid
+      sequences. Rather, it produces a copy of the original string with the invalid
+      sequences replaced with a replacement marker. Therefore, <code>out</code> must not
+      be in the <code>[start, end]</code> range.
+    </p>
+    <p>
+      If <code>end</code> does not point to the past-of-end of a UTF-8 sequence, a
+      <code>utf8::not_enough_room</code> exception is thrown.
+    </p>
+    <h4>
+      utf8::starts_with_bom
+    </h4>
+    <p class="version">
+    Available in version 2.3 and later. Relaces deprecated <code>is_bom()</code> function.
+    </p>
+    <p>
+      Checks whether an octet sequence starts with a UTF-8 byte order mark (BOM)
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+<span class="keyword">bool</span> starts_with_bom (octet_iterator it, octet_iterator end);
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>it</code>: beginning of the octet sequence to check<br>
+      <code>end</code>: pass-end of the sequence to check<br>
+       <span class="return_value">Return value</span>: <code>true</code> if the sequence
+      starts with a UTF-8 byte order mark; <code>false</code> if not.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">unsigned char</span> byte_order_mark[] = {<span class=
+"literal">0xef</span>, <span class="literal">0xbb</span>, <span class=
+"literal">0xbf</span>};
+<span class="keyword">bool</span> bbom = starts_with_bom(byte_order_mark, byte_order_mark + <span class="keyword">sizeof</span>(byte_order_mark));
+assert (bbom == <span class="literal">true</span>);
+</pre>
+    <p>
+      The typical use of this function is to check the first three bytes of a file. If
+      they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8
+      encoded text.
+    </p>
+    <h4>
+      utf8::is_bom
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later. Deprecated in version 2.3. <code>starts_with_bom()</code> should be used
+    instead.
+    </p>
+    <p>
+      Checks whether a sequence of three octets is a UTF-8 byte order mark (BOM)
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt; 
+<span class="keyword">bool</span> is_bom (octet_iterator it); <span class="comment"> // Deprecated</span>
+</pre>
+    <p>
+      <code>octet_iterator</code>: an input iterator.<br>
+      <code>it</code>: beginning of the 3-octet sequence to check<br>
+       <span class="return_value">Return value</span>: <code>true</code> if the sequence
+      is UTF-8 byte order mark; <code>false</code> if not.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">unsigned char</span> byte_order_mark[] = {<span class=
+"literal">0xef</span>, <span class="literal">0xbb</span>, <span class=
+"literal">0xbf</span>};
+<span class="keyword">bool</span> bbom = is_bom(byte_order_mark);
+assert (bbom == <span class="literal">true</span>);
+</pre>
+    <p>
+      The typical use of this function is to check the first three bytes of a file. If
+      they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8
+      encoded text.
+    </p>
+    <p>
+      If a sequence is 
+      shorter than three bytes, an invalid iterator will be dereferenced. Therefore, this function is deprecated
+      in favor of <code>starts_with_bom()</code>that takes the end of sequence as an argument.
+    </p>
+    <h3 id="typesutf8">
+      Types From utf8 Namespace
+    </h3>
+    <h4>utf8::exception
+    </h4>
+    <p class="version">
+    Available in version 2.3 and later.
+    </p>
+    <p>
+    Base class for the exceptions thrown by UTF CPP library functions.
+    </p>
+<pre>
+<span class="keyword">class</span> exception : <span class="keyword">public</span> std::exception {};
+</pre>
+    <p>
+    Example of use:
+    </p>
+<pre>
+<span class="keyword">try</span> {
+  code_that_uses_utf_cpp_library();
+}
+<span class="keyword">catch</span>(<span class="keyword">const</span> utf8::exception&amp; utfcpp_ex) {
+  cerr &lt;&lt; utfcpp_ex.what();
+}
+</pre>
+    
+    <h4>utf8::invalid_code_point
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+    Thrown by UTF8 CPP functions such as <code>advance</code> and <code>next</code> if an UTF-8 sequence represents and invalid code point.
+    </p>
+
+<pre>
+<span class="keyword">class</span> invalid_code_point : <span class="keyword">public</span> exception {
+<span class="keyword">public</span>: 
+    uint32_t code_point() <span class="keyword">const</span>;
+};
+
+</pre>
+    <p>
+    Member function <code>code_point()</code> can be used to determine the invalid code point that
+    caused the exception to be thrown.
+    </p>
+    <h4>utf8::invalid_utf8
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+    Thrown by UTF8 CPP functions such as <code>next</code> and <code>prior</code> if an invalid UTF-8 sequence
+    is detected during decoding.
+    </p>
+
+<pre>
+<span class="keyword">class</span> invalid_utf8 : <span class="keyword">public</span> exception {
+<span class="keyword">public</span>: 
+    uint8_t utf8_octet() <span class="keyword">const</span>;
+};
+</pre>
+
+    <p>
+    Member function <code>utf8_octet()</code> can be used to determine the beginning of the byte 
+    sequence that caused the exception to be thrown.
+    </p>
+</pre>
+    <h4>utf8::invalid_utf16
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+    Thrown by UTF8 CPP function <code>utf16to8</code> if an invalid UTF-16 sequence
+    is detected during decoding.
+    </p>
+
+<pre>
+<span class="keyword">class</span> invalid_utf16 : <span class="keyword">public</span> exception {
+<span class="keyword">public</span>: 
+    uint16_t utf16_word() <span class="keyword">const</span>;
+};
+</pre>
+
+    <p>
+    Member function <code>utf16_word()</code> can be used to determine the UTF-16 code unit 
+    that caused the exception to be thrown.
+    </p>
+    <h4>utf8::not_enough_room
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+    Thrown by UTF8 CPP functions such as <code>next</code> if the end of the decoded UTF-8 sequence
+    was reached before the code point was decoded.
+    </p>
+
+<pre>
+<span class="keyword">class</span> not_enough_room : <span class="keyword">public</span> exception {};
+</pre>
+    <h4>
+      utf8::iterator
+    </h4>
+    <p class="version">
+    Available in version 2.0 and later.
+    </p>
+    <p>
+      Adapts the underlying octet iterator to iterate over the sequence of code points,
+      rather than raw octets.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class="keyword">typename</span> octet_iterator&gt;
+<span class="keyword">class</span> iterator;
+</pre>
+    
+    <h5>Member functions</h5>
+      <dl>
+      <dt><code>iterator();</code> <dd> the deafult constructor; the underlying <code>octet_iterator</code> is
+      constructed with its default constructor.
+      <dt><code><span class="keyword">explicit</span> iterator (const octet_iterator&amp; octet_it, 
+                         const octet_iterator&amp; range_start,
+                         const octet_iterator&amp; range_end);</code> <dd> a constructor 
+      that initializes the underlying <code>octet_iterator</code> with <code>octet_it</code>
+      and sets the range in which the iterator is considered valid.
+      <dt><code>octet_iterator base () <span class="keyword">const</span>;</code> <dd> returns the 
+      underlying <code>octet_iterator</code>.
+      <dt><code>uint32_t operator * () <span class="keyword">const</span>;</code> <dd> decodes the utf-8 sequence
+      the underlying <code>octet_iterator</code> is pointing to and returns the code point.
+      <dt><code><span class="keyword">bool operator</span> == (const iterator&amp; rhs)
+      <span class="keyword">const</span>;</code> <dd> returns <span class="keyword">true</span>
+      if the two underlaying iterators are equal.
+      <dt><code><span class="keyword">bool operator</span> != (const iterator&amp; rhs)
+      <span class="keyword">const</span>;</code> <dd> returns <span class="keyword">true</span>
+      if the two underlaying iterators are not equal.
+      <dt><code>iterator&amp; <span class="keyword">operator</span> ++ (); </code> <dd> the prefix increment - moves
+      the iterator to the next UTF-8 encoded code point.
+      <dt><code>iterator <span class="keyword">operator</span> ++ (<span class="keyword">int</span>); </code> <dd>
+      the postfix increment - moves the iterator to the next UTF-8 encoded code point and returns the current one.
+      <dt><code>iterator&amp; <span class="keyword">operator</span> -- (); </code> <dd> the prefix decrement - moves
+      the iterator to the previous UTF-8 encoded code point.
+      <dt><code>iterator <span class="keyword">operator</span> -- (<span class="keyword">int</span>); </code> <dd>
+      the postfix decrement - moves the iterator to the previous UTF-8 encoded code point and returns the current one.
+      </dl>
+      <p>
+      Example of use:
+      </p>
+<pre>
+<span class="keyword">char</span>* threechars = <span class="literal">"\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"</span>;
+utf8::iterator&lt;<span class="keyword">char</span>*&gt; it(threechars, threechars, threechars + <span class="literal">9</span>);
+utf8::iterator&lt;<span class="keyword">char</span>*&gt; it2 = it;
+assert (it2 == it);
+assert (*it == <span class="literal">0x10346</span>);
+assert (*(++it) == <span class="literal">0x65e5</span>);
+assert ((*it++) == <span class="literal">0x65e5</span>);
+assert (*it == <span class="literal">0x0448</span>);
+assert (it != it2);
+utf8::iterator&lt;<span class="keyword">char</span>*&gt; endit (threechars + <span class="literal">9</span>, threechars, threechars + <span class="literal">9</span>);  
+assert (++it == endit);
+assert (*(--it) == <span class="literal">0x0448</span>);
+assert ((*it--) == <span class="literal">0x0448</span>);
+assert (*it == <span class="literal">0x65e5</span>);
+assert (--it == utf8::iterator&lt;<span class="keyword">char</span>*&gt;(threechars, threechars, threechars + <span class="literal">9</span>));
+assert (*it == <span class="literal">0x10346</span>);
+</pre>
+      <p>
+      The purpose of <code>utf8::iterator</code> adapter is to enable easy iteration as well as the use of STL
+      algorithms with UTF-8 encoded strings. Increment and decrement operators are implemented in terms of 
+      <code>utf8::next()</code> and <code>utf8::prior()</code> functions. 
+      </p>
+      <p>
+      Note that <code>utf8::iterator</code> adapter is a checked iterator. It operates on the range specified in
+      the constructor; any attempt to go out of that range will result in an exception. Even the comparison operators
+      require both iterator object to be constructed against the same range - otherwise an exception is thrown. Typically,
+      the range will be determined by sequence container functions <code>begin</code> and <code>end</code>, i.e.:
+      </p>
+<pre>
+std::string s = <span class="literal">"example"</span>;
+utf8::iterator i (s.begin(), s.begin(), s.end());
+</pre>
+    <h3 id="fununchecked">
+      Functions From utf8::unchecked Namespace
+    </h3>
+    <h4>
+      utf8::unchecked::append
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Encodes a 32 bit code point as a UTF-8 sequence of octets and appends the sequence
+      to a UTF-8 string.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+octet_iterator append(uint32_t cp, octet_iterator result);
+   
+</pre>
+    <p>
+      <code>cp</code>: A 32 bit integer representing a code point to append to the
+      sequence.<br>
+       <code>result</code>: An output iterator to the place in the sequence where to
+      append the code point.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the newly appended sequence.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">unsigned char</span> u[<span class="literal">5</span>] = {<span
+class="literal">0</span>,<span class="literal">0</span>,<span class=
+"literal">0</span>,<span class="literal">0</span>,<span class="literal">0</span>};
+<span class="keyword">unsigned char</span>* end = unchecked::append(<span class=
+"literal">0x0448</span>, u);
+assert (u[<span class="literal">0</span>] == <span class=
+"literal">0xd1</span> &amp;&amp; u[<span class="literal">1</span>] == <span class=
+"literal">0x88</span> &amp;&amp; u[<span class="literal">2</span>] == <span class=
+"literal">0</span> &amp;&amp; u[<span class="literal">3</span>] == <span class=
+"literal">0</span> &amp;&amp; u[<span class="literal">4</span>] == <span class=
+"literal">0</span>);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::append</code>. It does not
+      check for validity of the supplied code point, and may produce an invalid UTF-8
+      sequence.
+    </p>
+    <h4>
+      utf8::unchecked::next
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Given the iterator to the beginning of a UTF-8 sequence, it returns the code point
+      and moves the iterator to the next position.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+uint32_t next(octet_iterator&amp; it);
+   
+</pre>
+    <p>
+      <code>it</code>: a reference to an iterator pointing to the beginning of an UTF-8
+      encoded code point. After the function returns, it is incremented to point to the
+      beginning of the next code point.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = unchecked::next(w);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars + <span class="literal">3</span>);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::next</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
+    <h4>
+      utf8::unchecked::peek_next
+    </h4>
+    <p class="version">
+    Available in version 2.1 and later.
+    </p>
+    <p>
+      Given the iterator to the beginning of a UTF-8 sequence, it returns the code point.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+uint32_t peek_next(octet_iterator it);
+   
+</pre>
+    <p>
+      <code>it</code>: an iterator pointing to the beginning of an UTF-8
+      encoded code point.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      processed UTF-8 code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+<span class="keyword">int</span> cp = unchecked::peek_next(w);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::peek_next</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
+    <h4>
+      utf8::unchecked::prior
+    </h4>
+    <p class="version">
+    Available in version 1.02 and later.
+    </p>
+    <p>
+      Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it
+      decreases the iterator until it hits the beginning of the previous UTF-8 encoded
+      code point and returns the 32 bits representation of the code point.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+uint32_t prior(octet_iterator&amp; it);
+   
+</pre>
+    <p>
+      <code>it</code>: a reference pointing to an octet within a UTF-8 encoded string.
+      After the function returns, it is decremented to point to the beginning of the
+      previous code point.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      previous code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars + <span class="literal">3</span>;
+<span class="keyword">int</span> cp = unchecked::prior (w);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::prior</code>. It does not
+      check for validity of the supplied UTF-8 sequence and offers no boundary checking.
+    </p>
+    <h4>
+      utf8::unchecked::previous (deprecated, see utf8::unchecked::prior)
+    </h4>
+    <p class="version">
+    Deprecated in version 1.02 and later.
+    </p>
+    <p>
+      Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it
+      decreases the iterator until it hits the beginning of the previous UTF-8 encoded
+      code point and returns the 32 bits representation of the code point.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+uint32_t previous(octet_iterator&amp; it);
+   
+</pre>
+    <p>
+      <code>it</code>: a reference pointing to an octet within a UTF-8 encoded string.
+      After the function returns, it is decremented to point to the beginning of the
+      previous code point.<br>
+       <span class="return_value">Return value</span>: the 32 bit representation of the
+      previous code point.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars + <span class="literal">3</span>;
+<span class="keyword">int</span> cp = unchecked::previous (w);
+assert (cp == <span class="literal">0x65e5</span>);
+assert (w == twochars);
+</pre>
+    <p>
+     The reason this function is deprecated is just the consistency with the "checked"
+     versions, where <code>prior</code> should be used instead of <code>previous</code>.
+     In fact, <code>unchecked::previous</code> behaves exactly the same as <code>
+     unchecked::prior</code>
+    </p>
+    <p>
+      This is a faster but less safe version of <code>utf8::previous</code>. It does not
+      check for validity of the supplied UTF-8 sequence and offers no boundary checking.
+    </p>
+    <h4>
+      utf8::unchecked::advance
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Advances an iterator by the specified number of code points within an UTF-8
+      sequence.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, typename distance_type&gt;
+<span class="keyword">void</span> advance (octet_iterator&amp; it, distance_type n);
+   
+</pre>
+    <p>
+      <code>it</code>: a reference to an iterator pointing to the beginning of an UTF-8
+      encoded code point. After the function returns, it is incremented to point to the
+      nth following code point.<br>
+       <code>n</code>: a positive integer that shows how many code points we want to
+      advance.<br>
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+<span class="keyword">char</span>* w = twochars;
+unchecked::advance (w, <span class="literal">2</span>);
+assert (w == twochars + <span class="literal">5</span>);
+</pre>
+    <p>
+      This function works only "forward". In case of a negative <code>n</code>, there is
+      no effect.
+    </p>
+    <p>
+      This is a faster but less safe version of <code>utf8::advance</code>. It does not
+      check for validity of the supplied UTF-8 sequence and offers no boundary checking.
+    </p>
+    <h4>
+      utf8::unchecked::distance
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Given the iterators to two UTF-8 encoded code points in a seqence, returns the
+      number of code points between them.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator&gt;
+<span class=
+"keyword">typename</span> std::iterator_traits&lt;octet_iterator&gt;::difference_type distance (octet_iterator first, octet_iterator last);
+</pre>
+    <p>
+      <code>first</code>: an iterator to a beginning of a UTF-8 encoded code point.<br>
+       <code>last</code>: an iterator to a "post-end" of the last UTF-8 encoded code
+      point in the sequence we are trying to determine the length. It can be the
+      beginning of a new code point, or not.<br>
+       <span class="return_value">Return value</span> the distance between the iterators,
+      in code points.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+size_t dist = utf8::unchecked::distance(twochars, twochars + <span class=
+"literal">5</span>);
+assert (dist == <span class="literal">2</span>);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::distance</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
+    <h4>
+      utf8::unchecked::utf16to8
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts a UTF-16 encoded string to UTF-8.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> u16bit_iterator, <span class=
+"keyword">typename</span> octet_iterator&gt;
+octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result);
+   
+</pre>
+    <p>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-16 encoded
+      string to convert.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-16 encoded
+      string to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-8 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-8 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">unsigned short</span> utf16string[] = {<span class=
+"literal">0x41</span>, <span class="literal">0x0448</span>, <span class=
+"literal">0x65e5</span>, <span class="literal">0xd834</span>, <span class=
+"literal">0xdd1e</span>};
+vector&lt;<span class="keyword">unsigned char</span>&gt; utf8result;
+unchecked::utf16to8(utf16string, utf16string + <span class=
+"literal">5</span>, back_inserter(utf8result));
+assert (utf8result.size() == <span class="literal">10</span>);    
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::utf16to8</code>. It does not
+      check for validity of the supplied UTF-16 sequence.
+    </p>
+    <h4>
+      utf8::unchecked::utf8to16
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts an UTF-8 encoded string to UTF-16
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> u16bit_iterator, typename octet_iterator&gt;
+u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result);
+   
+</pre>
+    <p>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 encoded
+      string to convert. &lt; br /&gt; <code>end</code>: an iterator pointing to
+      pass-the-end of the UTF-8 encoded string to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-16 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-16 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span> utf8_with_surrogates[] = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"</span>;
+vector &lt;<span class="keyword">unsigned short</span>&gt; utf16result;
+unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + <span class=
+"literal">9</span>, back_inserter(utf16result));
+assert (utf16result.size() == <span class="literal">4</span>);
+assert (utf16result[<span class="literal">2</span>] == <span class=
+"literal">0xd834</span>);
+assert (utf16result[<span class="literal">3</span>] == <span class=
+"literal">0xdd1e</span>);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::utf8to16</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
+    <h4>
+      utf8::unchecked::utf32to8
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts a UTF-32 encoded string to UTF-8.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, <span class=
+"keyword">typename</span> u32bit_iterator&gt;
+octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result);
+   
+</pre>
+    <p>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-32 encoded
+      string to convert.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-32 encoded
+      string to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-8 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-8 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">int</span> utf32string[] = {<span class=
+"literal">0x448</span>, <span class="literal">0x65e5</span>, <span class=
+"literal">0x10346</span>, <span class="literal">0</span>};
+vector&lt;<span class="keyword">unsigned char</span>&gt; utf8result;
+utf32to8(utf32string, utf32string + <span class=
+"literal">3</span>, back_inserter(utf8result));
+assert (utf8result.size() == <span class="literal">9</span>);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::utf32to8</code>. It does not
+      check for validity of the supplied UTF-32 sequence.
+    </p>
+    <h4>
+      utf8::unchecked::utf8to32
+    </h4>
+    <p class="version">
+    Available in version 1.0 and later.
+    </p>
+    <p>
+      Converts a UTF-8 encoded string to UTF-32.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class=
+"keyword">typename</span> octet_iterator, typename u32bit_iterator&gt;
+u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result);
+   
+</pre>
+    <p>
+      <code>start</code>: an iterator pointing to the beginning of the UTF-8 encoded
+      string to convert.<br>
+       <code>end</code>: an iterator pointing to pass-the-end of the UTF-8 encoded string
+      to convert.<br>
+       <code>result</code>: an output iterator to the place in the UTF-32 string where to
+      append the result of conversion.<br>
+       <span class="return_value">Return value</span>: An iterator pointing to the place
+      after the appended UTF-32 string.
+    </p>
+    <p>
+      Example of use:
+    </p>
+<pre>
+<span class="keyword">char</span>* twochars = <span class=
+"literal">"\xe6\x97\xa5\xd1\x88"</span>;
+vector&lt;<span class="keyword">int</span>&gt; utf32result;
+unchecked::utf8to32(twochars, twochars + <span class=
+"literal">5</span>, back_inserter(utf32result));
+assert (utf32result.size() == <span class="literal">2</span>);
+</pre>
+    <p>
+      This is a faster but less safe version of <code>utf8::utf8to32</code>. It does not
+      check for validity of the supplied UTF-8 sequence.
+    </p>
+    <h3 id="typesunchecked">
+      Types From utf8::unchecked Namespace
+    </h3>
+    <h4>
+      utf8::iterator
+    </h4>
+    <p class="version">
+    Available in version 2.0 and later.
+    </p>
+    <p>
+      Adapts the underlying octet iterator to iterate over the sequence of code points,
+      rather than raw octets.
+    </p>
+<pre>
+<span class="keyword">template</span> &lt;<span class="keyword">typename</span> octet_iterator&gt;
+<span class="keyword">class</span> iterator;
+</pre>
+    
+    <h5>Member functions</h5>
+      <dl>
+      <dt><code>iterator();</code> <dd> the deafult constructor; the underlying <code>octet_iterator</code> is
+      constructed with its default constructor.
+      <dt><code><span class="keyword">explicit</span> iterator (const octet_iterator&amp; octet_it); 
+                         </code> <dd> a constructor 
+      that initializes the underlying <code>octet_iterator</code> with <code>octet_it</code>
+      <dt><code>octet_iterator base () <span class="keyword">const</span>;</code> <dd> returns the 
+      underlying <code>octet_iterator</code>.
+      <dt><code>uint32_t operator * () <span class="keyword">const</span>;</code> <dd> decodes the utf-8 sequence
+      the underlying <code>octet_iterator</code> is pointing to and returns the code point.
+      <dt><code><span class="keyword">bool operator</span> == (const iterator&amp; rhs)
+      <span class="keyword">const</span>;</code> <dd> returns <span class="keyword">true</span>
+      if the two underlaying iterators are equal.
+      <dt><code><span class="keyword">bool operator</span> != (const iterator&amp; rhs)
+      <span class="keyword">const</span>;</code> <dd> returns <span class="keyword">true</span>
+      if the two underlaying iterators are not equal.
+      <dt><code>iterator&amp; <span class="keyword">operator</span> ++ (); </code> <dd> the prefix increment - moves
+      the iterator to the next UTF-8 encoded code point.
+      <dt><code>iterator <span class="keyword">operator</span> ++ (<span class="keyword">int</span>); </code> <dd>
+      the postfix increment - moves the iterator to the next UTF-8 encoded code point and returns the current one.
+      <dt><code>iterator&amp; <span class="keyword">operator</span> -- (); </code> <dd> the prefix decrement - moves
+      the iterator to the previous UTF-8 encoded code point.
+      <dt><code>iterator <span class="keyword">operator</span> -- (<span class="keyword">int</span>); </code> <dd>
+      the postfix decrement - moves the iterator to the previous UTF-8 encoded code point and returns the current one.
+      </dl>
+      <p>
+      Example of use:
+      </p>
+<pre>
+<span class="keyword">char</span>* threechars = <span class="literal">"\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"</span>;
+utf8::unchecked::iterator&lt;<span class="keyword">char</span>*&gt; un_it(threechars);
+utf8::unchecked::iterator&lt;<span class="keyword">char</span>*&gt; un_it2 = un_it;
+assert (un_it2 == un_it);
+assert (*un_it == <span class="literal">0x10346</span>);
+assert (*(++un_it) == <span class="literal">0x65e5</span>);
+assert ((*un_it++) == <span class="literal">0x65e5</span>);
+assert (*un_it == <span class="literal">0x0448</span>);
+assert (un_it != un_it2);
+utf8::::unchecked::iterator&lt;<span class="keyword">char</span>*&gt; un_endit (threechars + <span class="literal">9</span>);  
+assert (++un_it == un_endit);
+assert (*(--un_it) == <span class="literal">0x0448</span>);
+assert ((*un_it--) == <span class="literal">0x0448</span>);
+assert (*un_it == <span class="literal">0x65e5</span>);
+assert (--un_it == utf8::unchecked::iterator&lt;<span class="keyword">char</span>*&gt;(threechars));
+assert (*un_it == <span class="literal">0x10346</span>);
+</pre>
+      <p>
+      This is an unchecked version of <code>utf8::iterator</code>. It is faster in many cases, but offers
+      no validity or range checks.
+      </p>
+    <h2 id="points">
+      Points of interest
+    </h2>
+    <h4>
+      Design goals and decisions
+    </h4>
+    <p>
+      The library was designed to be:
+    </p>
+    <ol>
+      <li>
+        Generic: for better or worse, there are many C++ string classes out there, and
+        the library should work with as many of them as possible.
+      </li>
+      <li>
+        Portable: the library should be portable both accross different platforms and
+        compilers. The only non-portable code is a small section that declares unsigned
+        integers of different sizes: three typedefs. They can be changed by the users of
+        the library if they don't match their platform. The default setting should work
+        for Windows (both 32 and 64 bit), and most 32 bit and 64 bit Unix derivatives.
+      </li>
+      <li>
+        Lightweight: follow the "pay only for what you use" guideline.
+      </li>
+      <li>
+        Unintrusive: avoid forcing any particular design or even programming style on the
+        user. This is a library, not a framework.
+      </li>
+    </ol>
+    <h4>
+      Alternatives
+    </h4>
+    <p>
+      In case you want to look into other means of working with UTF-8 strings from C++,
+      here is the list of solutions I am aware of:
+    </p>
+    <ol>
+      <li>
+        <a href="http://icu.sourceforge.net/">ICU Library</a>. It is very powerful,
+        complete, feature-rich, mature, and widely used. Also big, intrusive,
+        non-generic, and doesn't play well with the Standard Library. I definitelly
+        recommend looking at ICU even if you don't plan to use it.
+      </li>
+      <li>
+        C++11 language and library features. Still far from complete, and not widely
+        supported by compiler vendors. 
+      </li>
+      <li>
+        <a href=
+        "http://www.gtkmm.org/gtkmm2/docs/tutorial/html/ch03s04.html">Glib::ustring</a>.
+        A class specifically made to work with UTF-8 strings, and also feel like
+        <code>std::string</code>. If you prefer to have yet another string class in your
+        code, it may be worth a look. Be aware of the licensing issues, though.
+      </li>
+      <li>
+        Platform dependent solutions: Windows and POSIX have functions to convert strings
+        from one encoding to another. That is only a subset of what my library offers,
+        but if that is all you need it may be good enough.
+      </li>
+    </ol>
+    <h2 id="links">
+      Links
+    </h2>
+    <ol>
+      <li>
+        <a href="http://www.unicode.org/">The Unicode Consortium</a>.
+      </li>
+      <li>
+        <a href="http://icu.sourceforge.net/">ICU Library</a>.
+      </li>
+      <li>
+        <a href="http://en.wikipedia.org/wiki/UTF-8">UTF-8 at Wikipedia</a>
+      </li>
+      <li>
+        <a href="http://www.cl.cam.ac.uk/~mgk25/unicode.html">UTF-8 and Unicode FAQ for
+        Unix/Linux</a>
+      </li>
+    </ol>
+  </body>
+</html>
diff --git a/thirdparty/assimp/include/assimp/.editorconfig b/thirdparty/assimp/include/assimp/.editorconfig
deleted file mode 100644
index 9ea66423ad..0000000000
--- a/thirdparty/assimp/include/assimp/.editorconfig
+++ /dev/null
@@ -1,8 +0,0 @@
-# See <http://EditorConfig.org> for details
-
-[*.{h,hpp,inl}]
-end_of_line = lf
-insert_final_newline = true
-trim_trailing_whitespace = true
-indent_size = 4
-indent_style = space
diff --git a/thirdparty/assimp/include/assimp/BaseImporter.h b/thirdparty/assimp/include/assimp/BaseImporter.h
index 48dfc8ed8b..55f7fe3754 100644
--- a/thirdparty/assimp/include/assimp/BaseImporter.h
+++ b/thirdparty/assimp/include/assimp/BaseImporter.h
@@ -48,8 +48,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <vector>
 #include <set>
+#include <map>
 #include <assimp/types.h>
 #include <assimp/ProgressHandler.hpp>
+#include <assimp/ai_assert.h>
 
 struct aiScene;
 struct aiImporterDesc;
@@ -80,6 +82,10 @@ class IOStream;
 class ASSIMP_API BaseImporter {
     friend class Importer;
 
+private:
+    /* Pushes state into importer for the importer scale */
+    virtual void UpdateImporterScale( Importer* pImp );
+
 public:
 
     /** Constructor to be privately used by #Importer */
@@ -132,7 +138,7 @@ public:
      *  a suitable response to the caller.
      */
     aiScene* ReadFile(
-        const Importer* pImp,
+        Importer* pImp,
         const std::string& pFile,
         IOSystem* pIOHandler
         );
@@ -161,14 +167,65 @@ public:
      *  some loader features. Importers must provide this information. */
     virtual const aiImporterDesc* GetInfo() const = 0;
 
+    /**
+     * Will be called only by scale process when scaling is requested.
+     */
+    virtual void SetFileScale(double scale)
+    {
+        fileScale = scale;
+    }
+
+    virtual double GetFileScale() const
+    {
+        return fileScale;
+    }
+
+    enum ImporterUnits {
+        M,
+        MM,
+        CM,
+        INCHES,
+        FEET
+    };
+
+    /**
+     * Assimp Importer
+     * unit conversions available 
+     * if you need another measurment unit add it below.
+     * it's currently defined in assimp that we prefer meters.
+     * */
+    std::map<ImporterUnits, double> importerUnits = {
+        {ImporterUnits::M, 1},
+        {ImporterUnits::CM, 0.01},
+        {ImporterUnits::MM, 0.001},
+        {ImporterUnits::INCHES, 0.0254},
+        {ImporterUnits::FEET, 0.3048}
+    };
+
+    virtual void SetApplicationUnits( const ImporterUnits& unit )
+    {
+        importerScale = importerUnits[unit];
+        applicationUnits = unit;
+    }
+
+    virtual const ImporterUnits& GetApplicationUnits()
+    {
+        return applicationUnits;
+    }
+
     // -------------------------------------------------------------------
     /** Called by #Importer::GetExtensionList for each loaded importer.
      *  Take the extension list contained in the structure returned by
      *  #GetInfo and insert all file extensions into the given set.
      *  @param extension set to collect file extensions in*/
     void GetExtensionList(std::set<std::string>& extensions);
+    
+protected:    
+    ImporterUnits applicationUnits = ImporterUnits::M;
+    double importerScale = 1.0;
+    double fileScale = 1.0;
+
 
-protected:
 
     // -------------------------------------------------------------------
     /** Imports the given file into the given scene structure. The
diff --git a/thirdparty/assimp/include/assimp/config.h.in b/thirdparty/assimp/include/assimp/config.h.in
index d08b929a10..3a6379bf40 100644
--- a/thirdparty/assimp/include/assimp/config.h.in
+++ b/thirdparty/assimp/include/assimp/config.h.in
@@ -142,7 +142,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 /** @brief  Specifies the maximum angle that may be between two vertex tangents
  *         that their tangents and bi-tangents are smoothed.
  *
- * This applies to the CalcTangentSpace-Step. TFvhe angle is specified
+ * This applies to the CalcTangentSpace-Step. The angle is specified
  * in degrees. The maximum value is 175.
  * Property type: float. Default value: 45 degrees
  */
@@ -999,6 +999,13 @@ enum aiComponent
 #   define AI_CONFIG_GLOBAL_SCALE_FACTOR_DEFAULT  1.0f
 #endif // !! AI_DEBONE_THRESHOLD
 
+#define AI_CONFIG_APP_SCALE_KEY "APP_SCALE_FACTOR"
+
+#if (!defined AI_CONFIG_APP_SCALE_KEY)
+#   define AI_CONFIG_APP_SCALE_KEY 1.0
+#endif // AI_CONFIG_APP_SCALE_KEY
+
+
 // ---------- All the Build/Compile-time defines ------------
 
 /** @brief Specifies if double precision is supported inside assimp
diff --git a/thirdparty/assimp/include/assimp/irrXMLWrapper.h b/thirdparty/assimp/include/assimp/irrXMLWrapper.h
deleted file mode 100644
index ec8ee7c76e..0000000000
--- a/thirdparty/assimp/include/assimp/irrXMLWrapper.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
-Open Asset Import Library (assimp)
-----------------------------------------------------------------------
-
-Copyright (c) 2006-2019, assimp team
-
-
-All rights reserved.
-
-Redistribution and use of this software in source and binary forms,
-with or without modification, are permitted provided that the
-following conditions are met:
-
-* Redistributions of source code must retain the above
-copyright notice, this list of conditions and the
-following disclaimer.
-
-* Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the
-following disclaimer in the documentation and/or other
-materials provided with the distribution.
-
-* Neither the name of the assimp team, nor the names of its
-contributors may be used to endorse or promote products
-derived from this software without specific prior
-written permission of the assimp team.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-----------------------------------------------------------------------
-*/
-
-#ifndef INCLUDED_AI_IRRXML_WRAPPER
-#define INCLUDED_AI_IRRXML_WRAPPER
-
-// some long includes ....
-#include <irrXML.h>
-#include "IOStream.hpp"
-#include "BaseImporter.h"
-#include <vector>
-
-namespace Assimp    {
-
-// ---------------------------------------------------------------------------------
-/** @brief Utility class to make IrrXML work together with our custom IO system
- *  See the IrrXML docs for more details.
- *
- *  Construct IrrXML-Reader in BaseImporter::InternReadFile():
- *  @code
- * // open the file
- * std::unique_ptr<IOStream> file( pIOHandler->Open( pFile));
- * if( file.get() == NULL) {
- *    throw DeadlyImportError( "Failed to open file " + pFile + ".");
- * }
- *
- * // generate a XML reader for it
- * std::unique_ptr<CIrrXML_IOStreamReader> mIOWrapper( new CIrrXML_IOStreamReader( file.get()));
- * mReader = irr::io::createIrrXMLReader( mIOWrapper.get());
- * if( !mReader) {
- *    ThrowException( "xxxx: Unable to open file.");
- * }
- * @endcode
- **/
-class CIrrXML_IOStreamReader : public irr::io::IFileReadCallBack {
-public:
-
-    // ----------------------------------------------------------------------------------
-    //! Construction from an existing IOStream
-    explicit CIrrXML_IOStreamReader(IOStream* _stream)
-        : stream (_stream)
-        , t (0)
-    {
-
-        // Map the buffer into memory and convert it to UTF8. IrrXML provides its
-        // own conversion, which is merely a cast from uintNN_t to uint8_t. Thus,
-        // it is not suitable for our purposes and we have to do it BEFORE IrrXML
-        // gets the buffer. Sadly, this forces us to map the whole file into
-        // memory.
-
-        data.resize(stream->FileSize());
-        stream->Read(&data[0],data.size(),1);
-
-        // Remove null characters from the input sequence otherwise the parsing will utterly fail
-        unsigned int size = 0;
-        unsigned int size_max = static_cast<unsigned int>(data.size());
-        for(unsigned int i = 0; i < size_max; i++) {
-            if(data[i] != '\0') {
-                data[size++] = data[i];
-            }
-        }
-        data.resize(size);
-
-        BaseImporter::ConvertToUTF8(data);
-    }
-
-    // ----------------------------------------------------------------------------------
-    //! Virtual destructor
-    virtual ~CIrrXML_IOStreamReader() {}
-
-    // ----------------------------------------------------------------------------------
-    //!   Reads an amount of bytes from the file.
-    /**  @param buffer:       Pointer to output buffer.
-     *   @param sizeToRead:   Amount of bytes to read
-     *   @return              Returns how much bytes were read.  */
-    virtual int read(void* buffer, int sizeToRead)  {
-        if(sizeToRead<0) {
-            return 0;
-        }
-        if(t+sizeToRead>data.size()) {
-            sizeToRead = static_cast<int>(data.size()-t);
-        }
-
-        memcpy(buffer,&data.front()+t,sizeToRead);
-
-        t += sizeToRead;
-        return sizeToRead;
-    }
-
-    // ----------------------------------------------------------------------------------
-    //! Returns size of file in bytes
-    virtual int getSize()   {
-        return (int)data.size();
-    }
-
-private:
-    IOStream* stream;
-    std::vector<char> data;
-    size_t t;
-
-}; // ! class CIrrXML_IOStreamReader
-
-} // ! Assimp
-
-#endif // !! INCLUDED_AI_IRRXML_WRAPPER
diff --git a/thirdparty/assimp/include/assimp/scene.h b/thirdparty/assimp/include/assimp/scene.h
index df5d6f3b5e..2667db85b3 100644
--- a/thirdparty/assimp/include/assimp/scene.h
+++ b/thirdparty/assimp/include/assimp/scene.h
@@ -56,9 +56,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "material.h"
 #include "anim.h"
 #include "metadata.h"
-#include <cstdlib>
 
 #ifdef __cplusplus
+#  include <cstdlib>
 extern "C" {
 #endif
 
diff --git a/thirdparty/xatlas/avoid-failing-on-bad-geometry.patch b/thirdparty/xatlas/avoid-failing-on-bad-geometry.patch
deleted file mode 100644
index a28cd9f82b..0000000000
--- a/thirdparty/xatlas/avoid-failing-on-bad-geometry.patch
+++ /dev/null
@@ -1,157 +0,0 @@
-diff --git a/thirdparty/xatlas/xatlas.cpp b/thirdparty/xatlas/xatlas.cpp
-index df5ef94db..eb0824a51 100644
---- a/thirdparty/xatlas/xatlas.cpp
-+++ b/thirdparty/xatlas/xatlas.cpp
-@@ -1276,6 +1276,9 @@ class Vertex
- {
- public:
- 	uint32_t id;
-+	// -- GODOT start --
-+	uint32_t original_id;
-+	// -- GODOT end --
- 	Edge *edge;
- 	Vertex *next;
- 	Vertex *prev;
-@@ -1283,7 +1286,10 @@ public:
- 	Vector3 nor;
- 	Vector2 tex;
- 
--	Vertex(uint32_t id) : id(id), edge(NULL), pos(0.0f), nor(0.0f), tex(0.0f)
-+	// -- GODOT start --
-+	//Vertex(uint32_t id) : id(id), edge(NULL), pos(0.0f), nor(0.0f), tex(0.0f)
-+	Vertex(uint32_t id) : id(id), original_id(id), edge(NULL), pos(0.0f), nor(0.0f), tex(0.0f)
-+	// -- GODOT end --
- 	{
- 		next = this;
- 		prev = this;
-@@ -1934,6 +1940,64 @@ public:
- 		return f;
- 	}
- 
-+	// -- GODOT start --
-+	Face *addUniqueFace(uint32_t v0, uint32_t v1, uint32_t v2) {
-+
-+		int base_vertex = m_vertexArray.size();
-+
-+		uint32_t ids[3] = { v0, v1, v2 };
-+
-+		Vector3 base[3] = {
-+			m_vertexArray[v0]->pos,
-+			m_vertexArray[v1]->pos,
-+			m_vertexArray[v2]->pos,
-+		};
-+
-+		//make sure its not a degenerate
-+		bool degenerate = distanceSquared(base[0], base[1]) < NV_EPSILON || distanceSquared(base[0], base[2]) < NV_EPSILON || distanceSquared(base[1], base[2]) < NV_EPSILON;
-+		xaDebugAssert(!degenerate);
-+
-+		float min_x = 0;
-+
-+		for (int i = 0; i < 3; i++) {
-+			if (i == 0 || m_vertexArray[v0]->pos.x < min_x) {
-+				min_x = m_vertexArray[v0]->pos.x;
-+			}
-+		}
-+
-+		float max_x = 0;
-+
-+		for (int j = 0; j < m_vertexArray.size(); j++) {
-+			if (j == 0 || m_vertexArray[j]->pos.x > max_x) { //vertex already exists
-+				max_x = m_vertexArray[j]->pos.x;
-+			}
-+		}
-+
-+		//separate from everything else, in x axis
-+		for (int i = 0; i < 3; i++) {
-+
-+			base[i].x -= min_x;
-+			base[i].x += max_x + 10.0;
-+		}
-+
-+		for (int i = 0; i < 3; i++) {
-+			Vertex *v = new Vertex(m_vertexArray.size());
-+			v->pos = base[i];
-+			v->nor = m_vertexArray[ids[i]]->nor,
-+			v->tex = m_vertexArray[ids[i]]->tex,
-+
-+			v->original_id = ids[i];
-+			m_vertexArray.push_back(v);
-+		}
-+
-+		uint32_t indexArray[3];
-+		indexArray[0] = base_vertex + 0;
-+		indexArray[1] = base_vertex + 1;
-+		indexArray[2] = base_vertex + 2;
-+		return addFace(indexArray, 3, 0, 3);
-+	}
-+	// -- GODOT end --
-+
- 	// These functions disconnect the given element from the mesh and delete it.
- 
- 	// @@ We must always disconnect edge pairs simultaneously.
-@@ -2915,6 +2979,14 @@ Mesh *triangulate(const Mesh *inputMesh)
- 					Vector2 p0 = polygonPoints[i0];
- 					Vector2 p1 = polygonPoints[i1];
- 					Vector2 p2 = polygonPoints[i2];
-+
-+					// -- GODOT start --
-+					bool degenerate = distance(p0, p1) < NV_EPSILON || distance(p0, p2) < NV_EPSILON || distance(p1, p2) < NV_EPSILON;
-+					if (degenerate) {
-+						continue;
-+					}
-+					// -- GODOT end --
-+
- 					float d = clamp(dot(p0 - p1, p2 - p1) / (length(p0 - p1) * length(p2 - p1)), -1.0f, 1.0f);
- 					float angle = acosf(d);
- 					float area = triangleArea(p0, p1, p2);
-@@ -2938,6 +3010,11 @@ Mesh *triangulate(const Mesh *inputMesh)
- 						}
- 					}
- 				}
-+				// -- GODOT start --
-+				if (!bestIsValid)
-+					break;
-+				// -- GODOT end --
-+
- 				xaDebugAssert(minAngle <= 2 * PI);
- 				// Clip best ear:
- 				uint32_t i0 = (bestEar + size - 1) % size;
-@@ -5606,7 +5683,10 @@ public:
- 				}
- 				if (chartMeshIndices[vertex->id] == ~0) {
- 					chartMeshIndices[vertex->id] = m_chartMesh->vertexCount();
--					m_chartToOriginalMap.push_back(vertex->id);
-+					// -- GODOT start --
-+					//m_chartToOriginalMap.push_back(vertex->id);
-+					m_chartToOriginalMap.push_back(vertex->original_id);
-+					// -- GODOT end --
- 					m_chartToUnifiedMap.push_back(unifiedMeshIndices[unifiedVertex->id]);
- 					halfedge::Vertex *v = m_chartMesh->addVertex(vertex->pos);
- 					v->nor = vertex->nor;
-@@ -5699,7 +5779,10 @@ public:
- 				const halfedge::Vertex *vertex = it.current()->vertex;
- 				if (chartMeshIndices[vertex->id] == ~0) {
- 					chartMeshIndices[vertex->id] = m_chartMesh->vertexCount();
--					m_chartToOriginalMap.push_back(vertex->id);
-+					// -- GODOT start --
-+					//m_chartToOriginalMap.push_back(vertex->id);
-+					m_chartToOriginalMap.push_back(vertex->original_id);
-+					// -- GODOT end --
- 					halfedge::Vertex *v = m_chartMesh->addVertex(vertex->pos);
- 					v->nor = vertex->nor;
- 					v->tex = vertex->tex; // @@ Not necessary.
-@@ -7573,6 +7656,14 @@ AddMeshError AddMesh(Atlas *atlas, const InputMesh &mesh, bool useColocalVertice
- 			}
- 		}
- 		internal::halfedge::Face *face = heMesh->addFace(tri[0], tri[1], tri[2]);
-+
-+		// -- GODOT start --
-+		if (!face && heMesh->errorCode == internal::halfedge::Mesh::ErrorCode::AlreadyAddedEdge) {
-+			//there is still hope for this, no reason to not add, at least add as separate
-+			face = heMesh->addUniqueFace(tri[0], tri[1], tri[2]);
-+		}
-+		// -- GODOT end --
-+
- 		if (!face) {
- 			if (heMesh->errorCode == internal::halfedge::Mesh::ErrorCode::AlreadyAddedEdge)
- 				error.code = AddMeshErrorCode::AlreadyAddedEdge;
diff --git a/thirdparty/xatlas/build-fix-limits.patch b/thirdparty/xatlas/build-fix-limits.patch
deleted file mode 100644
index 00d07371c0..0000000000
--- a/thirdparty/xatlas/build-fix-limits.patch
+++ /dev/null
@@ -1,14 +0,0 @@
-diff --git a/thirdparty/xatlas/xatlas.h b/thirdparty/xatlas/xatlas.h
-index 7e556c6c3..dbf8ca08c 100644
---- a/thirdparty/xatlas/xatlas.h
-+++ b/thirdparty/xatlas/xatlas.h
-@@ -3,6 +3,9 @@
- #ifndef XATLAS_H
- #define XATLAS_H
- #include <float.h> // FLT_MAX
-+// -- GODOT start --
-+#include <limits.h> // INT_MAX, UINT_MAX
-+// -- GODOT end --
- 
- namespace xatlas {
- 
diff --git a/thirdparty/xatlas/xatlas.cpp b/thirdparty/xatlas/xatlas.cpp
index 1b30305cd4..56794211a6 100644
--- a/thirdparty/xatlas/xatlas.cpp
+++ b/thirdparty/xatlas/xatlas.cpp
@@ -93,8 +93,24 @@ Copyright (c) 2012 Brandon Pelfrey
 #define XA_ALLOC(tag, type) (type *)internal::Realloc(nullptr, sizeof(type), tag, __FILE__, __LINE__)
 #define XA_ALLOC_ARRAY(tag, type, num) (type *)internal::Realloc(nullptr, sizeof(type) * num, tag, __FILE__, __LINE__)
 #define XA_REALLOC(tag, ptr, type, num) (type *)internal::Realloc(ptr, sizeof(type) * num, tag, __FILE__, __LINE__)
+#define XA_REALLOC_SIZE(tag, ptr, size) (uint8_t *)internal::Realloc(ptr, size, tag, __FILE__, __LINE__)
 #define XA_FREE(ptr) internal::Realloc(ptr, 0, internal::MemTag::Default, __FILE__, __LINE__)
-#define XA_NEW(tag, type, ...) new (XA_ALLOC(tag, type)) type(__VA_ARGS__)
+#define XA_NEW(tag, type) new (XA_ALLOC(tag, type)) type()
+#define XA_NEW_ARGS(tag, type, ...) new (XA_ALLOC(tag, type)) type(__VA_ARGS__)
+
+#ifdef _MSC_VER
+#define XA_INLINE __forceinline
+#else
+#define XA_INLINE inline
+#endif
+
+#if defined(__clang__) || defined(__GNUC__)
+#define XA_NODISCARD [[nodiscard]]
+#elif defined(_MSC_VER)
+#define XA_NODISCARD _Check_return_
+#else
+#define XA_NODISCARD
+#endif
 
 #define XA_UNUSED(a) ((void)(a))
 
@@ -102,6 +118,7 @@ Copyright (c) 2012 Brandon Pelfrey
 #define XA_MERGE_CHARTS 1
 #define XA_MERGE_CHARTS_MIN_NORMAL_DEVIATION 0.5f
 #define XA_RECOMPUTE_CHARTS 1
+#define XA_SKIP_PARAMETERIZATION 0 // Use the orthogonal parameterization from segment::Atlas
 #define XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION 0
 
 #define XA_DEBUG_HEAP 0
@@ -140,6 +157,7 @@ namespace xatlas {
 namespace internal {
 
 static ReallocFunc s_realloc = realloc;
+static FreeFunc s_free = free;
 static PrintFunc s_print = printf;
 static bool s_printVerbose = false;
 
@@ -167,6 +185,7 @@ struct AllocHeader
 	const char *file;
 	int line;
 	int tag;
+	uint32_t id;
 	AllocHeader *prev, *next;
 	bool free;
 };
@@ -174,6 +193,7 @@ struct AllocHeader
 static std::mutex s_allocMutex;
 static AllocHeader *s_allocRoot = nullptr;
 static size_t s_allocTotalSize = 0, s_allocPeakSize = 0, s_allocTotalTagSize[MemTag::Count] = { 0 }, s_allocPeakTagSize[MemTag::Count] = { 0 };
+static uint32_t s_allocId =0 ;
 static constexpr uint32_t kAllocRedzone = 0x12345678;
 
 static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line)
@@ -214,6 +234,7 @@ static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line
 	header->file = file;
 	header->line = line;
 	header->tag = tag;
+	header->id = s_allocId++;
 	header->free = false;
 	if (!s_allocRoot) {
 		s_allocRoot = header;
@@ -242,7 +263,7 @@ static void ReportLeaks()
 	AllocHeader *header = s_allocRoot;
 	while (header) {
 		if (!header->free) {
-			printf("   Leak: %zu bytes %s %d\n", header->size, header->file, header->line);
+			printf("   Leak: ID %u, %zu bytes, %s %d\n", header->id, header->size, header->file, header->line);
 			anyLeaks = true;
 		}
 		auto redzone = (const uint32_t *)((const uint8_t *)header + header->size - sizeof(kAllocRedzone));
@@ -287,6 +308,10 @@ static void PrintMemoryUsage()
 #else
 static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/)
 {
+	if (ptr && size == 0 && s_free) {
+		s_free(ptr);
+		return nullptr;
+	}
 	void *mem = s_realloc(ptr, size);
 	if (size > 0) {
 		XA_DEBUG_ASSERT(mem);
@@ -304,6 +329,7 @@ static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/,
 struct ProfileData
 {
 	clock_t addMeshReal;
+	clock_t addMeshCopyData;
 	std::atomic<clock_t> addMeshThread;
 	std::atomic<clock_t> addMeshCreateColocals;
 	std::atomic<clock_t> addMeshCreateFaceGroups;
@@ -312,11 +338,14 @@ struct ProfileData
 	std::atomic<clock_t> addMeshCreateChartGroupsThread;
 	clock_t computeChartsReal;
 	std::atomic<clock_t> computeChartsThread;
-	std::atomic<clock_t> atlasBuilder;
-	std::atomic<clock_t> atlasBuilderInit;
-	std::atomic<clock_t> atlasBuilderCreateInitialCharts;
-	std::atomic<clock_t> atlasBuilderGrowCharts;
-	std::atomic<clock_t> atlasBuilderMergeCharts;
+	std::atomic<clock_t> buildAtlas;
+	std::atomic<clock_t> buildAtlasInit;
+	std::atomic<clock_t> buildAtlasPlaceSeeds;
+	std::atomic<clock_t> buildAtlasRelocateSeeds;
+	std::atomic<clock_t> buildAtlasResetCharts;
+	std::atomic<clock_t> buildAtlasGrowCharts;
+	std::atomic<clock_t> buildAtlasMergeCharts;
+	std::atomic<clock_t> buildAtlasFillHoles;
 	std::atomic<clock_t> createChartMeshesReal;
 	std::atomic<clock_t> createChartMeshesThread;
 	std::atomic<clock_t> fixChartMeshTJunctions;
@@ -327,11 +356,15 @@ struct ProfileData
 	std::atomic<clock_t> parameterizeChartsLSCM;
 	std::atomic<clock_t> parameterizeChartsEvaluateQuality;
 	clock_t packCharts;
+	clock_t packChartsAddCharts;
+	std::atomic<clock_t> packChartsAddChartsThread;
+	std::atomic<clock_t> packChartsAddChartsRestoreTexcoords;
 	clock_t packChartsRasterize;
 	clock_t packChartsDilate;
 	clock_t packChartsFindLocation;
 	std::atomic<clock_t> packChartsFindLocationThread;
 	clock_t packChartsBlit;
+	clock_t buildOutputMeshes;
 };
 
 static ProfileData s_profile;
@@ -540,10 +573,10 @@ static bool operator!=(const Vector2 &a, const Vector2 &b)
 	return a.x != b.x || a.y != b.y;
 }
 
-static Vector2 operator+(const Vector2 &a, const Vector2 &b)
+/*static Vector2 operator+(const Vector2 &a, const Vector2 &b)
 {
 	return Vector2(a.x + b.x, a.y + b.y);
-}
+}*/
 
 static Vector2 operator-(const Vector2 &a, const Vector2 &b)
 {
@@ -738,11 +771,6 @@ static Vector3 operator*(const Vector3 &v, float s)
 	return Vector3(v.x * s, v.y * s, v.z * s);
 }
 
-static Vector3 operator*(float s, const Vector3 &v)
-{
-	return Vector3(v.x * s, v.y * s, v.z * s);
-}
-
 static Vector3 operator/(const Vector3 &v, float s)
 {
 	return v * (1.0f / s);
@@ -949,282 +977,202 @@ struct AABB
 	Vector3 min, max;
 };
 
-template <typename T>
-static void construct_range(T * ptr, uint32_t new_size, uint32_t old_size) {
-	for (uint32_t i = old_size; i < new_size; i++) {
-		new(ptr+i) T; // placement new
-	}
-}
-
-template <typename T>
-static void construct_range(T * ptr, uint32_t new_size, uint32_t old_size, const T & elem) {
-	for (uint32_t i = old_size; i < new_size; i++) {
-		new(ptr+i) T(elem); // placement new
-	}
-}
-
-template <typename T>
-static void construct_range(T * ptr, uint32_t new_size, uint32_t old_size, const T * src) {
-	for (uint32_t i = old_size; i < new_size; i++) {
-		new(ptr+i) T(src[i]); // placement new
-	}
-}
-
-template <typename T>
-static void destroy_range(T * ptr, uint32_t new_size, uint32_t old_size) {
-	for (uint32_t i = new_size; i < old_size; i++) {
-		(ptr+i)->~T(); // Explicit call to the destructor
-	}
-}
-
-/**
-* Replacement for std::vector that is easier to debug and provides
-* some nice foreach enumerators. 
-*/
-template<typename T>
-class Array {
-public:
-	typedef uint32_t size_type;
-
-	Array(int memTag = MemTag::Default) : m_memTag(memTag), m_buffer(nullptr), m_capacity(0), m_size(0) {}
+struct ArrayBase
+{
+	ArrayBase(uint32_t elementSize, int memTag = MemTag::Default) : buffer(nullptr), elementSize(elementSize), size(0), capacity(0), memTag(memTag) {}
 
-	Array(const Array &a) : m_memTag(a.m_memTag), m_buffer(nullptr), m_capacity(0), m_size(0)
+	~ArrayBase()
 	{
-		copy(a.m_buffer, a.m_size);
+		XA_FREE(buffer);
 	}
 
-	~Array()
+	XA_INLINE void clear()
 	{
-		destroy();
+		size = 0;
 	}
 
-	const Array<T> &operator=(const Array<T> &other)
+	void copyTo(ArrayBase &other) const
 	{
-		m_memTag = other.m_memTag;
-		m_buffer = other.m_buffer;
-		m_capacity = other.m_capacity;
-		m_size = other.m_size;
-		return *this;
+		XA_DEBUG_ASSERT(elementSize == other.elementSize);
+		other.resize(size, true);
+		memcpy(other.buffer, buffer, size * elementSize);
 	}
 
-	const T & operator[]( uint32_t index ) const
-	{
-		XA_DEBUG_ASSERT(index < m_size);
-		return m_buffer[index];
-	}
-	
-	T & operator[] ( uint32_t index )
+	void destroy()
 	{
-		XA_DEBUG_ASSERT(index < m_size);
-		return m_buffer[index];
+		size = 0;
+		XA_FREE(buffer);
+		buffer = nullptr;
+		capacity = 0;
+		size = 0;
 	}
 
-	uint32_t size() const { return m_size; }
-	const T * data() const { return m_buffer; }
-	T * data() { return m_buffer; }
-	T * begin() { return m_buffer; }
-	T * end() { return m_buffer + m_size; }
-	const T * begin() const { return m_buffer; }
-	const T * end() const { return m_buffer + m_size; }
-	bool isEmpty() const { return m_size == 0; }
-
-	void push_back( const T & val )
+	// Insert the given element at the given index shifting all the elements up.
+	void insertAt(uint32_t index, const uint8_t *value)
 	{
-		XA_DEBUG_ASSERT(&val < m_buffer || &val >= m_buffer+m_size);
-		uint32_t old_size = m_size;
-		uint32_t new_size = m_size + 1;
-		setArraySize(new_size);
-		construct_range(m_buffer, new_size, old_size, val);
+		XA_DEBUG_ASSERT(index >= 0 && index <= size);
+		resize(size + 1, false);
+		if (index < size - 1)
+			memmove(buffer + elementSize * (index + 1), buffer + elementSize * index, elementSize * (size - 1 - index));
+		memcpy(&buffer[index * elementSize], value, elementSize);
 	}
 
-	void pop_back()
+	void moveTo(ArrayBase &other)
 	{
-		XA_DEBUG_ASSERT( m_size > 0 );
-		resize( m_size - 1 );
+		XA_DEBUG_ASSERT(elementSize == other.elementSize);
+		other.destroy();
+		other.buffer = buffer;
+		other.elementSize = elementSize;
+		other.size = size;
+		other.capacity = capacity;
+		other.memTag = memTag;
+		buffer = nullptr;
+		elementSize = size = capacity = 0;
 	}
 
-	const T & back() const
+	void pop_back()
 	{
-		XA_DEBUG_ASSERT( m_size > 0 );
-		return m_buffer[m_size-1];
+		XA_DEBUG_ASSERT(size > 0);
+		resize(size - 1, false);
 	}
 
-	T & back()
+	void push_back(const uint8_t *value)
 	{
-		XA_DEBUG_ASSERT( m_size > 0 );
-		return m_buffer[m_size-1];
+		XA_DEBUG_ASSERT(value < buffer || value >= buffer + size);
+		resize(size + 1, false);
+		memcpy(&buffer[(size - 1) * elementSize], value, elementSize);
 	}
 
-	const T & front() const
+	// Remove the element at the given index. This is an expensive operation!
+	void removeAt(uint32_t index)
 	{
-		XA_DEBUG_ASSERT( m_size > 0 );
-		return m_buffer[0];
+		XA_DEBUG_ASSERT(index >= 0 && index < size);
+		if (size != 1)
+			memmove(buffer + elementSize * index, buffer + elementSize * (index + 1), elementSize * (size - 1 - index));
+		size--;
 	}
 
-	T & front()
+	void reserve(uint32_t desiredSize)
 	{
-		XA_DEBUG_ASSERT( m_size > 0 );
-		return m_buffer[0];
+		if (desiredSize > capacity)
+			setArrayCapacity(desiredSize);
 	}
 
-	// Remove the element at the given index. This is an expensive operation!
-	void removeAt(uint32_t index)
+	void resize(uint32_t newSize, bool exact)
 	{
-		XA_DEBUG_ASSERT(index >= 0 && index < m_size);
-		if (m_size == 1) {
-			clear();
-		}
-		else {
-			m_buffer[index].~T();
-			memmove(m_buffer+index, m_buffer+index+1, sizeof(T) * (m_size - 1 - index));
-			m_size--;
+		size = newSize;
+		if (size > capacity) {
+			// First allocation is always exact. Otherwise, following allocations grow array to 150% of desired size.
+			uint32_t newBufferSize;
+			if (capacity == 0 || exact)
+				newBufferSize = size;
+			else
+				newBufferSize = size + (size >> 2);
+			setArrayCapacity(newBufferSize);
 		}
 	}
 
-	// Insert the given element at the given index shifting all the elements up.
-	void insertAt(uint32_t index, const T & val = T())
+	void setArrayCapacity(uint32_t newCapacity)
 	{
-		XA_DEBUG_ASSERT( index >= 0 && index <= m_size );
-		setArraySize(m_size + 1);
-		if (index < m_size - 1) {
-			memmove(m_buffer+index+1, m_buffer+index, sizeof(T) * (m_size - 1 - index));
+		XA_DEBUG_ASSERT(newCapacity >= size);
+		if (newCapacity == 0) {
+			// free the buffer.
+			if (buffer != nullptr) {
+				XA_FREE(buffer);
+				buffer = nullptr;
+			}
+		} else {
+			// realloc the buffer
+			buffer = XA_REALLOC_SIZE(memTag, buffer, newCapacity * elementSize);
 		}
-		// Copy-construct into the newly opened slot.
-		new(m_buffer+index) T(val);
-	}
-
-	void append(const Array<T> & other)
-	{
-		append(other.m_buffer, other.m_size);
+		capacity = newCapacity;
 	}
 
-	void resize(uint32_t new_size)
-	{
-		uint32_t old_size = m_size;
-		// Destruct old elements (if we're shrinking).
-		destroy_range(m_buffer, new_size, old_size);
-		setArraySize(new_size);
-		// Call default constructors
-		construct_range(m_buffer, new_size, old_size);
-	}
+	uint8_t *buffer;
+	uint32_t elementSize;
+	uint32_t size;
+	uint32_t capacity;
+	int memTag;
+};
 
-	void resize(uint32_t new_size, const T & elem)
-	{
-		XA_DEBUG_ASSERT(&elem < m_buffer || &elem > m_buffer+m_size);
-		uint32_t old_size = m_size;
-		// Destruct old elements (if we're shrinking).
-		destroy_range(m_buffer, new_size, old_size);
-		setArraySize(new_size);
-		// Call copy constructors
-		construct_range(m_buffer, new_size, old_size, elem);
-	}
+template<typename T>
+class Array
+{
+public:
+	Array(int memTag = MemTag::Default) : m_base(sizeof(T), memTag) {}
+	Array(const Array&) = delete;
+	const Array &operator=(const Array &) = delete;
 
-	void clear()
+	XA_INLINE const T &operator[](uint32_t index) const
 	{
-		// Destruct old elements
-		destroy_range(m_buffer, 0, m_size);
-		m_size = 0;
+		XA_DEBUG_ASSERT(index < m_base.size);
+		return ((const T *)m_base.buffer)[index];
 	}
 
-	void destroy()
+	XA_INLINE T &operator[](uint32_t index)
 	{
-		clear();
-		XA_FREE(m_buffer);
-		m_buffer = nullptr;
-		m_capacity = 0;
-		m_size = 0;
+		XA_DEBUG_ASSERT(index < m_base.size);
+		return ((T *)m_base.buffer)[index];
 	}
 
-	void reserve(uint32_t desired_size)
+	XA_INLINE const T &back() const
 	{
-		if (desired_size > m_capacity) {
-			setArrayCapacity(desired_size);
-		}
+		XA_DEBUG_ASSERT(!isEmpty());
+		return ((const T *)m_base.buffer)[m_base.size - 1];
 	}
 
-	void copy(const T * data, uint32_t count)
-	{
-		destroy_range(m_buffer, 0, m_size);
-		setArraySize(count);
-		construct_range(m_buffer, count, 0, data);
-	}
+	XA_INLINE T *begin() { return (T *)m_base.buffer; }
+	XA_INLINE void clear() { m_base.clear(); }
+	void copyTo(Array &other) const { m_base.copyTo(other.m_base); }
+	XA_INLINE const T *data() const { return (const T *)m_base.buffer; }
+	XA_INLINE T *data() { return (T *)m_base.buffer; }
+	XA_INLINE T *end() { return (T *)m_base.buffer + m_base.size; }
+	XA_INLINE bool isEmpty() const { return m_base.size == 0; }
+	void insertAt(uint32_t index, const T &value) { m_base.insertAt(index, (const uint8_t *)&value); }
+	void moveTo(Array &other) { m_base.moveTo(other.m_base); }
+	void push_back(const T &value) { m_base.push_back((const uint8_t *)&value); }
+	void pop_back() { m_base.pop_back(); }
+	void removeAt(uint32_t index) { m_base.removeAt(index); }
+	void reserve(uint32_t desiredSize) { m_base.reserve(desiredSize); }
+	void resize(uint32_t newSize) { m_base.resize(newSize, true); }
 
-	void moveTo(Array<T> &other)
+	void setAll(const T &value)
 	{
-		other.destroy();
-		swap(m_buffer, other.m_buffer);
-		swap(m_capacity, other.m_capacity);
-		swap(m_size, other.m_size);
+		auto buffer = (T *)m_base.buffer;
+		for (uint32_t i = 0; i < m_base.size; i++)
+			buffer[i] = value;
 	}
 
-protected:
-	void setArraySize(uint32_t new_size)
-	{
-		m_size = new_size;
-		if (new_size > m_capacity) {
-			uint32_t new_buffer_size;
-			if (m_capacity == 0) {
-				// first allocation is exact
-				new_buffer_size = new_size;
-			}
-			else {
-				// following allocations grow array by 25%
-				new_buffer_size = new_size + (new_size >> 2);
-			}
-			setArrayCapacity( new_buffer_size );
-		}
-	}
-	void setArrayCapacity(uint32_t new_capacity)
-	{
-		XA_DEBUG_ASSERT(new_capacity >= m_size);
-		if (new_capacity == 0) {
-			// free the buffer.
-			if (m_buffer != nullptr) {
-				XA_FREE(m_buffer);
-				m_buffer = nullptr;
-			}
-		}
-		else {
-			// realloc the buffer
-			m_buffer = XA_REALLOC(m_memTag, m_buffer, T, new_capacity);
-		}
-		m_capacity = new_capacity;
-	}
+	XA_INLINE uint32_t size() const { return m_base.size; }
+	XA_INLINE void zeroOutMemory() { memset(m_base.buffer, 0, m_base.elementSize * m_base.size); }
 
-	int m_memTag;
-	T * m_buffer;
-	uint32_t m_capacity;
-	uint32_t m_size;
+private:
+	ArrayBase m_base;
 };
 
-/// Basis class to compute tangent space basis, ortogonalizations and to
-/// transform vectors from one space to another.
+/// Basis class to compute tangent space basis, ortogonalizations and to transform vectors from one space to another.
 struct Basis
 {
-	void buildFrameForDirection(const Vector3 &d, float angle = 0)
+	XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal)
 	{
-		XA_ASSERT(isNormalized(d));
-		normal = d;
+		XA_ASSERT(isNormalized(normal));
 		// Choose minimum axis.
-		if (fabsf(normal.x) < fabsf(normal.y) && fabsf(normal.x) < fabsf(normal.z)) {
+		Vector3 tangent;
+		if (fabsf(normal.x) < fabsf(normal.y) && fabsf(normal.x) < fabsf(normal.z))
 			tangent = Vector3(1, 0, 0);
-		} else if (fabsf(normal.y) < fabsf(normal.z)) {
+		else if (fabsf(normal.y) < fabsf(normal.z))
 			tangent = Vector3(0, 1, 0);
-		} else {
+		else
 			tangent = Vector3(0, 0, 1);
-		}
 		// Ortogonalize
 		tangent -= normal * dot(normal, tangent);
 		tangent = normalize(tangent, kEpsilon);
-		bitangent = cross(normal, tangent);
-		// Rotate frame around normal according to angle.
-		if (angle != 0.0f) {
-			float c = cosf(angle);
-			float s = sinf(angle);
-			Vector3 tmp = c * tangent - s * bitangent;
-			bitangent = s * tangent + c * bitangent;
-			tangent = tmp;
-		}
+		return tangent;
+	}
+
+	XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent)
+	{
+		return cross(normal, tangent);
 	}
 
 	Vector3 tangent = Vector3(0.0f);
@@ -1246,7 +1194,7 @@ public:
 	void resize(uint32_t new_size)
 	{
 		m_size = new_size;
-		m_wordArray.resize( (m_size + 31) >> 5 );
+		m_wordArray.resize((m_size + 31) >> 5);
 	}
 
 	/// Get bit.
@@ -1286,35 +1234,28 @@ public:
 	{
 		m_rowStride = (m_width + 63) >> 6;
 		m_data.resize(m_rowStride * m_height);
+		m_data.zeroOutMemory();
 	}
 
-	BitImage(const BitImage &other)
-	{
-		m_width = other.m_width;
-		m_height = other.m_height;
-		m_rowStride = other.m_rowStride;
-		m_data.resize(m_rowStride * m_height);
-		memcpy(m_data.data(), other.m_data.data(), m_rowStride * m_height * sizeof(uint64_t));
-	}
+	BitImage(const BitImage &other) = delete;
+	const BitImage &operator=(const BitImage &other) = delete;
+	uint32_t width() const { return m_width; }
+	uint32_t height() const { return m_height; }
 
-	const BitImage &operator=(const BitImage &other)
+	void copyTo(BitImage &other)
 	{
-		m_width = other.m_width;
-		m_height = other.m_height;
-		m_rowStride = other.m_rowStride;
-		m_data = other.m_data;
-		return *this;
+		other.m_width = m_width;
+		other.m_height = m_height;
+		other.m_rowStride = m_rowStride;
+		m_data.copyTo(other.m_data);
 	}
 
-	uint32_t width() const { return m_width; }
-	uint32_t height() const { return m_height; }
-
 	void resize(uint32_t w, uint32_t h, bool discard)
 	{
 		const uint32_t rowStride = (w + 63) >> 6;
 		if (discard) {
 			m_data.resize(rowStride * h);
-			memset(m_data.data(), 0, m_data.size() * sizeof(uint64_t));
+			m_data.zeroOutMemory();
 		} else {
 			Array<uint64_t> tmp;
 			tmp.resize(rowStride * h);
@@ -1351,7 +1292,7 @@ public:
 
 	void clearAll()
 	{
-		memset(m_data.data(), 0, m_data.size() * sizeof(uint64_t));
+		m_data.zeroOutMemory();
 	}
 
 	bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const
@@ -1405,7 +1346,7 @@ public:
 						tmp.setBitAt(x, y);
 				}
 			}
-			swap(m_data, tmp.m_data);
+			tmp.m_data.copyTo(m_data);
 		}
 	}
 
@@ -1754,36 +1695,28 @@ class FullVector
 {
 public:
 	FullVector(uint32_t dim) { m_array.resize(dim); }
-	FullVector(const FullVector &v) : m_array(v.m_array) {}
-
-	const FullVector &operator=(const FullVector &v)
-	{
-		XA_ASSERT(dimension() == v.dimension());
-		m_array = v.m_array;
-		return *this;
-	}
-
-	uint32_t dimension() const { return m_array.size(); }
-	const float &operator[]( uint32_t index ) const { return m_array[index]; }
-	float &operator[] ( uint32_t index ) { return m_array[index]; }
+	FullVector(const FullVector &v) { v.m_array.copyTo(m_array); }
+	const FullVector &operator=(const FullVector &v) = delete;
+	XA_INLINE uint32_t dimension() const { return m_array.size(); }
+	XA_INLINE const float &operator[](uint32_t index) const { return m_array[index]; }
+	XA_INLINE float &operator[](uint32_t index) { return m_array[index]; }
 
 	void fill(float f)
 	{
 		const uint32_t dim = dimension();
-		for (uint32_t i = 0; i < dim; i++) {
+		for (uint32_t i = 0; i < dim; i++)
 			m_array[i] = f;
-		}
 	}
 
 private:
 	Array<float> m_array;
 };
 
-template<typename Key, typename Value, typename H = Hash<Key>, typename E = Equal<Key> >
+template<typename Key, typename H = Hash<Key>, typename E = Equal<Key> >
 class HashMap
 {
 public:
-	HashMap(int memTag, uint32_t size) : m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_values(memTag), m_next(memTag)
+	HashMap(int memTag, uint32_t size) : m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag)
 	{
 	}
 
@@ -1793,15 +1726,12 @@ public:
 			XA_FREE(m_slots);
 	}
 
-	const Value &value(uint32_t index) const { return m_values[index]; }
-
-	void add(const Key &key, const Value &value)
+	void add(const Key &key)
 	{
 		if (!m_slots)
 			alloc();
 		const uint32_t hash = computeHash(key);
 		m_keys.push_back(key);
-		m_values.push_back(value);
 		m_next.push_back(m_slots[hash]);
 		m_slots[hash] = m_next.size() - 1;
 	}
@@ -1842,7 +1772,6 @@ private:
 		for (uint32_t i = 0; i < m_numSlots; i++)
 			m_slots[i] = UINT32_MAX;
 		m_keys.reserve(m_size);
-		m_values.reserve(m_size);
 		m_next.reserve(m_size);
 	}
 
@@ -1857,7 +1786,6 @@ private:
 	uint32_t m_numSlots;
 	uint32_t *m_slots;
 	Array<Key> m_keys;
-	Array<Value> m_values;
 	Array<uint32_t> m_next;
 };
 
@@ -2190,7 +2118,7 @@ private:
 			}
 		}
 		// Remove duplicate element.
-		XA_DEBUG_ASSERT(output.front() == output.back());
+		XA_DEBUG_ASSERT(output.size() > 0);
 		output.pop_back();
 	}
 
@@ -2284,7 +2212,7 @@ public:
 				const EdgeKey key(vertex0, vertex1);
 				if (m_edgeMap.get(key) != UINT32_MAX)
 					result = AddFaceResult::DuplicateEdge;
-				m_edgeMap.add(key, firstIndex + i);
+				m_edgeMap.add(key);
 			}
 		}
 		return result;
@@ -2301,7 +2229,9 @@ public:
 		Array<uint32_t> colocals;
 		Array<uint32_t> potential;
 		m_colocalVertexCount = 0;
-		m_nextColocalVertex.resize(vertexCount, UINT32_MAX);
+		m_nextColocalVertex.resize(vertexCount);
+		for (uint32_t i = 0; i < vertexCount; i++)
+			m_nextColocalVertex[i] = UINT32_MAX;
 		for (uint32_t i = 0; i < vertexCount; i++) {
 			if (m_nextColocalVertex[i] != UINT32_MAX)
 				continue; // Already linked.
@@ -2467,12 +2397,10 @@ public:
 	void linkBoundaries()
 	{
 		const uint32_t edgeCount = m_indices.size();
-		HashMap<uint32_t, uint32_t> vertexToEdgeMap(MemTag::Mesh, edgeCount);
+		HashMap<uint32_t> vertexToEdgeMap(MemTag::Mesh, edgeCount); // Edge is index / 2
 		for (uint32_t i = 0; i < edgeCount; i++) {
-			const uint32_t vertex0 = m_indices[meshEdgeIndex0(i)];
-			const uint32_t vertex1 = m_indices[meshEdgeIndex1(i)];
-			vertexToEdgeMap.add(vertex0, i);
-			vertexToEdgeMap.add(vertex1, i);
+			vertexToEdgeMap.add(m_indices[meshEdgeIndex0(i)]);
+			vertexToEdgeMap.add(m_indices[meshEdgeIndex1(i)]);
 		}
 		m_nextBoundaryEdges.resize(edgeCount);
 		for (uint32_t i = 0; i < edgeCount; i++)
@@ -2497,9 +2425,9 @@ public:
 				const uint32_t startVertex = m_indices[meshEdgeIndex1(currentEdge)];
 				uint32_t bestNextEdge = UINT32_MAX;
 				for (ColocalVertexIterator it(this, startVertex); !it.isDone(); it.advance()) {
-					uint32_t mapOtherEdgeIndex = vertexToEdgeMap.get(it.vertex());
-					while (mapOtherEdgeIndex != UINT32_MAX) {
-						const uint32_t otherEdge = vertexToEdgeMap.value(mapOtherEdgeIndex);
+					uint32_t mapIndex = vertexToEdgeMap.get(it.vertex());
+					while (mapIndex != UINT32_MAX) {
+						const uint32_t otherEdge = mapIndex / 2; // Two vertices added per edge.
 						if (m_oppositeEdges[otherEdge] != UINT32_MAX)
 							goto next; // Not a boundary edge.
 						if (linkedEdges.bitAt(otherEdge))
@@ -2515,7 +2443,7 @@ public:
 						if (bestNextEdge != firstEdge && (bestNextEdge == UINT32_MAX || it.vertex() == startVertex))
 							bestNextEdge = otherEdge;
 					next:
-						mapOtherEdgeIndex = vertexToEdgeMap.getNext(mapOtherEdgeIndex);
+						mapIndex = vertexToEdgeMap.getNext(mapIndex);
 					}
 				}
 				if (bestNextEdge == UINT32_MAX) {
@@ -2567,9 +2495,8 @@ public:
 		uint32_t result = UINT32_MAX;
 		if (m_nextColocalVertex.isEmpty()) {
 			EdgeKey key(vertex0, vertex1);
-			uint32_t mapEdgeIndex = m_edgeMap.get(key);
-			while (mapEdgeIndex != UINT32_MAX) {
-				const uint32_t edge = m_edgeMap.value(mapEdgeIndex);
+			uint32_t edge = m_edgeMap.get(key);
+			while (edge != UINT32_MAX) {
 				// Don't find edges of ignored faces.
 				if ((faceGroup == UINT32_MAX || m_faceGroups[meshEdgeFace(edge)] == faceGroup) && !isFaceIgnored(meshEdgeFace(edge))) {
 					//XA_DEBUG_ASSERT(m_id != UINT32_MAX || (m_id == UINT32_MAX && result == UINT32_MAX)); // duplicate edge - ignore on initial meshes
@@ -2578,15 +2505,14 @@ public:
 					return result;
 #endif
 				}
-				mapEdgeIndex = m_edgeMap.getNext(mapEdgeIndex);
+				edge = m_edgeMap.getNext(edge);
 			}
 		} else {
 			for (ColocalVertexIterator it0(this, vertex0); !it0.isDone(); it0.advance()) {
 				for (ColocalVertexIterator it1(this, vertex1); !it1.isDone(); it1.advance()) {
 					EdgeKey key(it0.vertex(), it1.vertex());
-					uint32_t mapEdgeIndex = m_edgeMap.get(key);
-					while (mapEdgeIndex != UINT32_MAX) {
-						const uint32_t edge = m_edgeMap.value(mapEdgeIndex);
+					uint32_t edge = m_edgeMap.get(key);
+					while (edge != UINT32_MAX) {
 						// Don't find edges of ignored faces.
 						if ((faceGroup == UINT32_MAX || m_faceGroups[meshEdgeFace(edge)] == faceGroup) && !isFaceIgnored(meshEdgeFace(edge))) {
 							XA_DEBUG_ASSERT(m_id != UINT32_MAX || (m_id == UINT32_MAX && result == UINT32_MAX)); // duplicate edge - ignore on initial meshes
@@ -2595,7 +2521,7 @@ public:
 							return result;
 #endif
 						}
-						mapEdgeIndex = m_edgeMap.getNext(mapEdgeIndex);
+						edge = m_edgeMap.getNext(edge);
 					}
 				}
 			}
@@ -2802,24 +2728,24 @@ public:
 		return false;
 	}
 
-	float epsilon() const { return m_epsilon; }
-	uint32_t edgeCount() const { return m_indices.size(); }
-	uint32_t oppositeEdge(uint32_t edge) const { return m_oppositeEdges[edge]; }
-	bool isBoundaryEdge(uint32_t edge) const { return m_oppositeEdges[edge] == UINT32_MAX; }
-	bool isBoundaryVertex(uint32_t vertex) const { return m_boundaryVertices[vertex]; }
-	uint32_t colocalVertexCount() const { return m_colocalVertexCount; }
-	uint32_t vertexCount() const { return m_positions.size(); }
-	uint32_t vertexAt(uint32_t i) const { return m_indices[i]; }
-	const Vector3 &position(uint32_t vertex) const { return m_positions[vertex]; }
-	const Vector3 &normal(uint32_t vertex) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); return m_normals[vertex]; }
-	const Vector2 &texcoord(uint32_t vertex) const { return m_texcoords[vertex]; }
-	Vector2 &texcoord(uint32_t vertex) { return m_texcoords[vertex]; }
-	Vector2 *texcoords() { return m_texcoords.data(); }
-	uint32_t faceCount() const { return m_indices.size() / 3; }
-	uint32_t faceGroupCount() const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasFaceGroups); return m_faceGroups.size(); }
-	uint32_t faceGroupAt(uint32_t face) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasFaceGroups); return m_faceGroups[face]; }
-	const uint32_t *indices() const { return m_indices.data(); }
-	uint32_t indexCount() const { return m_indices.size(); }
+	XA_INLINE float epsilon() const { return m_epsilon; }
+	XA_INLINE uint32_t edgeCount() const { return m_indices.size(); }
+	XA_INLINE uint32_t oppositeEdge(uint32_t edge) const { return m_oppositeEdges[edge]; }
+	XA_INLINE bool isBoundaryEdge(uint32_t edge) const { return m_oppositeEdges[edge] == UINT32_MAX; }
+	XA_INLINE bool isBoundaryVertex(uint32_t vertex) const { return m_boundaryVertices[vertex]; }
+	XA_INLINE uint32_t colocalVertexCount() const { return m_colocalVertexCount; }
+	XA_INLINE uint32_t vertexCount() const { return m_positions.size(); }
+	XA_INLINE uint32_t vertexAt(uint32_t i) const { return m_indices[i]; }
+	XA_INLINE const Vector3 &position(uint32_t vertex) const { return m_positions[vertex]; }
+	XA_INLINE const Vector3 &normal(uint32_t vertex) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); return m_normals[vertex]; }
+	XA_INLINE const Vector2 &texcoord(uint32_t vertex) const { return m_texcoords[vertex]; }
+	XA_INLINE Vector2 &texcoord(uint32_t vertex) { return m_texcoords[vertex]; }
+	XA_INLINE Vector2 *texcoords() { return m_texcoords.data(); }
+	XA_INLINE uint32_t faceCount() const { return m_indices.size() / 3; }
+	XA_INLINE uint32_t faceGroupCount() const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasFaceGroups); return m_faceGroups.size(); }
+	XA_INLINE uint32_t faceGroupAt(uint32_t face) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasFaceGroups); return m_faceGroups[face]; }
+	XA_INLINE const uint32_t *indices() const { return m_indices.data(); }
+	XA_INLINE uint32_t indexCount() const { return m_indices.size(); }
 
 private:
 	bool isFaceIgnored(uint32_t face) const { return (m_flags & MeshFlags::HasIgnoredFaces) && m_faceIgnore[face]; }
@@ -2865,7 +2791,7 @@ private:
 		uint32_t v1;
 	};
 
-	HashMap<EdgeKey, uint32_t> m_edgeMap;
+	HashMap<EdgeKey> m_edgeMap;
 
 public:
 	class BoundaryEdgeIterator
@@ -2950,37 +2876,37 @@ public:
 
 		bool isDone() const
 		{
-			return m_vertex0It.isDone() && m_vertex1It.isDone() && m_mapEdgeIndex == UINT32_MAX;
+			return m_vertex0It.isDone() && m_vertex1It.isDone() && m_edge == UINT32_MAX;
 		}
 
 		uint32_t edge() const
 		{
-			return m_mesh->m_edgeMap.value(m_mapEdgeIndex);
+			return m_edge;
 		}
 
 	private:
 		void resetElement()
 		{
-			m_mapEdgeIndex = m_mesh->m_edgeMap.get(Mesh::EdgeKey(m_vertex0It.vertex(), m_vertex1It.vertex()));
-			while (m_mapEdgeIndex != UINT32_MAX) {
+			m_edge = m_mesh->m_edgeMap.get(Mesh::EdgeKey(m_vertex0It.vertex(), m_vertex1It.vertex()));
+			while (m_edge != UINT32_MAX) {
 				if (!isIgnoredFace())
 					break;
-				m_mapEdgeIndex = m_mesh->m_edgeMap.getNext(m_mapEdgeIndex);
+				m_edge = m_mesh->m_edgeMap.getNext(m_edge);
 			}
-			if (m_mapEdgeIndex == UINT32_MAX)
+			if (m_edge == UINT32_MAX)
 				advanceVertex1();
 		}
 
 		void advanceElement()
 		{
 			for (;;) {
-				m_mapEdgeIndex = m_mesh->m_edgeMap.getNext(m_mapEdgeIndex);
-				if (m_mapEdgeIndex == UINT32_MAX)
+				m_edge = m_mesh->m_edgeMap.getNext(m_edge);
+				if (m_edge == UINT32_MAX)
 					break;
 				if (!isIgnoredFace())
 					break;
 			}
-			if (m_mapEdgeIndex == UINT32_MAX)
+			if (m_edge == UINT32_MAX)
 				advanceVertex1();
 		}
 
@@ -3004,14 +2930,13 @@ public:
 
 		bool isIgnoredFace() const
 		{
-			const uint32_t edge = m_mesh->m_edgeMap.value(m_mapEdgeIndex);
-			return m_mesh->m_faceIgnore[meshEdgeFace(edge)];
+			return m_mesh->m_faceIgnore[meshEdgeFace(m_edge)];
 		}
 
 		const Mesh *m_mesh;
 		ColocalVertexIterator m_vertex0It, m_vertex1It;
 		const uint32_t m_vertex1;
-		uint32_t m_mapEdgeIndex;
+		uint32_t m_edge;
 	};
 
 	class FaceEdgeIterator 
@@ -3334,7 +3259,7 @@ static Mesh *meshFixTJunctions(const Mesh &inputMesh, bool *duplicatedEdge, bool
 	if (splitEdges.isEmpty())
 		return nullptr;
 	const uint32_t faceCount = inputMesh.faceCount();
-	Mesh *mesh = XA_NEW(MemTag::Mesh, Mesh, inputMesh.epsilon(), vertexCount + splitEdges.size(), faceCount);
+	Mesh *mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, inputMesh.epsilon(), vertexCount + splitEdges.size(), faceCount);
 	for (uint32_t v = 0; v < vertexCount; v++)
 		mesh->addVertex(inputMesh.position(v));
 	Array<uint32_t> indexArray;
@@ -3567,8 +3492,9 @@ public:
 		}
 		m_workers.resize(std::thread::hardware_concurrency() <= 1 ? 1 : std::thread::hardware_concurrency() - 1);
 		for (uint32_t i = 0; i < m_workers.size(); i++) {
+			new (&m_workers[i]) Worker();
 			m_workers[i].wakeup = false;
-			m_workers[i].thread = XA_NEW(MemTag::Default, std::thread, workerThread, this, &m_workers[i]);
+			m_workers[i].thread = XA_NEW_ARGS(MemTag::Default, std::thread, workerThread, this, &m_workers[i]);
 		}
 	}
 
@@ -3584,6 +3510,7 @@ public:
 				worker.thread->join();
 			worker.thread->~thread();
 			XA_FREE(worker.thread);
+			worker.~Worker();
 		}
 		for (uint32_t i = 0; i < m_maxGroups; i++)
 			m_groups[i].~TaskGroup();
@@ -3770,6 +3697,7 @@ private:
 
 struct UvMeshChart
 {
+	Array<uint32_t> faces;
 	Array<uint32_t> indices;
 	uint32_t material;
 };
@@ -3828,7 +3756,7 @@ public:
 		m_numVertices = p;
 	}
 
-	void clipVerticalPlane(float offset, float clipdirection )
+	void clipVerticalPlane(float offset, float clipdirection)
 	{
 		Vector2 *v  = m_vertexBuffers[m_activeVertexBuffer];
 		m_activeVertexBuffer ^= 1;
@@ -3877,7 +3805,7 @@ public:
 		computeArea();
 	}
 
-	float area()
+	float area() const
 	{
 		return m_area;
 	}
@@ -3959,9 +3887,8 @@ struct Triangle
 				if ( (aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE) ) {
 					for (float y = y0; y < y0 + BK_SIZE; y++) {
 						for (float x = x0; x < x0 + BK_SIZE; x++) {
-							if (!cb(param, (int)x, (int)y)) {
+							if (!cb(param, (int)x, (int)y))
 								return false;
-							}
 						}
 					}
 				} else { // Partially covered block
@@ -3974,17 +3901,15 @@ struct Triangle
 						float CX3 = CY3;
 						for (float x = x0; x < x0 + BK_SIZE; x++) { // @@ This is not clipping to scissor rectangle correctly.
 							if (CX1 >= PX_INSIDE && CX2 >= PX_INSIDE && CX3 >= PX_INSIDE) {
-								if (!cb(param, (int)x, (int)y)) {
+								if (!cb(param, (int)x, (int)y))
 									return false;
-								}
 							} else if ((CX1 >= PX_OUTSIDE) && (CX2 >= PX_OUTSIDE) && (CX3 >= PX_OUTSIDE)) {
 								// triangle partially covers pixel. do clipping.
 								ClippedTriangle ct(v1 - Vector2(x, y), v2 - Vector2(x, y), v3 - Vector2(x, y));
 								ct.clipAABox(-0.5, -0.5, 0.5, 0.5);
 								if (ct.area() > 0.0f) {
-									if (!cb(param, (int)x, (int)y)) {
+									if (!cb(param, (int)x, (int)y))
 										return false;
-									}
 								}
 							}
 							CX1 += n1.x;
@@ -4065,18 +3990,28 @@ public:
 		float v; // value
 	};
 
-	Matrix(uint32_t d) : m_width(d) { m_array.resize(d); }
-	Matrix(uint32_t w, uint32_t h) : m_width(w) { m_array.resize(h); }
-	Matrix(const Matrix &m) : m_width(m.m_width) { m_array = m.m_array; }
-
-	const Matrix &operator=(const Matrix &m)
+	Matrix(uint32_t d) : m_width(d)
 	{
-		XA_ASSERT(width() == m.width());
-		XA_ASSERT(height() == m.height());
-		m_array = m.m_array;
-		return *this;
+		m_array.resize(d);
+		for (uint32_t i = 0; i < m_array.size(); i++)
+			new (&m_array[i]) Array<Coefficient>();
+	}
+	
+	Matrix(uint32_t w, uint32_t h) : m_width(w)
+	{
+		m_array.resize(h);
+		for (uint32_t i = 0; i < m_array.size(); i++)
+			new (&m_array[i]) Array<Coefficient>();
+	}
+	
+	~Matrix()
+	{
+		for (uint32_t i = 0; i < m_array.size(); i++)
+			m_array[i].~Array();
 	}
 
+	Matrix(const Matrix &m) = delete;
+	const Matrix &operator=(const Matrix &m) = delete;
 	uint32_t width() const { return m_width; }
 	uint32_t height() const { return m_array.size(); }
 	bool isSquare() const { return width() == height(); }
@@ -4274,428 +4209,7 @@ static void mult(const Matrix &A, const Matrix &B, Matrix &C)
 
 } // namespace sparse
 
-class JacobiPreconditioner
-{
-public:
-	JacobiPreconditioner(const sparse::Matrix &M, bool symmetric) : m_inverseDiagonal(M.width())
-	{
-		XA_ASSERT(M.isSquare());
-		for (uint32_t x = 0; x < M.width(); x++) {
-			float elem = M.getCoefficient(x, x);
-			//XA_DEBUG_ASSERT( elem != 0.0f ); // This can be zero in the presence of zero area triangles.
-			if (symmetric) {
-				m_inverseDiagonal[x] = (elem != 0) ? 1.0f / sqrtf(fabsf(elem)) : 1.0f;
-			} else {
-				m_inverseDiagonal[x] = (elem != 0) ? 1.0f / elem : 1.0f;
-			}
-		}
-	}
-
-	void apply(const FullVector &x, FullVector &y) const
-	{
-		XA_DEBUG_ASSERT(x.dimension() == m_inverseDiagonal.dimension());
-		XA_DEBUG_ASSERT(y.dimension() == m_inverseDiagonal.dimension());
-		// @@ Wrap vector component-wise product into a separate function.
-		const uint32_t D = x.dimension();
-		for (uint32_t i = 0; i < D; i++) {
-			y[i] = m_inverseDiagonal[i] * x[i];
-		}
-	}
-
-private:
-	FullVector m_inverseDiagonal;
-};
-
-// Linear solvers.
-class Solver
-{
-public:
-	// Solve the symmetric system: At·A·x = At·b
-	static bool LeastSquaresSolver(const sparse::Matrix &A, const FullVector &b, FullVector &x, float epsilon = 1e-5f)
-	{
-		XA_DEBUG_ASSERT(A.width() == x.dimension());
-		XA_DEBUG_ASSERT(A.height() == b.dimension());
-		XA_DEBUG_ASSERT(A.height() >= A.width()); // @@ If height == width we could solve it directly...
-		const uint32_t D = A.width();
-		sparse::Matrix At(A.height(), A.width());
-		sparse::transpose(A, At);
-		FullVector Atb(D);
-		sparse::mult(At, b, Atb);
-		sparse::Matrix AtA(D);
-		sparse::mult(At, A, AtA);
-		return SymmetricSolver(AtA, Atb, x, epsilon);
-	}
-
-	// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
-	static bool LeastSquaresSolver(const sparse::Matrix &A, const FullVector &b, FullVector &x, const uint32_t *lockedParameters, uint32_t lockedCount, float epsilon = 1e-5f)
-	{
-		XA_DEBUG_ASSERT(A.width() == x.dimension());
-		XA_DEBUG_ASSERT(A.height() == b.dimension());
-		XA_DEBUG_ASSERT(A.height() >= A.width() - lockedCount);
-		// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
-		const uint32_t D = A.width() - lockedCount;
-		XA_DEBUG_ASSERT(D > 0);
-		// Compute: b - Al * xl
-		FullVector b_Alxl(b);
-		for (uint32_t y = 0; y < A.height(); y++) {
-			const uint32_t count = A.getRow(y).size();
-			for (uint32_t e = 0; e < count; e++) {
-				uint32_t column = A.getRow(y)[e].x;
-				bool isFree = true;
-				for (uint32_t i = 0; i < lockedCount; i++) {
-					isFree &= (lockedParameters[i] != column);
-				}
-				if (!isFree) {
-					b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
-				}
-			}
-		}
-		// Remove locked columns from A.
-		sparse::Matrix Af(D, A.height());
-		for (uint32_t y = 0; y < A.height(); y++) {
-			const uint32_t count = A.getRow(y).size();
-			for (uint32_t e = 0; e < count; e++) {
-				uint32_t column = A.getRow(y)[e].x;
-				uint32_t ix = column;
-				bool isFree = true;
-				for (uint32_t i = 0; i < lockedCount; i++) {
-					isFree &= (lockedParameters[i] != column);
-					if (column > lockedParameters[i]) ix--; // shift columns
-				}
-				if (isFree) {
-					Af.setCoefficient(ix, y, A.getRow(y)[e].v);
-				}
-			}
-		}
-		// Remove elements from x
-		FullVector xf(D);
-		for (uint32_t i = 0, j = 0; i < A.width(); i++) {
-			bool isFree = true;
-			for (uint32_t l = 0; l < lockedCount; l++) {
-				isFree &= (lockedParameters[l] != i);
-			}
-			if (isFree) {
-				xf[j++] = x[i];
-			}
-		}
-		// Solve reduced system.
-		bool result = LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
-		// Copy results back to x.
-		for (uint32_t i = 0, j = 0; i < A.width(); i++) {
-			bool isFree = true;
-			for (uint32_t l = 0; l < lockedCount; l++) {
-				isFree &= (lockedParameters[l] != i);
-			}
-			if (isFree) {
-				x[i] = xf[j++];
-			}
-		}
-		return result;
-	}
-
-private:
-	/**
-	* Compute the solution of the sparse linear system Ab=x using the Conjugate
-	* Gradient method.
-	*
-	* Solving sparse linear systems:
-	* (1)		A·x = b
-	*
-	* The conjugate gradient algorithm solves (1) only in the case that A is
-	* symmetric and positive definite. It is based on the idea of minimizing the
-	* function
-	*
-	* (2)		f(x) = 1/2·x·A·x - b·x
-	*
-	* This function is minimized when its gradient
-	*
-	* (3)		df = A·x - b
-	*
-	* is zero, which is equivalent to (1). The minimization is carried out by
-	* generating a succession of search directions p.k and improved minimizers x.k.
-	* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k),
-	* and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are
-	* built up in such a way that x.k+1 is also the minimizer of f over the whole
-	* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
-	* iterations you arrive at the minimizer over the entire vector space, i.e., the
-	* solution to (1).
-	*
-	* For a really good explanation of the method see:
-	*
-	* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
-	* Jonhathan Richard Shewchuk.
-	*
-	**/
-	// Conjugate gradient with preconditioner.
-	static bool ConjugateGradientSolver(const JacobiPreconditioner &preconditioner, const sparse::Matrix &A, const FullVector &b, FullVector &x, float epsilon)
-	{
-		XA_DEBUG_ASSERT( A.isSquare() );
-		XA_DEBUG_ASSERT( A.width() == b.dimension() );
-		XA_DEBUG_ASSERT( A.width() == x.dimension() );
-		int i = 0;
-		const int D = A.width();
-		const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
-		FullVector r(D);    // residual
-		FullVector p(D);    // search direction
-		FullVector q(D);    //
-		FullVector s(D);    // preconditioned
-		float delta_0;
-		float delta_old;
-		float delta_new;
-		float alpha;
-		float beta;
-		// r = b - A·x
-		sparse::copy(b, r);
-		sparse::sgemv(-1, A, x, 1, r);
-		// p = M^-1 · r
-		preconditioner.apply(r, p);
-		delta_new = sparse::dot(r, p);
-		delta_0 = delta_new;
-		while (i < i_max && delta_new > epsilon * epsilon * delta_0) {
-			i++;
-			// q = A·p
-			sparse::mult(A, p, q);
-			// alpha = delta_new / p·q
-			alpha = delta_new / sparse::dot(p, q);
-			// x = alfa·p + x
-			sparse::saxpy(alpha, p, x);
-			if ((i & 31) == 0) { // recompute r after 32 steps
-				// r = b - A·x
-				sparse::copy(b, r);
-				sparse::sgemv(-1, A, x, 1, r);
-			} else {
-				// r = r - alfa·q
-				sparse::saxpy(-alpha, q, r);
-			}
-			// s = M^-1 · r
-			preconditioner.apply(r, s);
-			delta_old = delta_new;
-			delta_new = sparse::dot( r, s );
-			beta = delta_new / delta_old;
-			// p = s + beta·p
-			sparse::scal(beta, p);
-			sparse::saxpy(1, s, p);
-		}
-		return delta_new <= epsilon * epsilon * delta_0;
-	}
-
-	static bool SymmetricSolver(const sparse::Matrix &A, const FullVector &b, FullVector &x, float epsilon = 1e-5f)
-	{
-		XA_DEBUG_ASSERT(A.height() == A.width());
-		XA_DEBUG_ASSERT(A.height() == b.dimension());
-		XA_DEBUG_ASSERT(b.dimension() == x.dimension());
-		JacobiPreconditioner jacobi(A, true);
-		return ConjugateGradientSolver(jacobi, A, b, x, epsilon);
-	}
-};
-
-namespace param {
-
-// Fast sweep in 3 directions
-static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b)
-{
-	XA_DEBUG_ASSERT(a != nullptr);
-	XA_DEBUG_ASSERT(b != nullptr);
-	const uint32_t vertexCount = mesh->vertexCount();
-	uint32_t minVertex[3];
-	uint32_t maxVertex[3];
-	minVertex[0] = minVertex[1] = minVertex[2] = UINT32_MAX;
-	maxVertex[0] = maxVertex[1] = maxVertex[2] = UINT32_MAX;
-	for (uint32_t v = 1; v < vertexCount; v++) {
-		if (mesh->isBoundaryVertex(v)) {
-			minVertex[0] = minVertex[1] = minVertex[2] = v;
-			maxVertex[0] = maxVertex[1] = maxVertex[2] = v;
-			break;
-		}
-	}
-	if (minVertex[0] == UINT32_MAX) {
-		// Input mesh has not boundaries.
-		return false;
-	}
-	for (uint32_t v = 1; v < vertexCount; v++) {
-		if (!mesh->isBoundaryVertex(v)) {
-			// Skip interior vertices.
-			continue;
-		}
-		const Vector3 &pos = mesh->position(v);
-		if (pos.x < mesh->position(minVertex[0]).x)
-			minVertex[0] = v;
-		else if (pos.x > mesh->position(maxVertex[0]).x)
-			maxVertex[0] = v;
-		if (pos.y < mesh->position(minVertex[1]).y)
-			minVertex[1] = v;
-		else if (pos.y > mesh->position(maxVertex[1]).y)
-			maxVertex[1] = v;
-		if (pos.z < mesh->position(minVertex[2]).z)
-			minVertex[2] = v;
-		else if (pos.z > mesh->position(maxVertex[2]).z)
-			maxVertex[2] = v;
-	}
-	float lengths[3];
-	for (int i = 0; i < 3; i++) {
-		lengths[i] = length(mesh->position(minVertex[i]) - mesh->position(maxVertex[i]));
-	}
-	if (lengths[0] > lengths[1] && lengths[0] > lengths[2]) {
-		*a = minVertex[0];
-		*b = maxVertex[0];
-	} else if (lengths[1] > lengths[2]) {
-		*a = minVertex[1];
-		*b = maxVertex[1];
-	} else {
-		*a = minVertex[2];
-		*b = maxVertex[2];
-	}
-	return true;
-}
-
-// Conformal relations from Brecht Van Lommel (based on ABF):
-
-static float vec_angle_cos(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3)
-{
-	Vector3 d1 = v1 - v2;
-	Vector3 d2 = v3 - v2;
-	return clamp(dot(d1, d2) / (length(d1) * length(d2)), -1.0f, 1.0f);
-}
-
-static float vec_angle(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3)
-{
-	float dot = vec_angle_cos(v1, v2, v3);
-	return acosf(dot);
-}
-
-static void triangle_angles(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3, float *a1, float *a2, float *a3)
-{
-	*a1 = vec_angle(v3, v1, v2);
-	*a2 = vec_angle(v1, v2, v3);
-	*a3 = kPi - *a2 - *a1;
-}
-
-static void setup_abf_relations(sparse::Matrix &A, int row, int id0, int id1, int id2, const Vector3 &p0, const Vector3 &p1, const Vector3 &p2)
-{
-	// @@ IC: Wouldn't it be more accurate to return cos and compute 1-cos^2?
-	// It does indeed seem to be a little bit more robust.
-	// @@ Need to revisit this more carefully!
-	float a0, a1, a2;
-	triangle_angles(p0, p1, p2, &a0, &a1, &a2);
-	float s0 = sinf(a0);
-	float s1 = sinf(a1);
-	float s2 = sinf(a2);
-	if (s1 > s0 && s1 > s2) {
-		swap(s1, s2);
-		swap(s0, s1);
-		swap(a1, a2);
-		swap(a0, a1);
-		swap(id1, id2);
-		swap(id0, id1);
-	} else if (s0 > s1 && s0 > s2) {
-		swap(s0, s2);
-		swap(s0, s1);
-		swap(a0, a2);
-		swap(a0, a1);
-		swap(id0, id2);
-		swap(id0, id1);
-	}
-	float c0 = cosf(a0);
-	float ratio = (s2 == 0.0f) ? 1.0f : s1 / s2;
-	float cosine = c0 * ratio;
-	float sine = s0 * ratio;
-	// Note  : 2*id + 0 --> u
-	//         2*id + 1 --> v
-	int u0_id = 2 * id0 + 0;
-	int v0_id = 2 * id0 + 1;
-	int u1_id = 2 * id1 + 0;
-	int v1_id = 2 * id1 + 1;
-	int u2_id = 2 * id2 + 0;
-	int v2_id = 2 * id2 + 1;
-	// Real part
-	A.setCoefficient(u0_id, 2 * row + 0, cosine - 1.0f);
-	A.setCoefficient(v0_id, 2 * row + 0, -sine);
-	A.setCoefficient(u1_id, 2 * row + 0, -cosine);
-	A.setCoefficient(v1_id, 2 * row + 0, sine);
-	A.setCoefficient(u2_id, 2 * row + 0, 1);
-	// Imaginary part
-	A.setCoefficient(u0_id, 2 * row + 1, sine);
-	A.setCoefficient(v0_id, 2 * row + 1, cosine - 1.0f);
-	A.setCoefficient(u1_id, 2 * row + 1, -sine);
-	A.setCoefficient(v1_id, 2 * row + 1, -cosine);
-	A.setCoefficient(v2_id, 2 * row + 1, 1);
-}
-
-static bool computeLeastSquaresConformalMap(Mesh *mesh)
-{
-	// For this to work properly, mesh should not have colocals that have the same
-	// attributes, unless you want the vertices to actually have different texcoords.
-	const uint32_t vertexCount = mesh->vertexCount();
-	const uint32_t D = 2 * vertexCount;
-	const uint32_t N = 2 * mesh->faceCount();
-	// N is the number of equations (one per triangle)
-	// D is the number of variables (one per vertex; there are 2 pinned vertices).
-	if (N < D - 4) {
-		return false;
-	}
-	sparse::Matrix A(D, N);
-	FullVector b(N);
-	FullVector x(D);
-	// Fill b:
-	b.fill(0.0f);
-	// Fill x:
-	uint32_t v0, v1;
-	if (!findApproximateDiameterVertices(mesh, &v0, &v1)) {
-		// Mesh has no boundaries.
-		return false;
-	}
-	if (mesh->texcoord(v0) == mesh->texcoord(v1)) {
-		// LSCM expects an existing parameterization.
-		return false;
-	}
-	for (uint32_t v = 0; v < vertexCount; v++) {
-		// Initial solution.
-		x[2 * v + 0] = mesh->texcoord(v).x;
-		x[2 * v + 1] = mesh->texcoord(v).y;
-	}
-	// Fill A:
-	const uint32_t faceCount = mesh->faceCount();
-	for (uint32_t f = 0, t = 0; f < faceCount; f++) {
-		const uint32_t vertex0 = mesh->vertexAt(f * 3 + 0);
-		const uint32_t vertex1 = mesh->vertexAt(f * 3 + 1);
-		const uint32_t vertex2 = mesh->vertexAt(f * 3 + 2);
-		setup_abf_relations(A, t, vertex0, vertex1, vertex2, mesh->position(vertex0), mesh->position(vertex1), mesh->position(vertex2));
-		t++;
-	}
-	const uint32_t lockedParameters[] = {
-		2 * v0 + 0,
-		2 * v0 + 1,
-		2 * v1 + 0,
-		2 * v1 + 1
-	};
-	// Solve
-	Solver::LeastSquaresSolver(A, b, x, lockedParameters, 4, 0.000001f);
-	// Map x back to texcoords:
-	for (uint32_t v = 0; v < vertexCount; v++)
-		mesh->texcoord(v) = Vector2(x[2 * v + 0], x[2 * v + 1]);
-	return true;
-}
-
-static bool computeOrthogonalProjectionMap(Mesh *mesh)
-{
-	uint32_t vertexCount = mesh->vertexCount();
-	// Avoid redundant computations.
-	float matrix[6];
-	Fit::computeCovariance(vertexCount, &mesh->position(0), matrix);
-	if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
-		return false;
-	float eigenValues[3];
-	Vector3 eigenVectors[3];
-	if (!Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
-		return false;
-	Vector3 axis[2];
-	axis[0] = normalize(eigenVectors[0], kEpsilon);
-	axis[1] = normalize(eigenVectors[1], kEpsilon);
-	// Project vertices to plane.
-	for (uint32_t i = 0; i < vertexCount; i++)
-		mesh->texcoord(i) = Vector2(dot(axis[0], mesh->position(i)), dot(axis[1], mesh->position(i)));
-	return true;
-}
+namespace segment {
 
 // Dummy implementation of a priority queue using sort at insertion.
 // - Insertion is o(n)
@@ -4729,6 +4243,7 @@ struct PriorityQueue
 
 	uint32_t pop()
 	{
+		XA_DEBUG_ASSERT(!pairs.isEmpty());
 		uint32_t f = pairs.back().face;
 		pairs.pop_back();
 		return f;
@@ -4740,12 +4255,12 @@ struct PriorityQueue
 		std::sort(pairs.begin(), pairs.end());
 	}
 
-	void clear()
+	XA_INLINE void clear()
 	{
 		pairs.clear();
 	}
 
-	uint32_t count() const
+	XA_INLINE uint32_t count() const
 	{
 		return pairs.size();
 	}
@@ -4771,7 +4286,7 @@ struct PriorityQueue
 	Array<Pair> pairs;
 };
 
-struct ChartBuildData
+struct Chart
 {
 	int id = -1;
 	Vector3 averageNormal = Vector3(0.0f);
@@ -4786,31 +4301,39 @@ struct ChartBuildData
 	Basis basis; // Of first face.
 };
 
-struct AtlasBuilder
+struct Atlas
 {
 	// @@ Hardcoded to 10?
-	AtlasBuilder(const Mesh *mesh, Array<uint32_t> *meshFaces, const ChartOptions &options) : m_mesh(mesh), m_meshFaces(meshFaces), m_facesLeft(mesh->faceCount()), m_bestTriangles(10), m_options(options)
+	Atlas(const Mesh *mesh, Array<uint32_t> *meshFaces, const ChartOptions &options) : m_mesh(mesh), m_meshFaces(meshFaces), m_facesLeft(mesh->faceCount()), m_bestTriangles(10), m_options(options)
 	{
-		XA_PROFILE_START(atlasBuilderInit)
+		XA_PROFILE_START(buildAtlasInit)
 		const uint32_t faceCount = m_mesh->faceCount();
 		if (meshFaces) {
-			m_ignoreFaces.resize(faceCount, true);
+			m_ignoreFaces.resize(faceCount);
+			m_ignoreFaces.setAll(true);
 			for (uint32_t f = 0; f < meshFaces->size(); f++)
 				m_ignoreFaces[(*meshFaces)[f]] = false;
 			m_facesLeft = meshFaces->size();
 		} else {
-			m_ignoreFaces.resize(faceCount, false);
+			m_ignoreFaces.resize(faceCount);
+			m_ignoreFaces.setAll(false);
 		}
-		m_faceChartArray.resize(faceCount, -1);
-		m_faceCandidateArray.resize(faceCount, (uint32_t)-1);
+		m_faceChartArray.resize(faceCount);
+		m_faceChartArray.setAll(-1);
+		m_faceCandidateCharts.resize(faceCount);
+		m_faceCandidateCosts.resize(faceCount);
 		m_texcoords.resize(faceCount * 3);
 		// @@ Floyd for the whole mesh is too slow. We could compute floyd progressively per patch as the patch grows. We need a better solution to compute most central faces.
 		//computeShortestPaths();
 		// Precompute edge lengths and face areas.
 		const uint32_t edgeCount = m_mesh->edgeCount();
-		m_edgeLengths.resize(edgeCount, 0.0f);
-		m_faceAreas.resize(m_mesh->faceCount(), 0.0f);
-		m_faceNormals.resize(m_mesh->faceCount());
+		m_edgeLengths.resize(edgeCount);
+		m_edgeLengths.zeroOutMemory();
+		m_faceAreas.resize(faceCount);
+		m_faceAreas.zeroOutMemory();
+		m_faceNormals.resize(faceCount);
+		m_faceTangents.resize(faceCount);
+		m_faceBitangents.resize(faceCount);
 		for (uint32_t f = 0; f < faceCount; f++) {
 			if (m_ignoreFaces[f])
 				continue;
@@ -4821,15 +4344,52 @@ struct AtlasBuilder
 			m_faceAreas[f] = mesh->faceArea(f);
 			XA_DEBUG_ASSERT(m_faceAreas[f] > 0.0f);
 			m_faceNormals[f] = m_mesh->triangleNormal(f);
+			m_faceTangents[f] = Basis::computeTangent(m_faceNormals[f]);
+			m_faceBitangents[f] = Basis::computeBitangent(m_faceNormals[f], m_faceTangents[f]);
 		}
-		XA_PROFILE_END(atlasBuilderInit)
+#if XA_GROW_CHARTS_COPLANAR
+		// Precompute regions of coplanar incident faces.
+		m_nextPlanarRegionFace.resize(faceCount);
+		for (uint32_t f = 0; f < faceCount; f++)
+			m_nextPlanarRegionFace[f] = f;
+		Array<uint32_t> faceStack;
+		faceStack.reserve(min(faceCount, 16u));
+		for (uint32_t f = 0; f < faceCount; f++) {
+			if (m_nextPlanarRegionFace[f] != f)
+				continue; // Already assigned.
+			if (m_ignoreFaces[f])
+				continue;
+			faceStack.clear();
+			faceStack.push_back(f);
+			for (;;) {
+				if (faceStack.isEmpty())
+					break;
+				const uint32_t face = faceStack.back();
+				faceStack.pop_back();
+				for (Mesh::FaceEdgeIterator it(m_mesh, face); !it.isDone(); it.advance()) {
+					const uint32_t oface = it.oppositeFace();
+					if (it.isBoundary() || m_ignoreFaces[oface])
+						continue;
+					if (m_nextPlanarRegionFace[oface] != oface)
+						continue; // Already assigned.
+					if (!equal(dot(m_faceNormals[face], m_faceNormals[oface]), 1.0f, kEpsilon))
+						continue; // Not coplanar.
+					const uint32_t next = m_nextPlanarRegionFace[face];
+					m_nextPlanarRegionFace[face] = oface;
+					m_nextPlanarRegionFace[oface] = next;
+					faceStack.push_back(oface);
+				}
+			}
+		}
+#endif
+		XA_PROFILE_END(buildAtlasInit)
 	}
 
-	~AtlasBuilder()
+	~Atlas()
 	{
 		const uint32_t chartCount = m_chartArray.size();
 		for (uint32_t i = 0; i < chartCount; i++) {
-			m_chartArray[i]->~ChartBuildData();
+			m_chartArray[i]->~Chart();
 			XA_FREE(m_chartArray[i]);
 		}
 	}
@@ -4838,9 +4398,11 @@ struct AtlasBuilder
 	uint32_t chartCount() const { return m_chartArray.size(); }
 	const Array<uint32_t> &chartFaces(uint32_t i) const { return m_chartArray[i]->faces; }
 	const Basis &chartBasis(uint32_t chartIndex) const { return m_chartArray[chartIndex]->basis; }
+	const Vector2 *faceTexcoords(uint32_t face) const { return &m_texcoords[face * 3]; }
 
 	void placeSeeds(float threshold)
 	{
+		XA_PROFILE_START(buildAtlasPlaceSeeds)
 		// Instead of using a predefiened number of seeds:
 		// - Add seeds one by one, growing chart until a certain treshold.
 		// - Undo charts and restart growing process.
@@ -4849,43 +4411,44 @@ struct AtlasBuilder
 		//   - how do we weight the probabilities?
 		while (m_facesLeft > 0)
 			createRandomChart(threshold);
+		XA_PROFILE_END(buildAtlasPlaceSeeds)
 	}
 
 	// Returns true if any of the charts can grow more.
-	bool growCharts(float threshold, uint32_t faceCount)
-	{
-		XA_PROFILE_START(atlasBuilderGrowCharts)
-		// Using one global list.
-		faceCount = min(faceCount, m_facesLeft);
+	bool growCharts(float threshold)
+	{
+		XA_PROFILE_START(buildAtlasGrowCharts)
+		// Build global candidate list.
+		m_faceCandidateCharts.zeroOutMemory();
+		for (uint32_t i = 0; i < m_chartArray.size(); i++)
+			addChartCandidateToGlobalCandidates(m_chartArray[i]);
+		// Add one candidate face per chart (threshold permitting).
+		const uint32_t faceCount = m_mesh->faceCount();
 		bool canAddAny = false;
-		for (uint32_t i = 0; i < faceCount; i++) {
-			const Candidate &candidate = getBestCandidate();
-			if (candidate.metric > threshold) {
-				XA_PROFILE_END(atlasBuilderGrowCharts)
-				return false; // Can't grow more.
-			}
-			createFaceTexcoords(candidate.chart, candidate.face);
-			if (!canAddFaceToChart(candidate.chart, candidate.face))
+		for (uint32_t f = 0; f < faceCount; f++) {
+			Chart *chart = m_faceCandidateCharts[f];
+			if (!chart || m_faceCandidateCosts[f] > threshold)
+				continue;
+			createFaceTexcoords(chart, f);
+			if (!canAddFaceToChart(chart, f))
 				continue;
-			addFaceToChart(candidate.chart, candidate.face);
+			addFaceToChart(chart, f);
 			canAddAny = true;
 		}
-		XA_PROFILE_END(atlasBuilderGrowCharts)
+		XA_PROFILE_END(buildAtlasGrowCharts)
 		return canAddAny && m_facesLeft != 0; // Can continue growing.
 	}
 
 	void resetCharts()
 	{
+		XA_PROFILE_START(buildAtlasResetCharts)
 		const uint32_t faceCount = m_mesh->faceCount();
-		for (uint32_t i = 0; i < faceCount; i++) {
+		for (uint32_t i = 0; i < faceCount; i++)
 			m_faceChartArray[i] = -1;
-			m_faceCandidateArray[i] = (uint32_t)-1;
-		}
 		m_facesLeft = m_meshFaces ? m_meshFaces->size() : faceCount;
-		m_candidateArray.clear();
 		const uint32_t chartCount = m_chartArray.size();
 		for (uint32_t i = 0; i < chartCount; i++) {
-			ChartBuildData *chart = m_chartArray[i];
+			Chart *chart = m_chartArray[i];
 			const uint32_t seed = chart->seeds.back();
 			chart->area = 0.0f;
 			chart->boundaryLength = 0.0f;
@@ -4898,30 +4461,32 @@ struct AtlasBuilder
 		}
 #if XA_GROW_CHARTS_COPLANAR
 		for (uint32_t i = 0; i < chartCount; i++) {
-			ChartBuildData *chart = m_chartArray[i];
+			Chart *chart = m_chartArray[i];
 			growChartCoplanar(chart);
 		}
 #endif
+		XA_PROFILE_END(buildAtlasResetCharts)
 	}
 
-	void updateCandidates(ChartBuildData *chart, uint32_t f)
+	void updateChartCandidates(Chart *chart, uint32_t f)
 	{
 		// Traverse neighboring faces, add the ones that do not belong to any chart yet.
 		for (Mesh::FaceEdgeIterator it(m_mesh, f); !it.isDone(); it.advance()) {
 			if (!it.isBoundary() && !m_ignoreFaces[it.oppositeFace()] && m_faceChartArray[it.oppositeFace()] == -1)
 				chart->candidates.push(it.oppositeFace());
 		}
-	}
-
-	void updateProxies()
-	{
-		const uint32_t chartCount = m_chartArray.size();
-		for (uint32_t i = 0; i < chartCount; i++)
-			updateProxy(m_chartArray[i]);
+		// Re-evaluate all candidate priorities.
+		uint32_t candidateCount = chart->candidates.count();
+		for (uint32_t i = 0; i < candidateCount; i++) {
+			PriorityQueue::Pair &pair = chart->candidates.pairs[i];
+			pair.priority = evaluateCost(chart, pair.face);
+		}
+		chart->candidates.sort();
 	}
 
 	bool relocateSeeds()
 	{
+		XA_PROFILE_START(buildAtlasRelocateSeeds)
 		bool anySeedChanged = false;
 		const uint32_t chartCount = m_chartArray.size();
 		for (uint32_t i = 0; i < chartCount; i++) {
@@ -4929,19 +4494,22 @@ struct AtlasBuilder
 				anySeedChanged = true;
 			}
 		}
+		XA_PROFILE_END(buildAtlasRelocateSeeds)
 		return anySeedChanged;
 	}
 
 	void fillHoles(float threshold)
 	{
+		XA_PROFILE_START(buildAtlasFillHoles)
 		while (m_facesLeft > 0)
 			createRandomChart(threshold);
+		XA_PROFILE_END(buildAtlasFillHoles)
 	}
 
 #if XA_MERGE_CHARTS
 	void mergeCharts()
 	{
-		XA_PROFILE_START(atlasBuilderMergeCharts)
+		XA_PROFILE_START(buildAtlasMergeCharts)
 		Array<float> sharedBoundaryLengths;
 		Array<float> sharedBoundaryLengthsNoSeams;
 		Array<uint32_t> sharedBoundaryEdgeCountNoSeams;
@@ -4951,16 +4519,19 @@ struct AtlasBuilder
 		for (;;) {
 			bool merged = false;
 			for (int c = chartCount - 1; c >= 0; c--) {
-				ChartBuildData *chart = m_chartArray[c];
+				Chart *chart = m_chartArray[c];
 				if (chart == nullptr)
 					continue;
 				float externalBoundaryLength = 0.0f;
 				sharedBoundaryLengths.clear();
-				sharedBoundaryLengths.resize(chartCount, 0.0f);
+				sharedBoundaryLengths.resize(chartCount);
+				sharedBoundaryLengths.zeroOutMemory();
 				sharedBoundaryLengthsNoSeams.clear();
-				sharedBoundaryLengthsNoSeams.resize(chartCount, 0.0f);
+				sharedBoundaryLengthsNoSeams.resize(chartCount);
+				sharedBoundaryLengthsNoSeams.zeroOutMemory();
 				sharedBoundaryEdgeCountNoSeams.clear();
-				sharedBoundaryEdgeCountNoSeams.resize(chartCount, 0u);
+				sharedBoundaryEdgeCountNoSeams.resize(chartCount);
+				sharedBoundaryEdgeCountNoSeams.zeroOutMemory();
 				const uint32_t faceCount = chart->faces.size();
 				for (uint32_t i = 0; i < faceCount; i++) {
 					const uint32_t f = chart->faces[i];
@@ -4985,7 +4556,7 @@ struct AtlasBuilder
 				for (int cc = chartCount - 1; cc >= 0; cc--) {
 					if (cc == c)
 						continue;
-					ChartBuildData *chart2 = m_chartArray[cc];
+					Chart *chart2 = m_chartArray[cc];
 					if (chart2 == nullptr)
 						continue;
 					// Compare proxies.
@@ -5013,16 +4584,19 @@ struct AtlasBuilder
 					continue;
 				merge:
 					// Create texcoords for chart 2 using chart 1 basis. Backup chart 2 texcoords for restoration if charts cannot be merged.
-					tempTexcoords.resize(chart2->faces.size());
+					tempTexcoords.resize(chart2->faces.size() * 3);
 					for (uint32_t i = 0; i < chart2->faces.size(); i++) {
 						const uint32_t face = chart2->faces[i];
-						tempTexcoords[i] = m_texcoords[face];
+						for (uint32_t j = 0; j < 3; j++)
+							tempTexcoords[i * 3 + j] = m_texcoords[face * 3 + j];
 						createFaceTexcoords(chart, face);
 					}
 					if (!canMergeCharts(chart, chart2)) {
 						// Restore chart 2 texcoords.
-						for (uint32_t i = 0; i < chart2->faces.size(); i++)
-							m_texcoords[chart2->faces[i]] = tempTexcoords[i];
+						for (uint32_t i = 0; i < chart2->faces.size(); i++) {
+							for (uint32_t j = 0; j < 3; j++)
+								m_texcoords[chart2->faces[i] * 3 + j] = tempTexcoords[i * 3 + j];
+						}
 						continue;
 					}
 					mergeChart(chart, chart2, sharedBoundaryLengthsNoSeams[cc]);
@@ -5053,14 +4627,14 @@ struct AtlasBuilder
 				c++;
 			}
 		}
-		XA_PROFILE_END(atlasBuilderMergeCharts)
+		XA_PROFILE_END(buildAtlasMergeCharts)
 	}
 #endif
 
 private:
 	void createRandomChart(float threshold)
 	{
-		ChartBuildData *chart = XA_NEW(MemTag::Default, ChartBuildData);
+		Chart *chart = XA_NEW(MemTag::Default, Chart);
 		chart->id = (int)m_chartArray.size();
 		m_chartArray.push_back(chart);
 		// Pick random face that is not used by any chart yet.
@@ -5070,15 +4644,52 @@ private:
 				face = 0;
 		}
 		chart->seeds.push_back(face);
-		addFaceToChart(chart, face, true);
+		addFaceToChart(chart, face);
 #if XA_GROW_CHARTS_COPLANAR
 		growChartCoplanar(chart);
 #endif
 		// Grow the chart as much as possible within the given threshold.
-		growChart(chart, threshold, m_facesLeft);
+		for (uint32_t i = 0; i < m_facesLeft; ) {
+			if (chart->candidates.count() == 0 || chart->candidates.firstPriority() > threshold)
+				break;
+			const uint32_t f = chart->candidates.pop();
+			if (m_faceChartArray[f] != -1)
+				continue;
+			createFaceTexcoords(chart, f);
+			if (!canAddFaceToChart(chart, f))
+				continue;
+			addFaceToChart(chart, f);
+			i++;
+		}
+	}
+
+	void addChartCandidateToGlobalCandidates(Chart *chart)
+	{
+		if (chart->candidates.count() == 0)
+			return;
+		const float cost = chart->candidates.firstPriority();
+		const uint32_t face = chart->candidates.pop();
+		if (m_faceChartArray[face] != -1) {
+			addChartCandidateToGlobalCandidates(chart);
+		} else if (!m_faceCandidateCharts[face]) {
+			// No candidate assigned to this face yet.
+			m_faceCandidateCharts[face] = chart;
+			m_faceCandidateCosts[face] = cost;
+		} else {
+			if (cost < m_faceCandidateCosts[face]) {
+				// This is a better candidate for this face (lower cost). The other chart can choose another candidate.
+				Chart *otherChart = m_faceCandidateCharts[face];
+				m_faceCandidateCharts[face] = chart;
+				m_faceCandidateCosts[face] = cost;
+				addChartCandidateToGlobalCandidates(otherChart);
+			} else {
+				// Existing candidate is better. This chart can choose another candidate.
+				addChartCandidateToGlobalCandidates(chart);
+			}
+		}
 	}
 
-	void createFaceTexcoords(ChartBuildData *chart, uint32_t face)
+	void createFaceTexcoords(Chart *chart, uint32_t face)
 	{
 		for (uint32_t i = 0; i < 3; i++) {
 			const Vector3 &pos = m_mesh->position(m_mesh->vertexAt(face * 3 + i));
@@ -5086,30 +4697,71 @@ private:
 		}
 	}
 
-	bool isChartBoundaryEdge(ChartBuildData *chart, uint32_t edge) const
+	bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const
 	{
 		const uint32_t oppositeEdge = m_mesh->oppositeEdge(edge);
 		const uint32_t oppositeFace = meshEdgeFace(oppositeEdge);
 		return oppositeEdge == UINT32_MAX || m_ignoreFaces[oppositeFace] || m_faceChartArray[oppositeFace] != chart->id;
 	}
 
-	bool canAddFaceToChart(ChartBuildData *chart, uint32_t face)
+	bool edgeArraysIntersect(const uint32_t *edges1, uint32_t edges1Count, const uint32_t *edges2, uint32_t edges2Count)
+	{
+		for (uint32_t i = 0; i < edges1Count; i++) {
+			const uint32_t edge1 = edges1[i];
+			for (uint32_t j = 0; j < edges2Count; j++) {
+				const uint32_t edge2 = edges2[j];
+				const Vector2 &a1 = m_texcoords[meshEdgeIndex0(edge1)];
+				const Vector2 &a2 = m_texcoords[meshEdgeIndex1(edge1)];
+				const Vector2 &b1 = m_texcoords[meshEdgeIndex0(edge2)];
+				const Vector2 &b2 = m_texcoords[meshEdgeIndex1(edge2)];
+				if (linesIntersect(a1, a2, b1, b2, m_mesh->epsilon()))
+					return true;
+			}
+		}
+		return false;
+	}
+
+	bool isFaceFlipped(uint32_t face) const
+	{
+		const float t1 = m_texcoords[face * 3 + 0].x;
+		const float s1 = m_texcoords[face * 3 + 0].y;
+		const float t2 = m_texcoords[face * 3 + 1].x;
+		const float s2 = m_texcoords[face * 3 + 1].y;
+		const float t3 = m_texcoords[face * 3 + 2].x;
+		const float s3 = m_texcoords[face * 3 + 2].y;
+		const float parametricArea = ((s2 - s1) * (t3 - t1) - (s3 - s1) * (t2 - t1)) / 2;
+		return parametricArea < 0.0f;
+	}
+
+	void computeChartBoundaryEdges(const Chart *chart, Array<uint32_t> *dest) const
 	{
-		// Find face edges that are on a mesh boundary or form a boundary with another chart.
-		uint32_t edgesToCompare[3];
+		dest->clear();
+		for (uint32_t f = 0; f < chart->faces.size(); f++) {
+			const uint32_t face = chart->faces[f];
+			for (uint32_t i = 0; i < 3; i++) {
+				const uint32_t edge = face * 3 + i;
+				if (isChartBoundaryEdge(chart, edge))
+					dest->push_back(edge);
+			}
+		}
+	}
+
+	bool canAddFaceToChart(Chart *chart, uint32_t face)
+	{
+		// Check for flipped triangles.
+		if (isFaceFlipped(face))
+			return false;
+		// Find face edges that don't border this chart.
+		m_tempEdges1.clear();
 		for (uint32_t i = 0; i < 3; i++) {
 			const uint32_t edge = face * 3 + i;
-			const uint32_t oppositeEdge = m_mesh->oppositeEdge(edge);
-			const uint32_t oppositeFace = meshEdgeFace(oppositeEdge);
-			if (oppositeEdge == UINT32_MAX || m_ignoreFaces[oppositeFace] || m_faceChartArray[oppositeFace] != chart->id)
-				edgesToCompare[i] = edge;
-			else
-				edgesToCompare[i] = UINT32_MAX;
+			if (isChartBoundaryEdge(chart, edge))
+				m_tempEdges1.push_back(edge);
 		}
-		// All edges on boundary? This can happen if the face is surrounded by the chart.
-		if (edgesToCompare[0] == UINT32_MAX && edgesToCompare[1] == UINT32_MAX && edgesToCompare[2] == UINT32_MAX)
-			return true;
-		// Check if any valid face edge intersects the chart boundary.
+		if (m_tempEdges1.isEmpty())
+			return true; // This can happen if the face is surrounded by the chart.
+		// Get chart boundary edges, except those that border the face.
+		m_tempEdges2.clear();
 		for (uint32_t i = 0; i < chart->faces.size(); i++) {
 			const uint32_t chartFace = chart->faces[i];
 			for (uint32_t j = 0; j < 3; j++) {
@@ -5120,47 +4772,65 @@ private:
 				const uint32_t oppositeChartEdge = m_mesh->oppositeEdge(chartEdge);
 				if (meshEdgeFace(oppositeChartEdge) == face)
 					continue;
-				for (uint32_t k = 0; k < 3; k++) {
-					if (edgesToCompare[k] == UINT32_MAX)
-						continue;
-					const uint32_t e1 = chartEdge;
-					const uint32_t e2 = edgesToCompare[k];
-					if (linesIntersect(m_texcoords[meshEdgeIndex0(e1)], m_texcoords[meshEdgeIndex1(e1)], m_texcoords[meshEdgeIndex0(e2)], m_texcoords[meshEdgeIndex1(e2)], m_mesh->epsilon()))
-						return false;
-				}
+				m_tempEdges2.push_back(chartEdge);
 			}
 		}
-		return true;
-	}
-
-	bool canMergeCharts(ChartBuildData *chart1, ChartBuildData *chart2)
-	{
-		for (uint32_t f1 = 0; f1 < chart1->faces.size(); f1++) {
-			const uint32_t face1 = chart1->faces[f1];
-			for (uint32_t i = 0; i < 3; i++) {
-				const uint32_t edge1 = face1 * 3 + i;
-				if (!isChartBoundaryEdge(chart1, edge1))
-					continue;
-				for (uint32_t f2 = 0; f2 < chart2->faces.size(); f2++) {
-					const uint32_t face2 = chart2->faces[f2];
+		const bool intersect = edgeArraysIntersect(m_tempEdges1.data(), m_tempEdges1.size(), m_tempEdges2.data(), m_tempEdges2.size());
+#if 0
+		if (intersect) {
+			static std::atomic<uint32_t> count = 0;
+			char filename[256];
+			XA_SPRINTF(filename, sizeof(filename), "intersect%04u.obj", count.fetch_add(1));
+			FILE *file;
+			XA_FOPEN(file, filename, "w");
+			if (file) {
+				for (uint32_t i = 0; i < m_texcoords.size(); i++)
+					fprintf(file, "v %g %g 0.0\n", m_texcoords[i].x, m_texcoords[i].y);
+				fprintf(file, "s off\n");
+				fprintf(file, "o face\n");
+				{
+					fprintf(file, "f ");
 					for (uint32_t j = 0; j < 3; j++) {
-						const uint32_t edge2 = face2 * 3 + j;
-						if (!isChartBoundaryEdge(chart2, edge2))
-							continue;
-						if (linesIntersect(m_texcoords[meshEdgeIndex0(edge1)], m_texcoords[meshEdgeIndex1(edge1)], m_texcoords[meshEdgeIndex0(edge2)], m_texcoords[meshEdgeIndex1(edge2)], m_mesh->epsilon()))
-							return false;
+						const uint32_t index = face * 3 + j + 1; // 1-indexed
+						fprintf(file, "%d/%d/%d%c", index, index, index, j == 2 ? '\n' : ' ');
+					}
+				}
+				fprintf(file, "s off\n");
+				fprintf(file, "o chart\n");
+				for (uint32_t i = 0; i < chart->faces.size(); i++) {
+					const uint32_t chartFace = chart->faces[i];
+					fprintf(file, "f ");
+					for (uint32_t j = 0; j < 3; j++) {
+						const uint32_t index = chartFace * 3 + j + 1; // 1-indexed
+						fprintf(file, "%d/%d/%d%c", index, index, index, j == 2 ? '\n' : ' ');
 					}
 				}
+				fclose(file);
 			}
 		}
-		return true;
+#endif
+		return !intersect;
+	}
+
+	bool canMergeCharts(Chart *chart1, Chart *chart2)
+	{
+		for (uint32_t i = 0; i < chart2->faces.size(); i++) {
+			if (isFaceFlipped(chart2->faces[i]))
+				return false;
+		}
+		computeChartBoundaryEdges(chart1, &m_tempEdges1);
+		computeChartBoundaryEdges(chart2, &m_tempEdges2);
+		return !edgeArraysIntersect(m_tempEdges1.data(), m_tempEdges1.size(), m_tempEdges2.data(), m_tempEdges2.size());
 	}
 
-	void addFaceToChart(ChartBuildData *chart, uint32_t f, bool recomputeProxy = false)
+	void addFaceToChart(Chart *chart, uint32_t f)
 	{
+		const bool firstFace = chart->faces.isEmpty();
 		// Use the first face normal as the chart basis.
-		if (chart->faces.isEmpty()) {
-			chart->basis.buildFrameForDirection(m_faceNormals[f]);
+		if (firstFace) {
+			chart->basis.normal = m_faceNormals[f];
+			chart->basis.tangent = m_faceTangents[f];
+			chart->basis.bitangent = m_faceBitangents[f];
 			createFaceTexcoords(chart, f);
 		}
 		// Add face to chart.
@@ -5169,74 +4839,36 @@ private:
 		m_faceChartArray[f] = chart->id;
 		m_facesLeft--;
 		// Update area and boundary length.
-		chart->area = evaluateChartArea(chart, f);
-		chart->boundaryLength = evaluateBoundaryLength(chart, f);
-		chart->normalSum = evaluateChartNormalSum(chart, f);
+		chart->area = chart->area + m_faceAreas[f];
+		chart->boundaryLength = computeBoundaryLength(chart, f);
+		chart->normalSum += m_mesh->triangleNormalAreaScaled(f);
+		chart->averageNormal = normalizeSafe(chart->normalSum, Vector3(0), 0.0f);
 		chart->centroidSum += m_mesh->triangleCenter(f);
-		if (recomputeProxy) {
-			// Update proxy and candidate's priorities.
-			updateProxy(chart);
-		}
+		chart->centroid = chart->centroidSum / float(chart->faces.size());
 		// Update candidates.
-		removeCandidate(f);
-		updateCandidates(chart, f);
-		updatePriorities(chart);
-	}
-
-	bool growChart(ChartBuildData *chart, float threshold, uint32_t faceCount)
-	{
-		// Try to add faceCount faces within threshold to chart.
-		for (uint32_t i = 0; i < faceCount; ) {
-			if (chart->candidates.count() == 0 || chart->candidates.firstPriority() > threshold)
-				return false;
-			const uint32_t f = chart->candidates.pop();
-			if (m_faceChartArray[f] != -1)
-				continue;
-			createFaceTexcoords(chart, f);
-			if (!canAddFaceToChart(chart, f))
-				continue;
-			addFaceToChart(chart, f);
-			i++;
-		}
-		if (chart->candidates.count() == 0 || chart->candidates.firstPriority() > threshold)
-			return false;
-		return true;
+		updateChartCandidates(chart, f);
 	}
 
 #if XA_GROW_CHARTS_COPLANAR
-	void growChartCoplanar(ChartBuildData *chart)
+	void growChartCoplanar(Chart *chart)
 	{
 		XA_DEBUG_ASSERT(!chart->faces.isEmpty());
-		const Vector3 chartNormal = m_faceNormals[chart->faces[0]];
-		m_growFaces.clear();
-		for (uint32_t f = 0; f < chart->faces.size(); f++)
-			m_growFaces.push_back(chart->faces[f]);
-		for (;;) {
-			if (m_growFaces.isEmpty())
-				break;
-			const uint32_t face = m_growFaces.back();
-			m_growFaces.pop_back();
-			for (Mesh::FaceEdgeIterator it(m_mesh, face); !it.isDone(); it.advance()) {
-				if (it.isBoundary() || m_ignoreFaces[it.oppositeFace()] || m_faceChartArray[it.oppositeFace()] != -1)
-					continue;
-				if (equal(dot(chartNormal, m_faceNormals[it.oppositeFace()]), 1.0f, kEpsilon)) {
-					createFaceTexcoords(chart, it.oppositeFace());
-					addFaceToChart(chart, it.oppositeFace());
-					m_growFaces.push_back(it.oppositeFace());
+		for (uint32_t i = 0; i < chart->faces.size(); i++) {
+			const uint32_t chartFace = chart->faces[i];
+			uint32_t face = m_nextPlanarRegionFace[chartFace];
+			while (face != chartFace) { 
+				// Not assigned to a chart?
+				if (m_faceChartArray[face] == -1) {
+					createFaceTexcoords(chart, face);
+					addFaceToChart(chart, face);
 				}
+				face = m_nextPlanarRegionFace[face];
 			}
 		}
 	}
 #endif
 
-	void updateProxy(ChartBuildData *chart) const
-	{
-		//#pragma message(NV_FILE_LINE "TODO: Use best fit plane instead of average normal.")
-		chart->averageNormal = normalizeSafe(chart->normalSum, Vector3(0), 0.0f);
-		chart->centroid = chart->centroidSum / float(chart->faces.size());
-	}
-
-	bool relocateSeed(ChartBuildData *chart)
+	bool relocateSeed(Chart *chart)
 	{
 		// Find the first N triangles that fit the proxy best.
 		const uint32_t faceCount = chart->faces.size();
@@ -5272,26 +4904,12 @@ private:
 		return true;
 	}
 
-	void updatePriorities(ChartBuildData *chart)
-	{
-		// Re-evaluate candidate priorities.
-		uint32_t candidateCount = chart->candidates.count();
-		for (uint32_t i = 0; i < candidateCount; i++) {
-			PriorityQueue::Pair &pair = chart->candidates.pairs[i];
-			pair.priority = evaluatePriority(chart, pair.face);
-			if (m_faceChartArray[pair.face] == -1)
-				updateCandidate(chart, pair.face, pair.priority);
-		}
-		// Sort candidates.
-		chart->candidates.sort();
-	}
-
 	// Evaluate combined metric.
-	float evaluatePriority(ChartBuildData *chart, uint32_t face) const
+	float evaluateCost(Chart *chart, uint32_t face) const
 	{
 		// Estimate boundary length and area:
-		const float newChartArea = evaluateChartArea(chart, face);
-		const float newBoundaryLength = evaluateBoundaryLength(chart, face);
+		const float newChartArea = chart->area + m_faceAreas[face];
+		const float newBoundaryLength = computeBoundaryLength(chart, face);
 		// Enforce limits strictly:
 		if (m_options.maxChartArea > 0.0f && newChartArea > m_options.maxChartArea)
 			return FLT_MAX;
@@ -5323,14 +4941,14 @@ private:
 	}
 
 	// Returns a value in [0-1].
-	float evaluateProxyFitMetric(ChartBuildData *chart, uint32_t f) const
+	float evaluateProxyFitMetric(Chart *chart, uint32_t f) const
 	{
 		const Vector3 faceNormal = m_faceNormals[f];
 		// Use plane fitting metric for now:
 		return 1 - dot(faceNormal, chart->averageNormal); // @@ normal deviations should be weighted by face area
 	}
 
-	float evaluateRoundnessMetric(ChartBuildData *chart, uint32_t /*face*/, float newBoundaryLength, float newChartArea) const
+	float evaluateRoundnessMetric(Chart *chart, uint32_t /*face*/, float newBoundaryLength, float newChartArea) const
 	{
 		float roundness = square(chart->boundaryLength) / chart->area;
 		float newRoundness = square(newBoundaryLength) / newChartArea;
@@ -5342,7 +4960,7 @@ private:
 		}
 	}
 
-	float evaluateStraightnessMetric(ChartBuildData *chart, uint32_t f) const
+	float evaluateStraightnessMetric(Chart *chart, uint32_t f) const
 	{
 		float l_out = 0.0f;
 		float l_in = 0.0f;
@@ -5378,7 +4996,7 @@ private:
 		return m_faceNormals[meshEdgeFace(edge)] != m_faceNormals[meshEdgeFace(oppositeEdge)];
 	}
 
-	float evaluateNormalSeamMetric(ChartBuildData *chart, uint32_t f) const
+	float evaluateNormalSeamMetric(Chart *chart, uint32_t f) const
 	{
 		float seamFactor = 0.0f;
 		float totalLength = 0.0f;
@@ -5414,7 +5032,7 @@ private:
 		return seamFactor / totalLength;
 	}
 
-	float evaluateTextureSeamMetric(ChartBuildData *chart, uint32_t f) const
+	float evaluateTextureSeamMetric(Chart *chart, uint32_t f) const
 	{
 		float seamLength = 0.0f;
 		float totalLength = 0.0f;
@@ -5436,12 +5054,7 @@ private:
 		return seamLength / totalLength;
 	}
 
-	float evaluateChartArea(ChartBuildData *chart, uint32_t f) const
-	{
-		return chart->area + m_faceAreas[f];
-	}
-
-	float evaluateBoundaryLength(ChartBuildData *chart, uint32_t f) const
+	float computeBoundaryLength(Chart *chart, uint32_t f) const
 	{
 		float boundaryLength = chart->boundaryLength;
 		// Add new edges, subtract edges shared with the chart.
@@ -5459,73 +5072,7 @@ private:
 		return max(0.0f, boundaryLength);  // @@ Hack!
 	}
 
-	Vector3 evaluateChartNormalSum(ChartBuildData *chart, uint32_t f) const
-	{
-		return chart->normalSum + m_mesh->triangleNormalAreaScaled(f);
-	}
-
-	// @@ Cleanup.
-	struct Candidate {
-		ChartBuildData *chart;
-		uint32_t face;
-		float metric;
-	};
-
-	// @@ Get N best candidates in one pass.
-	const Candidate &getBestCandidate() const
-	{
-		uint32_t best = 0;
-		float bestCandidateMetric = FLT_MAX;
-		const uint32_t candidateCount = m_candidateArray.size();
-		XA_ASSERT(candidateCount > 0);
-		for (uint32_t i = 0; i < candidateCount; i++) {
-			const Candidate &candidate = m_candidateArray[i];
-			if (candidate.metric < bestCandidateMetric) {
-				bestCandidateMetric = candidate.metric;
-				best = i;
-			}
-		}
-		return m_candidateArray[best];
-	}
-
-	void removeCandidate(uint32_t f)
-	{
-		int c = m_faceCandidateArray[f];
-		if (c != -1) {
-			m_faceCandidateArray[f] = (uint32_t)-1;
-			if (c == int(m_candidateArray.size() - 1)) {
-				m_candidateArray.pop_back();
-			} else {
-				// Replace with last.
-				m_candidateArray[c] = m_candidateArray[m_candidateArray.size() - 1];
-				m_candidateArray.pop_back();
-				m_faceCandidateArray[m_candidateArray[c].face] = c;
-			}
-		}
-	}
-
-	void updateCandidate(ChartBuildData *chart, uint32_t f, float metric)
-	{
-		if (m_faceCandidateArray[f] == (uint32_t)-1) {
-			const uint32_t index = m_candidateArray.size();
-			m_faceCandidateArray[f] = index;
-			m_candidateArray.resize(index + 1);
-			m_candidateArray[index].face = f;
-			m_candidateArray[index].chart = chart;
-			m_candidateArray[index].metric = metric;
-		} else {
-			const uint32_t c = m_faceCandidateArray[f];
-			XA_DEBUG_ASSERT(c != (uint32_t)-1);
-			Candidate &candidate = m_candidateArray[c];
-			XA_DEBUG_ASSERT(candidate.face == f);
-			if (metric < candidate.metric || chart == candidate.chart) {
-				candidate.metric = metric;
-				candidate.chart = chart;
-			}
-		}
-	}
-
-	void mergeChart(ChartBuildData *owner, ChartBuildData *chart, float sharedBoundaryLength)
+	void mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength)
 	{
 		const uint32_t faceCount = chart->faces.size();
 		for (uint32_t i = 0; i < faceCount; i++) {
@@ -5538,10 +5085,10 @@ private:
 		owner->area += chart->area;
 		owner->boundaryLength += chart->boundaryLength - sharedBoundaryLength;
 		owner->normalSum += chart->normalSum;
-		updateProxy(owner);
+		owner->averageNormal = normalizeSafe(owner->normalSum, Vector3(0), 0.0f);
 		// Delete chart.
 		m_chartArray[chart->id] = nullptr;
-		chart->~ChartBuildData();
+		chart->~Chart();
 		XA_FREE(chart);
 	}
 
@@ -5551,18 +5098,448 @@ private:
 	Array<float> m_edgeLengths;
 	Array<float> m_faceAreas;
 	Array<Vector3> m_faceNormals;
+	Array<Vector3> m_faceTangents;
+	Array<Vector3> m_faceBitangents;
 	Array<Vector2> m_texcoords;
-	Array<uint32_t> m_growFaces;
 	uint32_t m_facesLeft;
 	Array<int> m_faceChartArray;
-	Array<ChartBuildData *> m_chartArray;
-	Array<Candidate> m_candidateArray;
-	Array<uint32_t> m_faceCandidateArray; // Map face index to candidate index.
+	Array<Chart *> m_chartArray;
 	PriorityQueue m_bestTriangles;
 	KISSRng m_rand;
 	ChartOptions m_options;
+	Array<Chart *> m_faceCandidateCharts;
+	Array<float> m_faceCandidateCosts;
+#if XA_GROW_CHARTS_COPLANAR
+	Array<uint32_t> m_nextPlanarRegionFace;
+#endif
+	Array<uint32_t> m_tempEdges1, m_tempEdges2;
+};
+
+} // namespace segment
+
+namespace param {
+
+class JacobiPreconditioner
+{
+public:
+	JacobiPreconditioner(const sparse::Matrix &M, bool symmetric) : m_inverseDiagonal(M.width())
+	{
+		XA_ASSERT(M.isSquare());
+		for (uint32_t x = 0; x < M.width(); x++) {
+			float elem = M.getCoefficient(x, x);
+			//XA_DEBUG_ASSERT( elem != 0.0f ); // This can be zero in the presence of zero area triangles.
+			if (symmetric) {
+				m_inverseDiagonal[x] = (elem != 0) ? 1.0f / sqrtf(fabsf(elem)) : 1.0f;
+			} else {
+				m_inverseDiagonal[x] = (elem != 0) ? 1.0f / elem : 1.0f;
+			}
+		}
+	}
+
+	void apply(const FullVector &x, FullVector &y) const
+	{
+		XA_DEBUG_ASSERT(x.dimension() == m_inverseDiagonal.dimension());
+		XA_DEBUG_ASSERT(y.dimension() == m_inverseDiagonal.dimension());
+		// @@ Wrap vector component-wise product into a separate function.
+		const uint32_t D = x.dimension();
+		for (uint32_t i = 0; i < D; i++) {
+			y[i] = m_inverseDiagonal[i] * x[i];
+		}
+	}
+
+private:
+	FullVector m_inverseDiagonal;
 };
 
+// Linear solvers.
+class Solver
+{
+public:
+	// Solve the symmetric system: At·A·x = At·b
+	static bool LeastSquaresSolver(const sparse::Matrix &A, const FullVector &b, FullVector &x, float epsilon = 1e-5f)
+	{
+		XA_DEBUG_ASSERT(A.width() == x.dimension());
+		XA_DEBUG_ASSERT(A.height() == b.dimension());
+		XA_DEBUG_ASSERT(A.height() >= A.width()); // @@ If height == width we could solve it directly...
+		const uint32_t D = A.width();
+		sparse::Matrix At(A.height(), A.width());
+		sparse::transpose(A, At);
+		FullVector Atb(D);
+		sparse::mult(At, b, Atb);
+		sparse::Matrix AtA(D);
+		sparse::mult(At, A, AtA);
+		return SymmetricSolver(AtA, Atb, x, epsilon);
+	}
+
+	// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
+	static bool LeastSquaresSolver(const sparse::Matrix &A, const FullVector &b, FullVector &x, const uint32_t *lockedParameters, uint32_t lockedCount, float epsilon = 1e-5f)
+	{
+		XA_DEBUG_ASSERT(A.width() == x.dimension());
+		XA_DEBUG_ASSERT(A.height() == b.dimension());
+		XA_DEBUG_ASSERT(A.height() >= A.width() - lockedCount);
+		// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
+		const uint32_t D = A.width() - lockedCount;
+		XA_DEBUG_ASSERT(D > 0);
+		// Compute: b - Al * xl
+		FullVector b_Alxl(b);
+		for (uint32_t y = 0; y < A.height(); y++) {
+			const uint32_t count = A.getRow(y).size();
+			for (uint32_t e = 0; e < count; e++) {
+				uint32_t column = A.getRow(y)[e].x;
+				bool isFree = true;
+				for (uint32_t i = 0; i < lockedCount; i++) {
+					isFree &= (lockedParameters[i] != column);
+				}
+				if (!isFree) {
+					b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
+				}
+			}
+		}
+		// Remove locked columns from A.
+		sparse::Matrix Af(D, A.height());
+		for (uint32_t y = 0; y < A.height(); y++) {
+			const uint32_t count = A.getRow(y).size();
+			for (uint32_t e = 0; e < count; e++) {
+				uint32_t column = A.getRow(y)[e].x;
+				uint32_t ix = column;
+				bool isFree = true;
+				for (uint32_t i = 0; i < lockedCount; i++) {
+					isFree &= (lockedParameters[i] != column);
+					if (column > lockedParameters[i]) ix--; // shift columns
+				}
+				if (isFree) {
+					Af.setCoefficient(ix, y, A.getRow(y)[e].v);
+				}
+			}
+		}
+		// Remove elements from x
+		FullVector xf(D);
+		for (uint32_t i = 0, j = 0; i < A.width(); i++) {
+			bool isFree = true;
+			for (uint32_t l = 0; l < lockedCount; l++) {
+				isFree &= (lockedParameters[l] != i);
+			}
+			if (isFree) {
+				xf[j++] = x[i];
+			}
+		}
+		// Solve reduced system.
+		bool result = LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
+		// Copy results back to x.
+		for (uint32_t i = 0, j = 0; i < A.width(); i++) {
+			bool isFree = true;
+			for (uint32_t l = 0; l < lockedCount; l++) {
+				isFree &= (lockedParameters[l] != i);
+			}
+			if (isFree) {
+				x[i] = xf[j++];
+			}
+		}
+		return result;
+	}
+
+private:
+	/**
+	* Compute the solution of the sparse linear system Ab=x using the Conjugate
+	* Gradient method.
+	*
+	* Solving sparse linear systems:
+	* (1)		A·x = b
+	*
+	* The conjugate gradient algorithm solves (1) only in the case that A is
+	* symmetric and positive definite. It is based on the idea of minimizing the
+	* function
+	*
+	* (2)		f(x) = 1/2·x·A·x - b·x
+	*
+	* This function is minimized when its gradient
+	*
+	* (3)		df = A·x - b
+	*
+	* is zero, which is equivalent to (1). The minimization is carried out by
+	* generating a succession of search directions p.k and improved minimizers x.k.
+	* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k),
+	* and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are
+	* built up in such a way that x.k+1 is also the minimizer of f over the whole
+	* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
+	* iterations you arrive at the minimizer over the entire vector space, i.e., the
+	* solution to (1).
+	*
+	* For a really good explanation of the method see:
+	*
+	* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
+	* Jonhathan Richard Shewchuk.
+	*
+	**/
+	// Conjugate gradient with preconditioner.
+	static bool ConjugateGradientSolver(const JacobiPreconditioner &preconditioner, const sparse::Matrix &A, const FullVector &b, FullVector &x, float epsilon)
+	{
+		XA_DEBUG_ASSERT( A.isSquare() );
+		XA_DEBUG_ASSERT( A.width() == b.dimension() );
+		XA_DEBUG_ASSERT( A.width() == x.dimension() );
+		int i = 0;
+		const int D = A.width();
+		const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
+		FullVector r(D);    // residual
+		FullVector p(D);    // search direction
+		FullVector q(D);    //
+		FullVector s(D);    // preconditioned
+		float delta_0;
+		float delta_old;
+		float delta_new;
+		float alpha;
+		float beta;
+		// r = b - A·x
+		sparse::copy(b, r);
+		sparse::sgemv(-1, A, x, 1, r);
+		// p = M^-1 · r
+		preconditioner.apply(r, p);
+		delta_new = sparse::dot(r, p);
+		delta_0 = delta_new;
+		while (i < i_max && delta_new > epsilon * epsilon * delta_0) {
+			i++;
+			// q = A·p
+			sparse::mult(A, p, q);
+			// alpha = delta_new / p·q
+			alpha = delta_new / sparse::dot(p, q);
+			// x = alfa·p + x
+			sparse::saxpy(alpha, p, x);
+			if ((i & 31) == 0) { // recompute r after 32 steps
+									// r = b - A·x
+				sparse::copy(b, r);
+				sparse::sgemv(-1, A, x, 1, r);
+			} else {
+				// r = r - alfa·q
+				sparse::saxpy(-alpha, q, r);
+			}
+			// s = M^-1 · r
+			preconditioner.apply(r, s);
+			delta_old = delta_new;
+			delta_new = sparse::dot( r, s );
+			beta = delta_new / delta_old;
+			// p = s + beta·p
+			sparse::scal(beta, p);
+			sparse::saxpy(1, s, p);
+		}
+		return delta_new <= epsilon * epsilon * delta_0;
+	}
+
+	static bool SymmetricSolver(const sparse::Matrix &A, const FullVector &b, FullVector &x, float epsilon = 1e-5f)
+	{
+		XA_DEBUG_ASSERT(A.height() == A.width());
+		XA_DEBUG_ASSERT(A.height() == b.dimension());
+		XA_DEBUG_ASSERT(b.dimension() == x.dimension());
+		JacobiPreconditioner jacobi(A, true);
+		return ConjugateGradientSolver(jacobi, A, b, x, epsilon);
+	}
+};
+
+// Fast sweep in 3 directions
+static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b)
+{
+	XA_DEBUG_ASSERT(a != nullptr);
+	XA_DEBUG_ASSERT(b != nullptr);
+	const uint32_t vertexCount = mesh->vertexCount();
+	uint32_t minVertex[3];
+	uint32_t maxVertex[3];
+	minVertex[0] = minVertex[1] = minVertex[2] = UINT32_MAX;
+	maxVertex[0] = maxVertex[1] = maxVertex[2] = UINT32_MAX;
+	for (uint32_t v = 1; v < vertexCount; v++) {
+		if (mesh->isBoundaryVertex(v)) {
+			minVertex[0] = minVertex[1] = minVertex[2] = v;
+			maxVertex[0] = maxVertex[1] = maxVertex[2] = v;
+			break;
+		}
+	}
+	if (minVertex[0] == UINT32_MAX) {
+		// Input mesh has not boundaries.
+		return false;
+	}
+	for (uint32_t v = 1; v < vertexCount; v++) {
+		if (!mesh->isBoundaryVertex(v)) {
+			// Skip interior vertices.
+			continue;
+		}
+		const Vector3 &pos = mesh->position(v);
+		if (pos.x < mesh->position(minVertex[0]).x)
+			minVertex[0] = v;
+		else if (pos.x > mesh->position(maxVertex[0]).x)
+			maxVertex[0] = v;
+		if (pos.y < mesh->position(minVertex[1]).y)
+			minVertex[1] = v;
+		else if (pos.y > mesh->position(maxVertex[1]).y)
+			maxVertex[1] = v;
+		if (pos.z < mesh->position(minVertex[2]).z)
+			minVertex[2] = v;
+		else if (pos.z > mesh->position(maxVertex[2]).z)
+			maxVertex[2] = v;
+	}
+	float lengths[3];
+	for (int i = 0; i < 3; i++) {
+		lengths[i] = length(mesh->position(minVertex[i]) - mesh->position(maxVertex[i]));
+	}
+	if (lengths[0] > lengths[1] && lengths[0] > lengths[2]) {
+		*a = minVertex[0];
+		*b = maxVertex[0];
+	} else if (lengths[1] > lengths[2]) {
+		*a = minVertex[1];
+		*b = maxVertex[1];
+	} else {
+		*a = minVertex[2];
+		*b = maxVertex[2];
+	}
+	return true;
+}
+
+// Conformal relations from Brecht Van Lommel (based on ABF):
+
+static float vec_angle_cos(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3)
+{
+	Vector3 d1 = v1 - v2;
+	Vector3 d2 = v3 - v2;
+	return clamp(dot(d1, d2) / (length(d1) * length(d2)), -1.0f, 1.0f);
+}
+
+static float vec_angle(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3)
+{
+	float dot = vec_angle_cos(v1, v2, v3);
+	return acosf(dot);
+}
+
+static void triangle_angles(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3, float *a1, float *a2, float *a3)
+{
+	*a1 = vec_angle(v3, v1, v2);
+	*a2 = vec_angle(v1, v2, v3);
+	*a3 = kPi - *a2 - *a1;
+}
+
+static void setup_abf_relations(sparse::Matrix &A, int row, int id0, int id1, int id2, const Vector3 &p0, const Vector3 &p1, const Vector3 &p2)
+{
+	// @@ IC: Wouldn't it be more accurate to return cos and compute 1-cos^2?
+	// It does indeed seem to be a little bit more robust.
+	// @@ Need to revisit this more carefully!
+	float a0, a1, a2;
+	triangle_angles(p0, p1, p2, &a0, &a1, &a2);
+	float s0 = sinf(a0);
+	float s1 = sinf(a1);
+	float s2 = sinf(a2);
+	if (s1 > s0 && s1 > s2) {
+		swap(s1, s2);
+		swap(s0, s1);
+		swap(a1, a2);
+		swap(a0, a1);
+		swap(id1, id2);
+		swap(id0, id1);
+	} else if (s0 > s1 && s0 > s2) {
+		swap(s0, s2);
+		swap(s0, s1);
+		swap(a0, a2);
+		swap(a0, a1);
+		swap(id0, id2);
+		swap(id0, id1);
+	}
+	float c0 = cosf(a0);
+	float ratio = (s2 == 0.0f) ? 1.0f : s1 / s2;
+	float cosine = c0 * ratio;
+	float sine = s0 * ratio;
+	// Note  : 2*id + 0 --> u
+	//         2*id + 1 --> v
+	int u0_id = 2 * id0 + 0;
+	int v0_id = 2 * id0 + 1;
+	int u1_id = 2 * id1 + 0;
+	int v1_id = 2 * id1 + 1;
+	int u2_id = 2 * id2 + 0;
+	int v2_id = 2 * id2 + 1;
+	// Real part
+	A.setCoefficient(u0_id, 2 * row + 0, cosine - 1.0f);
+	A.setCoefficient(v0_id, 2 * row + 0, -sine);
+	A.setCoefficient(u1_id, 2 * row + 0, -cosine);
+	A.setCoefficient(v1_id, 2 * row + 0, sine);
+	A.setCoefficient(u2_id, 2 * row + 0, 1);
+	// Imaginary part
+	A.setCoefficient(u0_id, 2 * row + 1, sine);
+	A.setCoefficient(v0_id, 2 * row + 1, cosine - 1.0f);
+	A.setCoefficient(u1_id, 2 * row + 1, -sine);
+	A.setCoefficient(v1_id, 2 * row + 1, -cosine);
+	A.setCoefficient(v2_id, 2 * row + 1, 1);
+}
+
+static bool computeLeastSquaresConformalMap(Mesh *mesh)
+{
+	// For this to work properly, mesh should not have colocals that have the same
+	// attributes, unless you want the vertices to actually have different texcoords.
+	const uint32_t vertexCount = mesh->vertexCount();
+	const uint32_t D = 2 * vertexCount;
+	const uint32_t N = 2 * mesh->faceCount();
+	// N is the number of equations (one per triangle)
+	// D is the number of variables (one per vertex; there are 2 pinned vertices).
+	if (N < D - 4) {
+		return false;
+	}
+	sparse::Matrix A(D, N);
+	FullVector b(N);
+	FullVector x(D);
+	// Fill b:
+	b.fill(0.0f);
+	// Fill x:
+	uint32_t v0, v1;
+	if (!findApproximateDiameterVertices(mesh, &v0, &v1)) {
+		// Mesh has no boundaries.
+		return false;
+	}
+	if (mesh->texcoord(v0) == mesh->texcoord(v1)) {
+		// LSCM expects an existing parameterization.
+		return false;
+	}
+	for (uint32_t v = 0; v < vertexCount; v++) {
+		// Initial solution.
+		x[2 * v + 0] = mesh->texcoord(v).x;
+		x[2 * v + 1] = mesh->texcoord(v).y;
+	}
+	// Fill A:
+	const uint32_t faceCount = mesh->faceCount();
+	for (uint32_t f = 0, t = 0; f < faceCount; f++) {
+		const uint32_t vertex0 = mesh->vertexAt(f * 3 + 0);
+		const uint32_t vertex1 = mesh->vertexAt(f * 3 + 1);
+		const uint32_t vertex2 = mesh->vertexAt(f * 3 + 2);
+		setup_abf_relations(A, t, vertex0, vertex1, vertex2, mesh->position(vertex0), mesh->position(vertex1), mesh->position(vertex2));
+		t++;
+	}
+	const uint32_t lockedParameters[] = {
+		2 * v0 + 0,
+		2 * v0 + 1,
+		2 * v1 + 0,
+		2 * v1 + 1
+	};
+	// Solve
+	Solver::LeastSquaresSolver(A, b, x, lockedParameters, 4, 0.000001f);
+	// Map x back to texcoords:
+	for (uint32_t v = 0; v < vertexCount; v++)
+		mesh->texcoord(v) = Vector2(x[2 * v + 0], x[2 * v + 1]);
+	return true;
+}
+
+static bool computeOrthogonalProjectionMap(Mesh *mesh)
+{
+	uint32_t vertexCount = mesh->vertexCount();
+	// Avoid redundant computations.
+	float matrix[6];
+	Fit::computeCovariance(vertexCount, &mesh->position(0), matrix);
+	if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0)
+		return false;
+	float eigenValues[3];
+	Vector3 eigenVectors[3];
+	if (!Fit::eigenSolveSymmetric3(matrix, eigenValues, eigenVectors))
+		return false;
+	Vector3 axis[2];
+	axis[0] = normalize(eigenVectors[0], kEpsilon);
+	axis[1] = normalize(eigenVectors[1], kEpsilon);
+	// Project vertices to plane.
+	for (uint32_t i = 0; i < vertexCount; i++)
+		mesh->texcoord(i) = Vector2(dot(axis[0], mesh->position(i)), dot(axis[1], mesh->position(i)));
+	return true;
+}
+
 // Estimate quality of existing parameterization.
 struct ParameterizationQuality
 {
@@ -5578,15 +5555,15 @@ struct ParameterizationQuality
 	bool boundaryIntersection = false;
 };
 
-static ParameterizationQuality calculateParameterizationQuality(const Mesh *mesh, Array<uint32_t> *flippedFaces)
+static ParameterizationQuality calculateParameterizationQuality(const Mesh *mesh, uint32_t faceCount, Array<uint32_t> *flippedFaces)
 {
 	XA_DEBUG_ASSERT(mesh != nullptr);
 	ParameterizationQuality quality;
-	const uint32_t faceCount = mesh->faceCount();
 	uint32_t firstBoundaryEdge = UINT32_MAX;
 	for (uint32_t e = 0; e < mesh->edgeCount(); e++) {
 		if (mesh->isBoundaryEdge(e)) {
 			firstBoundaryEdge = e;
+			break;
 		}
 	}
 	XA_DEBUG_ASSERT(firstBoundaryEdge != UINT32_MAX);
@@ -5681,7 +5658,8 @@ static ParameterizationQuality calculateParameterizationQuality(const Mesh *mesh
 		// If more than half the triangles are flipped, reverse the flipped / not flipped classification.
 		quality.flippedTriangleCount = quality.totalTriangleCount - quality.flippedTriangleCount;
 		if (flippedFaces) {
-			Array<uint32_t> temp(*flippedFaces);
+			Array<uint32_t> temp;
+			flippedFaces->copyTo(temp);
 			flippedFaces->clear();
 			for (uint32_t f = 0; f < faceCount; f++) {
 				bool match = false;
@@ -5732,28 +5710,36 @@ struct ChartWarningFlags
 class Chart
 {
 public:
-	Chart(const Mesh *originalMesh, const Array<uint32_t> &faceArray, const Basis &basis, uint32_t meshId, uint32_t chartGroupId, uint32_t chartId) : m_basis(basis), m_mesh(nullptr), m_unifiedMesh(nullptr), m_isDisk(false), m_isOrtho(false), m_isPlanar(false), m_warningFlags(0), m_closedHolesCount(0), m_fixedTJunctionsCount(0), m_faceArray(faceArray)
+	Chart(const segment::Atlas *atlas, const Mesh *originalMesh, uint32_t chartIndex, uint32_t meshId, uint32_t chartGroupId, uint32_t chartId) : m_mesh(nullptr), m_unifiedMesh(nullptr), m_isDisk(false), m_isOrtho(false), m_isPlanar(false), m_warningFlags(0), m_closedHolesCount(0), m_fixedTJunctionsCount(0)
 	{
 		XA_UNUSED(meshId);
 		XA_UNUSED(chartGroupId);
 		XA_UNUSED(chartId);
+		m_basis = atlas->chartBasis(chartIndex);
+		atlas->chartFaces(chartIndex).copyTo(m_faceArray);
 		// Copy face indices.
-		m_mesh = XA_NEW(MemTag::Mesh, Mesh, originalMesh->epsilon(), faceArray.size() * 3, faceArray.size());
-		m_unifiedMesh = XA_NEW(MemTag::Mesh, Mesh, originalMesh->epsilon(), faceArray.size() * 3, faceArray.size());
+		m_mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, originalMesh->epsilon(), m_faceArray.size() * 3, m_faceArray.size());
+		m_unifiedMesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, originalMesh->epsilon(), m_faceArray.size() * 3, m_faceArray.size());
 		Array<uint32_t> chartMeshIndices;
-		chartMeshIndices.resize(originalMesh->vertexCount(), (uint32_t)~0);
+		chartMeshIndices.resize(originalMesh->vertexCount());
+		chartMeshIndices.setAll(UINT32_MAX);
 		Array<uint32_t> unifiedMeshIndices;
-		unifiedMeshIndices.resize(originalMesh->vertexCount(), (uint32_t)~0);
+		unifiedMeshIndices.resize(originalMesh->vertexCount());
+		unifiedMeshIndices.setAll(UINT32_MAX);
 		// Add vertices.
-		const uint32_t faceCount = faceArray.size();
+		const uint32_t faceCount = m_initialFaceCount = m_faceArray.size();
 		for (uint32_t f = 0; f < faceCount; f++) {
 			for (uint32_t i = 0; i < 3; i++) {
-				const uint32_t vertex = originalMesh->vertexAt(faceArray[f] * 3 + i);
+				const uint32_t vertex = originalMesh->vertexAt(m_faceArray[f] * 3 + i);
 				const uint32_t unifiedVertex = originalMesh->firstColocal(vertex);
 				if (unifiedMeshIndices[unifiedVertex] == (uint32_t)~0) {
 					unifiedMeshIndices[unifiedVertex] = m_unifiedMesh->vertexCount();
 					XA_DEBUG_ASSERT(equal(originalMesh->position(vertex), originalMesh->position(unifiedVertex), originalMesh->epsilon()));
+#if XA_SKIP_PARAMETERIZATION
+					m_unifiedMesh->addVertex(originalMesh->position(vertex), Vector3(0.0f), atlas->faceTexcoords(m_faceArray[f])[i]);
+#else
 					m_unifiedMesh->addVertex(originalMesh->position(vertex));
+#endif
 				}
 				if (chartMeshIndices[vertex] == (uint32_t)~0) {
 					chartMeshIndices[vertex] = m_mesh->vertexCount();
@@ -5767,7 +5753,7 @@ public:
 		for (uint32_t f = 0; f < faceCount; f++) {
 			uint32_t indices[3], unifiedIndices[3];
 			for (uint32_t i = 0; i < 3; i++) {
-				const uint32_t vertex = originalMesh->vertexAt(faceArray[f] * 3 + i);
+				const uint32_t vertex = originalMesh->vertexAt(m_faceArray[f] * 3 + i);
 				indices[i] = chartMeshIndices[vertex];
 				unifiedIndices[i] = unifiedMeshIndices[originalMesh->firstColocal(vertex)];
 			}
@@ -5810,6 +5796,7 @@ public:
 				m_unifiedMesh = fixedUnifiedMesh;
 				m_unifiedMesh->createBoundaries();
 				m_unifiedMesh->linkBoundaries();
+				m_initialFaceCount = m_unifiedMesh->faceCount(); // Fixing t-junctions rewrites faces.
 			}
 			// See if there are any holes that need closing.
 			Array<uint32_t> boundaryLoops;
@@ -5825,7 +5812,7 @@ public:
 				// - Use minimal spanning trees or seamster.
 				Array<uint32_t> holeFaceCounts;
 				XA_PROFILE_START(closeChartMeshHoles)
-				failed = !meshCloseHoles(m_unifiedMesh, boundaryLoops, basis.normal, holeFaceCounts);
+				failed = !meshCloseHoles(m_unifiedMesh, boundaryLoops, m_basis.normal, holeFaceCounts);
 				XA_PROFILE_END(closeChartMeshHoles)
 				m_unifiedMesh->createBoundaries();
 				m_unifiedMesh->linkBoundaries();
@@ -5907,7 +5894,7 @@ public:
 	void evaluateOrthoParameterizationQuality()
 	{
 		XA_PROFILE_START(parameterizeChartsEvaluateQuality)
-		m_paramQuality = calculateParameterizationQuality(m_unifiedMesh, nullptr);
+		m_paramQuality = calculateParameterizationQuality(m_unifiedMesh, m_initialFaceCount, nullptr);
 		XA_PROFILE_END(parameterizeChartsEvaluateQuality)
 		// Use orthogonal parameterization if quality is acceptable.
 		if (!m_paramQuality.boundaryIntersection && m_paramQuality.geometricArea > 0.0f && m_paramQuality.stretchMetric <= 1.1f && m_paramQuality.maxStretchMetric <= 1.25f)
@@ -5918,9 +5905,9 @@ public:
 	{
 		XA_PROFILE_START(parameterizeChartsEvaluateQuality)
 #if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION
-		m_paramQuality = calculateParameterizationQuality(m_unifiedMesh, &m_paramFlippedFaces);
+		m_paramQuality = calculateParameterizationQuality(m_unifiedMesh, m_initialFaceCount, &m_paramFlippedFaces);
 #else
-		m_paramQuality = calculateParameterizationQuality(m_unifiedMesh, nullptr);
+		m_paramQuality = calculateParameterizationQuality(m_unifiedMesh, m_initialFaceCount, nullptr);
 #endif
 		XA_PROFILE_END(parameterizeChartsEvaluateQuality)
 	}
@@ -5961,6 +5948,7 @@ private:
 	Mesh *m_unifiedMesh;
 	bool m_isDisk, m_isOrtho, m_isPlanar;
 	uint32_t m_warningFlags;
+	uint32_t m_initialFaceCount; // Before fixing T-junctions and/or closing holes.
 	uint32_t m_closedHolesCount, m_fixedTJunctionsCount;
 
 	// List of faces of the original mesh that belong to this chart.
@@ -5979,9 +5967,9 @@ private:
 
 struct CreateChartTaskArgs
 {
+	const segment::Atlas *atlas;
 	const Mesh *mesh;
-	const Array<uint32_t> *faceArray;
-	const Basis *basis;
+	uint32_t chartIndex; // In the atlas.
 	uint32_t meshId;
 	uint32_t chartGroupId;
 	uint32_t chartId;
@@ -5992,7 +5980,7 @@ static void runCreateChartTask(void *userData)
 {
 	XA_PROFILE_START(createChartMeshesThread)
 	auto args = (CreateChartTaskArgs *)userData;
-	*(args->chart) = XA_NEW(MemTag::Default, Chart, args->mesh, *(args->faceArray), *(args->basis), args->meshId, args->chartGroupId, args->chartId);
+	*(args->chart) = XA_NEW_ARGS(MemTag::Default, Chart, args->atlas, args->mesh, args->chartIndex, args->meshId, args->chartGroupId, args->chartId);
 	XA_PROFILE_END(createChartMeshesThread)
 }
 
@@ -6043,10 +6031,11 @@ public:
 		}
 		// Only initial meshes have face groups and ignored faces. The only flag we care about is HasNormals.
 		const uint32_t faceCount = m_faceToSourceFaceMap.size();
-		m_mesh = XA_NEW(MemTag::Mesh, Mesh, sourceMesh->epsilon(), faceCount * 3, faceCount, sourceMesh->flags() & MeshFlags::HasNormals);
+		m_mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), faceCount * 3, faceCount, sourceMesh->flags() & MeshFlags::HasNormals);
 		XA_DEBUG_ASSERT(faceCount > 0);
 		Array<uint32_t> meshIndices;
-		meshIndices.resize(sourceMesh->vertexCount(), (uint32_t)~0);
+		meshIndices.resize(sourceMesh->vertexCount());
+		meshIndices.setAll((uint32_t)~0);
 		for (uint32_t f = 0; f < faceCount; f++) {
 			const uint32_t face = m_faceToSourceFaceMap[f];
 			for (uint32_t i = 0; i < 3; i++) {
@@ -6183,22 +6172,22 @@ public:
 		chartFaces.resize(m_mesh->faceCount());
 		for (uint32_t i = 0; i < chartFaces.size(); i++)
 			chartFaces[i] = i;
-		Chart *chart = XA_NEW(MemTag::Default, Chart, m_mesh, chartFaces, m_sourceId, m_id, 0);
+		Chart *chart = XA_NEW_ARGS(MemTag::Default, Chart, m_mesh, chartFaces, m_sourceId, m_id, 0);
 		m_chartArray.push_back(chart);
 #else
-		XA_PROFILE_START(atlasBuilder)
-		AtlasBuilder builder(m_mesh, nullptr, options);
-		runAtlasBuilder(builder, options);
-		XA_PROFILE_END(atlasBuilder)
-		const uint32_t chartCount = builder.chartCount();
+		XA_PROFILE_START(buildAtlas)
+		segment::Atlas atlas(m_mesh, nullptr, options);
+		buildAtlas(atlas, options);
+		XA_PROFILE_END(buildAtlas)
+		const uint32_t chartCount = atlas.chartCount();
 		m_chartArray.resize(chartCount);
 		Array<CreateChartTaskArgs> taskArgs;
 		taskArgs.resize(chartCount);
 		for (uint32_t i = 0; i < chartCount; i++) {
 			CreateChartTaskArgs &args = taskArgs[i];
+			args.atlas = &atlas;
 			args.mesh = m_mesh;
-			args.faceArray = &builder.chartFaces(i);
-			args.basis = &builder.chartBasis(i);
+			args.chartIndex = i;
 			args.meshId = m_sourceId;
 			args.chartGroupId = m_id;
 			args.chartId = i;
@@ -6239,6 +6228,16 @@ public:
 	void parameterizeCharts(TaskScheduler *taskScheduler, ParameterizeFunc func)
 	{
 		const uint32_t chartCount = m_chartArray.size();
+#if XA_SKIP_PARAMETERIZATION
+		XA_UNUSED(taskScheduler);
+		XA_UNUSED(func);
+		for (uint32_t i = 0; i < chartCount; i++) {
+			Chart *chart = m_chartArray[i];
+			chart->evaluateOrthoParameterizationQuality();
+			chart->evaluateParameterizationQuality();
+			chart->transferParameterization();
+		}
+#else
 		Array<ParameterizeChartTaskArgs> taskArgs;
 		taskArgs.resize(chartCount);
 		TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartCount);
@@ -6279,10 +6278,10 @@ public:
 			options.maxChartArea = invalidChartArea * 0.2f;
 			options.maxThreshold = 0.25f;
 			options.maxIterations = 3;
-			AtlasBuilder builder(m_mesh, &meshFaces, options);
-			runAtlasBuilder(builder, options);
-			for (uint32_t j = 0; j < builder.chartCount(); j++) {
-				Chart *chart = XA_NEW(MemTag::Default, Chart, m_mesh, builder.chartFaces(j), builder.chartBasis(j), m_sourceId, m_id, m_chartArray.size());
+			segment::Atlas atlas(m_mesh, &meshFaces, options);
+			buildAtlas(atlas, options);
+			for (uint32_t j = 0; j < atlas.chartCount(); j++) {
+				Chart *chart = XA_NEW_ARGS(MemTag::Default, Chart, &atlas, m_mesh, j, m_sourceId, m_id, m_chartArray.size());
 				m_chartArray.push_back(chart);
 				m_paramAddedChartsCount++;
 			}
@@ -6325,46 +6324,41 @@ public:
 			XA_FREE(chart);
 			m_paramDeletedChartsCount++;
 		}
-#endif
+#endif // XA_RECOMPUTE_CHARTS
+#endif // XA_SKIP_PARAMETERIZATION
 	}
 
 private:
-	void runAtlasBuilder(AtlasBuilder &builder, const ChartOptions &options)
+	void buildAtlas(segment::Atlas &atlas, const ChartOptions &options)
 	{
-		if (builder.facesLeft() == 0)
+		if (atlas.facesLeft() == 0)
 			return;
-		// This seems a reasonable estimate.
-		XA_PROFILE_START(atlasBuilderCreateInitialCharts)
 		// Create initial charts greedely.
-		builder.placeSeeds(options.maxThreshold * 0.5f);
+		atlas.placeSeeds(options.maxThreshold * 0.5f);
 		if (options.maxIterations == 0) {
-			XA_DEBUG_ASSERT(builder.facesLeft() == 0);
-			XA_PROFILE_END(atlasBuilderCreateInitialCharts)
+			XA_DEBUG_ASSERT(atlas.facesLeft() == 0);
 			return;
 		}
-		builder.updateProxies();
-		builder.relocateSeeds();
-		builder.resetCharts();
-		XA_PROFILE_END(atlasBuilderCreateInitialCharts)
+		atlas.relocateSeeds();
+		atlas.resetCharts();
 		// Restart process growing charts in parallel.
 		uint32_t iteration = 0;
 		while (true) {
-			if (!builder.growCharts(options.maxThreshold, options.growFaceCount)) {
+			if (!atlas.growCharts(options.maxThreshold)) {
 				// If charts cannot grow more: fill holes, merge charts, relocate seeds and start new iteration.
-				builder.fillHoles(options.maxThreshold * 0.5f);
-				builder.updateProxies();
+				atlas.fillHoles(options.maxThreshold * 0.5f);
 #if XA_MERGE_CHARTS
-				builder.mergeCharts();
+				atlas.mergeCharts();
 #endif
 				if (++iteration == options.maxIterations)
 					break;
-				if (!builder.relocateSeeds())
+				if (!atlas.relocateSeeds())
 					break;
-				builder.resetCharts();
+				atlas.resetCharts();
 			}
 		}
 		// Make sure no holes are left!
-		XA_DEBUG_ASSERT(builder.facesLeft() == 0);
+		XA_DEBUG_ASSERT(atlas.facesLeft() == 0);
 	}
 
 	void removeChart(const Chart *chart)
@@ -6400,7 +6394,7 @@ static void runCreateChartGroupTask(void *userData)
 {
 	XA_PROFILE_START(addMeshCreateChartGroupsThread)
 	auto args = (CreateChartGroupTaskArgs *)userData;
-	*(args->chartGroup) = XA_NEW(MemTag::Default, ChartGroup, args->groupId, args->mesh, args->faceGroup);
+	*(args->chartGroup) = XA_NEW_ARGS(MemTag::Default, ChartGroup, args->groupId, args->mesh, args->faceGroup);
 	XA_PROFILE_END(addMeshCreateChartGroupsThread)
 }
 
@@ -6448,7 +6442,7 @@ static void runParameterizeChartsJob(void *userData)
 class Atlas
 {
 public:
-	Atlas() : m_chartsComputed(false), m_chartsParameterized(false) {}
+	Atlas() : m_meshCount(0), m_chartsComputed(false), m_chartsParameterized(false) {}
 
 	~Atlas()
 	{
@@ -6460,6 +6454,8 @@ public:
 
 	bool chartsComputed() const { return m_chartsComputed; }
 	bool chartsParameterized() const { return m_chartsParameterized; }
+	uint32_t chartGroupCount() const { return m_chartGroups.size(); }
+	const ChartGroup *chartGroupAt(uint32_t index) const { return m_chartGroups[index]; }
 
 	uint32_t chartGroupCount(uint32_t mesh) const
 	{
@@ -6483,26 +6479,6 @@ public:
 		return nullptr;
 	}
 
-	uint32_t chartCount() const
-	{
-		uint32_t count = 0;
-		for (uint32_t i = 0; i < m_chartGroups.size(); i++)
-			count += m_chartGroups[i]->chartCount();
-		return count;
-	}
-
-	Chart *chartAt(uint32_t i)
-	{
-		for (uint32_t c = 0; c < m_chartGroups.size(); c++) {
-			uint32_t count = m_chartGroups[c]->chartCount();
-			if (i < count) {
-				return m_chartGroups[c]->chartAt(i);
-			}
-			i -= count;
-		}
-		return nullptr;
-	}
-
 	// This function is thread safe.
 	void addMesh(TaskScheduler *taskScheduler, const Mesh *mesh)
 	{
@@ -6548,9 +6524,32 @@ public:
 			m_chartGroups.push_back(chartGroups[g]);
 			m_chartGroupSourceMeshes.push_back(mesh->id());
 		}
+		m_meshCount++;
 		m_addMeshMutex.unlock();
 	}
 
+	// Chart id/index is determined by depth-first hierarchy of mesh -> chart group -> chart.
+	// For chart index to be consistent here, chart groups needs to sorted by mesh index. Since addMesh is called by multithreaded tasks, order is indeterminate, so chart groups need to be explicitly sorted after all meshes are added.
+	void sortChartGroups()
+	{
+		Array<ChartGroup *> oldChartGroups;
+		oldChartGroups.resize(m_chartGroups.size());
+		memcpy(oldChartGroups.data(), m_chartGroups.data(), sizeof(ChartGroup *) * m_chartGroups.size());
+		Array<uint32_t> oldChartGroupSourceMeshes;
+		oldChartGroupSourceMeshes.resize(m_chartGroupSourceMeshes.size());
+		memcpy(oldChartGroupSourceMeshes.data(), m_chartGroupSourceMeshes.data(), sizeof(uint32_t) * m_chartGroupSourceMeshes.size());
+		uint32_t current = 0;
+		for (uint32_t i = 0; i < m_meshCount; i++) {
+			for (uint32_t j = 0; j < oldChartGroups.size(); j++) {
+				if (oldChartGroupSourceMeshes[j] == i) {
+					m_chartGroups[current] = oldChartGroups[j];
+					m_chartGroupSourceMeshes[current] = oldChartGroupSourceMeshes[j];
+					current++;
+				}
+			}
+		}
+	}
+
 	bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData)
 	{
 		m_chartsComputed = false;
@@ -6629,37 +6628,18 @@ public:
 		taskScheduler->wait(&taskGroup);
 		if (progress.cancel)
 			return false;
-		// Save original texcoords so PackCharts can be called multiple times (packing overwrites the texcoords).
-		const uint32_t nCharts = chartCount();
-		m_originalChartTexcoords.resize(nCharts);
-		for (uint32_t i = 0; i < nCharts; i++) {
-			const Mesh *mesh = chartAt(i)->mesh();
-			m_originalChartTexcoords[i].resize(mesh->vertexCount());
-			for (uint32_t j = 0; j < mesh->vertexCount(); j++)
-				m_originalChartTexcoords[i][j] = mesh->texcoord(j);
-		}
 		m_chartsParameterized = true;
 		return true;
 	}
 
-	void restoreOriginalChartTexcoords()
-	{
-		const uint32_t nCharts = chartCount();
-		for (uint32_t i = 0; i < nCharts; i++) {
-			Mesh *mesh = chartAt(i)->mesh();
-			for (uint32_t j = 0; j < mesh->vertexCount(); j++)
-				mesh->texcoord(j) = m_originalChartTexcoords[i][j];
-		}
-	}
-
 private:
 	std::mutex m_addMeshMutex;
+	uint32_t m_meshCount;
 	bool m_chartsComputed;
 	bool m_chartsParameterized;
 	Array<ChartGroup *> m_chartGroups;
 	RadixSort m_chartGroupsRadix; // By mesh indexCount.
 	Array<uint32_t> m_chartGroupSourceMeshes;
-	Array<Array<Vector2> > m_originalChartTexcoords;
 };
 
 } // namespace param
@@ -6733,10 +6713,10 @@ public:
 			memcpy(&data[y * width], &m_data[y * m_width], min(m_width, width) * sizeof(uint32_t));
 		m_width = width;
 		m_height = height;
-		swap(m_data, data);
+		data.moveTo(m_data);
 	}
 
-	void addChart(uint32_t chartIndex, const BitImage *image, bool imageHasPadding, int atlas_w, int atlas_h, int offset_x, int offset_y)
+	void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y)
 	{
 		const int w = image->width();
 		const int h = image->height();
@@ -6746,23 +6726,27 @@ public:
 				continue;
 			for (int x = 0; x < w; x++) {
 				const int xx = x + offset_x;
-				if (xx >= 0 && xx < atlas_w && yy < atlas_h && image->bitAt(x, y)) {
+				if (xx >= 0 && xx < atlas_w && yy < atlas_h) {
 					const uint32_t dataOffset = xx + yy * m_width;
-					if (m_data[dataOffset] != 0)
-						continue;
-					uint32_t value = chartIndex | kImageHasChartIndexBit;
-					if (imageHasPadding)
-						value |= kImageIsPaddingBit;
-					m_data[dataOffset] = value;
+					if (image->bitAt(x, y)) {
+						XA_DEBUG_ASSERT(m_data[dataOffset] == 0);
+						m_data[dataOffset] = chartIndex | kImageHasChartIndexBit;
+					} else if (imageBilinear && imageBilinear->bitAt(x, y)) {
+						XA_DEBUG_ASSERT(m_data[dataOffset] == 0);
+						m_data[dataOffset] = chartIndex | kImageHasChartIndexBit | kImageIsBilinearBit;
+					} else if (imagePadding && imagePadding->bitAt(x, y)) {
+						XA_DEBUG_ASSERT(m_data[dataOffset] == 0);
+						m_data[dataOffset] = chartIndex | kImageHasChartIndexBit | kImageIsPaddingBit;
+					}
 				}
 			}
 		}
 	}
 
-	void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight) const
+	void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const
 	{
 		for (uint32_t y = 0; y < destHeight; y++)
-			memcpy(&dest[y * destWidth], &m_data[y * m_width], destWidth * sizeof(uint32_t));
+			memcpy(&dest[y * destWidth], &m_data[padding + (y + padding) * m_width], destWidth * sizeof(uint32_t));
 	}
 
 #if XA_DEBUG_EXPORT_ATLAS_IMAGES
@@ -6777,20 +6761,26 @@ public:
 				if (x >= m_width)
 					continue;
 				const uint32_t data = m_data[x + y * m_width];
-				if (!(data & kImageHasChartIndexBit))
+				uint8_t *bgr = &image[(x + y * width) * 3];
+				if (data == 0) {
+					bgr[0] = bgr[1] = bgr[2] = 0;
 					continue;
+				}
 				const uint32_t chartIndex = data & kImageChartIndexMask;
-				uint8_t *color = &image[(x + y * width) * 3];
 				if (data & kImageIsPaddingBit) {
-					color[0] = 255;
-					color[1] = 0;
-					color[2] = 255;
+					bgr[0] = 0;
+					bgr[1] = 0;
+					bgr[2] = 255;
+				} else if (data & kImageIsBilinearBit) {
+					bgr[0] = 0;
+					bgr[1] = 255;
+					bgr[2] = 0;
 				} else {
 					const int mix = 192;
 					srand((unsigned int)chartIndex);
-					color[0] = uint8_t((rand() % 255 + mix) * 0.5f);
-					color[1] = uint8_t((rand() % 255 + mix) * 0.5f);
-					color[2] = uint8_t((rand() % 255 + mix) * 0.5f);
+					bgr[0] = uint8_t((rand() % 255 + mix) * 0.5f);
+					bgr[1] = uint8_t((rand() % 255 + mix) * 0.5f);
+					bgr[2] = uint8_t((rand() % 255 + mix) * 0.5f);
 				}
 			}
 		}
@@ -6817,11 +6807,61 @@ struct Chart
 	bool allowRotate;
 	// bounding box
 	Vector2 majorAxis, minorAxis, minCorner, maxCorner;
+	// UvMeshChart only
+	Array<uint32_t> faces;
 
 	Vector2 &uniqueVertexAt(uint32_t v) { return uniqueVertices.isEmpty() ? vertices[v] : vertices[uniqueVertices[v]]; }
 	uint32_t uniqueVertexCount() const { return uniqueVertices.isEmpty() ? vertexCount : uniqueVertices.size(); }
 };
 
+struct AddChartTaskArgs
+{
+	param::Chart *paramChart;
+	Chart *chart; // out
+};
+
+static void runAddChartTask(void *userData)
+{
+	XA_PROFILE_START(packChartsAddChartsThread)
+	auto args = (AddChartTaskArgs *)userData;
+	param::Chart *paramChart = args->paramChart;
+	XA_PROFILE_START(packChartsAddChartsRestoreTexcoords)
+	paramChart->transferParameterization();
+	XA_PROFILE_END(packChartsAddChartsRestoreTexcoords)
+	Mesh *mesh = paramChart->mesh();
+	Chart *chart = args->chart = XA_NEW(MemTag::Default, Chart);
+	chart->atlasIndex = -1;
+	chart->material = 0;
+	chart->indexCount = mesh->indexCount();
+	chart->indices = mesh->indices();
+	chart->parametricArea = paramChart->computeParametricArea();
+	if (chart->parametricArea < kAreaEpsilon) {
+		// When the parametric area is too small we use a rough approximation to prevent divisions by very small numbers.
+		const Vector2 bounds = paramChart->computeParametricBounds();
+		chart->parametricArea = bounds.x * bounds.y;
+	}
+	chart->surfaceArea = paramChart->computeSurfaceArea();
+	chart->vertices = mesh->texcoords();
+	chart->vertexCount = mesh->vertexCount();
+	chart->allowRotate = true;
+	// Compute list of boundary vertices.
+	Array<Vector2> boundary;
+	boundary.reserve(16);
+	for (uint32_t v = 0; v < chart->vertexCount; v++) {
+		if (mesh->isBoundaryVertex(v))
+			boundary.push_back(mesh->texcoord(v));
+	}
+	XA_DEBUG_ASSERT(boundary.size() > 0);
+	// Compute bounding box of chart.
+	static thread_local BoundingBox2D boundingBox;
+	boundingBox.compute(boundary.data(), boundary.size(), mesh->texcoords(), mesh->vertexCount());
+	chart->majorAxis = boundingBox.majorAxis();
+	chart->minorAxis = boundingBox.minorAxis();
+	chart->minCorner = boundingBox.minCorner();
+	chart->maxCorner = boundingBox.maxCorner();
+	XA_PROFILE_END(packChartsAddChartsThread)
+}
+
 struct FindChartLocationBruteForceTaskArgs
 {
 	std::atomic<bool> *finished; // One of the tasks found a location that doesn't expand the atlas.
@@ -6830,7 +6870,8 @@ struct FindChartLocationBruteForceTaskArgs
 	const BitImage *chartBitImage;
 	const BitImage *chartBitImageRotated;
 	int w, h;
-	bool blockAligned, resizableAtlas, allowRotate;
+	bool blockAligned, allowRotate;
+	uint32_t maxResolution;
 	// out
 	bool best_insideAtlas;
 	int best_metric, best_x, best_y, best_w, best_h, best_r;
@@ -6845,6 +6886,8 @@ static void runFindChartLocationBruteForceTask(void *userData)
 		return;
 	// Try two different orientations.
 	for (int r = 0; r < 2; r++) {
+		if (args->finished->load())
+			break;
 		int cw = args->chartBitImage->width();
 		int ch = args->chartBitImage->height();
 		if (r == 1) {
@@ -6855,8 +6898,8 @@ static void runFindChartLocationBruteForceTask(void *userData)
 		}
 		const int y = args->startPosition.y;
 		const int stepSize = args->blockAligned ? 4 : 1;
-		for (int x = args->startPosition.x; x <= args->w + stepSize; x += stepSize) { // + 1 not really necessary here.
-			if (!args->resizableAtlas && (x > (int)args->atlasBitImage->width() - cw || y > (int)args->atlasBitImage->height() - ch))
+		for (int x = args->startPosition.x; x <= args->w + stepSize; x += stepSize) {
+			if (args->maxResolution > 0 && (x > (int)args->maxResolution - cw || y > (int)args->maxResolution - ch))
 				continue;
 			if (args->finished->load())
 				break;
@@ -6891,6 +6934,10 @@ struct Atlas
 {
 	~Atlas()
 	{
+		for (uint32_t i = 0; i < m_atlasImages.size(); i++) {
+			m_atlasImages[i]->~AtlasImage();
+			XA_FREE(m_atlasImages[i]);
+		}
 		for (uint32_t i = 0; i < m_bitImages.size(); i++) {
 			m_bitImages[i]->~BitImage();
 			XA_FREE(m_bitImages[i]);
@@ -6910,39 +6957,44 @@ struct Atlas
 	const Array<AtlasImage *> &getImages() const { return m_atlasImages; }
 	float getUtilization(uint32_t atlas) const { return m_utilization[atlas]; }
 
-	void addChart(param::Chart *paramChart)
+	void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas)
 	{
-		Mesh *mesh = paramChart->mesh();
-		Chart *chart = XA_NEW(MemTag::Default, Chart);
-		chart->atlasIndex = -1;
-		chart->material = 0;
-		chart->indexCount = mesh->indexCount();
-		chart->indices = mesh->indices();
-		chart->parametricArea = paramChart->computeParametricArea();
-		if (chart->parametricArea < kAreaEpsilon) {
-			// When the parametric area is too small we use a rough approximation to prevent divisions by very small numbers.
-			const Vector2 bounds = paramChart->computeParametricBounds();
-			chart->parametricArea = bounds.x * bounds.y;
-		}
-		chart->surfaceArea = paramChart->computeSurfaceArea();
-		chart->vertices = mesh->texcoords();
-		chart->vertexCount = mesh->vertexCount();
-		chart->allowRotate = true;
-		// Compute list of boundary vertices.
-		Array<Vector2> boundary;
-		boundary.reserve(16);
-		for (uint32_t v = 0; v < chart->vertexCount; v++) {
-			if (mesh->isBoundaryVertex(v))
-				boundary.push_back(mesh->texcoord(v));
+		// Count charts.
+		uint32_t chartCount = 0;
+		const uint32_t chartGroupsCount = paramAtlas->chartGroupCount();
+		for (uint32_t i = 0; i < chartGroupsCount; i++) {
+			const param::ChartGroup *chartGroup = paramAtlas->chartGroupAt(i);
+			if (chartGroup->isVertexMap())
+				continue;
+			chartCount += chartGroup->chartCount();
 		}
-		XA_DEBUG_ASSERT(boundary.size() > 0);
-		// Compute bounding box of chart.
-		m_boundingBox.compute(boundary.data(), boundary.size(), mesh->texcoords(), mesh->vertexCount());
-		chart->majorAxis = m_boundingBox.majorAxis();
-		chart->minorAxis = m_boundingBox.minorAxis();
-		chart->minCorner = m_boundingBox.minCorner();
-		chart->maxCorner = m_boundingBox.maxCorner();
-		m_charts.push_back(chart);
+		if (chartCount == 0)
+			return;
+		// Run one task per chart.
+		Array<AddChartTaskArgs> taskArgs;
+		taskArgs.resize(chartCount);
+		TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartCount);
+		uint32_t chartIndex = 0;
+		for (uint32_t i = 0; i < chartGroupsCount; i++) {
+			const param::ChartGroup *chartGroup = paramAtlas->chartGroupAt(i);
+			if (chartGroup->isVertexMap())
+				continue;
+			const uint32_t count = chartGroup->chartCount();
+			for (uint32_t j = 0; j < count; j++) {
+				AddChartTaskArgs &args = taskArgs[chartIndex];
+				args.paramChart = chartGroup->chartAt(j);
+				Task task;
+				task.userData = &taskArgs[chartIndex];
+				task.func = runAddChartTask;
+				taskScheduler->run(taskGroup, task);
+				chartIndex++;
+			}
+		}
+		taskScheduler->wait(&taskGroup);
+		// Get task output.
+		m_charts.resize(chartCount);
+		for (uint32_t i = 0; i < chartCount; i++)
+			m_charts[i] = taskArgs[i].chart;
 	}
 
 	void addUvMeshCharts(UvMeshInstance *mesh)
@@ -6950,6 +7002,7 @@ struct Atlas
 		BitArray vertexUsed(mesh->texcoords.size());
 		Array<Vector2> boundary;
 		boundary.reserve(16);
+		BoundingBox2D boundingBox;
 		for (uint32_t c = 0; c < mesh->mesh->charts.size(); c++) {
 			UvMeshChart *uvChart = mesh->mesh->charts[c];
 			Chart *chart = XA_NEW(MemTag::Default, Chart);
@@ -6960,6 +7013,8 @@ struct Atlas
 			chart->vertices = mesh->texcoords.data();
 			chart->vertexCount = mesh->texcoords.size();
 			chart->allowRotate = mesh->rotateCharts;
+			chart->faces.resize(uvChart->faces.size());
+			memcpy(chart->faces.data(), uvChart->faces.data(), sizeof(uint32_t) * uvChart->faces.size());
 			// Find unique vertices.
 			vertexUsed.clearAll();
 			for (uint32_t i = 0; i < chart->indexCount; i++) {
@@ -6997,11 +7052,11 @@ struct Atlas
 				boundary.push_back(chart->uniqueVertexAt(v));
 			XA_DEBUG_ASSERT(boundary.size() > 0);
 			// Compute bounding box of chart.
-			m_boundingBox.compute(boundary.data(), boundary.size(), boundary.data(), boundary.size());
-			chart->majorAxis = m_boundingBox.majorAxis();
-			chart->minorAxis = m_boundingBox.minorAxis();
-			chart->minCorner = m_boundingBox.minCorner();
-			chart->maxCorner = m_boundingBox.maxCorner();
+			boundingBox.compute(boundary.data(), boundary.size(), boundary.data(), boundary.size());
+			chart->majorAxis = boundingBox.majorAxis();
+			chart->minorAxis = boundingBox.minorAxis();
+			chart->minCorner = boundingBox.minCorner();
+			chart->maxCorner = boundingBox.maxCorner();
 			m_charts.push_back(chart);
 		}
 	}
@@ -7022,8 +7077,10 @@ struct Atlas
 			}
 			return true;
 		}
-		uint32_t resolution = options.resolution;
+		// Estimate resolution and/or texels per unit if not specified.
 		m_texelsPerUnit = options.texelsPerUnit;
+		uint32_t resolution = options.resolution > 0 ? options.resolution + options.padding * 2 : 0;
+		const uint32_t maxResolution = m_texelsPerUnit > 0.0f ? resolution : 0;
 		if (resolution <= 0 || m_texelsPerUnit <= 0) {
 			if (resolution <= 0 && m_texelsPerUnit <= 0)
 				resolution = 1024;
@@ -7049,15 +7106,11 @@ struct Atlas
 		float minChartPerimeter = FLT_MAX, maxChartPerimeter = 0.0f;
 		for (uint32_t c = 0; c < chartCount; c++) {
 			Chart *chart = m_charts[c];
-			//chartOrderArray[c] = chart.surfaceArea;
 			// Compute chart scale
 			float scale = (chart->surfaceArea / chart->parametricArea) * m_texelsPerUnit;
-			if (chart->parametricArea == 0) { // < kAreaEpsilon)
+			if (chart->parametricArea == 0.0f)
 				scale = 0;
-			}
 			XA_ASSERT(isFinite(scale));
-			// Sort charts by perimeter. @@ This is sometimes producing somewhat unexpected results. Is this right?
-			//chartOrderArray[c] = ((chart->maxCorner.x - chart->minCorner.x) + (chart->maxCorner.y - chart->minCorner.y)) * scale;
 			// Translate, rotate and scale vertices. Compute extents.
 			Vector2 minCorner(FLT_MAX, FLT_MAX);
 			if (!chart->allowRotate) {
@@ -7077,58 +7130,59 @@ struct Atlas
 					texcoord -= minCorner;
 				}
 				texcoord *= scale;
-				XA_DEBUG_ASSERT(texcoord.x >= 0 && texcoord.y >= 0);
+				XA_DEBUG_ASSERT(texcoord.x >= 0.0f && texcoord.y >= 0.0f);
 				XA_DEBUG_ASSERT(isFinite(texcoord.x) && isFinite(texcoord.y));
 				extents = max(extents, texcoord);
 			}
 			XA_DEBUG_ASSERT(extents.x >= 0 && extents.y >= 0);
-			// Limit chart size.
-			const float maxChartSize = (float)options.maxChartSize;
-			if (extents.x > maxChartSize || extents.y > maxChartSize) {
-				const float limit = max(extents.x, extents.y);
-				scale = maxChartSize / (limit + 1.0f);
-				for (uint32_t i = 0; i < chart->uniqueVertexCount(); i++)
-					chart->uniqueVertexAt(i) *= scale;
-				extents *= scale;
-				XA_DEBUG_ASSERT(extents.x <= maxChartSize && extents.y <= maxChartSize);
-			}
-			// Scale the charts to use the entire texel area available. So, if the width is 0.1 we could scale it to 1 without increasing the lightmap usage and making a better
-			// use of it. In many cases this also improves the look of the seams, since vertices on the chart boundaries have more chances of being aligned with the texel centers.
-			float scale_x = 1.0f;
-			float scale_y = 1.0f;
-			float divide_x = 1.0f;
-			float divide_y = 1.0f;
-			if (extents.x > 0) {
-				int cw = ftoi_ceil(extents.x);
-				if (options.blockAlign) {
-					// Align all chart extents to 4x4 blocks, but taking padding into account.
-					cw = align(cw + 2, 4) - 2;
+			// Scale the charts to use the entire texel area available. So, if the width is 0.1 we could scale it to 1 without increasing the lightmap usage and making a better use of it. In many cases this also improves the look of the seams, since vertices on the chart boundaries have more chances of being aligned with the texel centers.
+			if (extents.x > 0.0f && extents.y > 0.0f) {
+				// Block align: align all chart extents to 4x4 blocks, but taking padding and texel center offset into account.
+				const int blockAlignSizeOffset = options.padding * 2 + 1;
+				int width = ftoi_ceil(extents.x);
+				if (options.blockAlign)
+					width = align(width + blockAlignSizeOffset, 4) - blockAlignSizeOffset;
+				int height = ftoi_ceil(extents.y);
+				if (options.blockAlign)
+					height = align(height + blockAlignSizeOffset, 4) - blockAlignSizeOffset;
+				for (uint32_t v = 0; v < chart->uniqueVertexCount(); v++) {
+					Vector2 &texcoord = chart->uniqueVertexAt(v);
+					texcoord.x = texcoord.x / extents.x * (float)width;
+					texcoord.y = texcoord.y / extents.y * (float)height;
 				}
-				scale_x = (float(cw) - kEpsilon);
-				divide_x = extents.x;
-				extents.x = float(cw);
-			}
-			if (extents.y > 0) {
-				int ch = ftoi_ceil(extents.y);
-				if (options.blockAlign) {
-					// Align all chart extents to 4x4 blocks, but taking padding into account.
-					ch = align(ch + 2, 4) - 2;
+				extents.x = (float)width;
+				extents.y = (float)height;
+			}
+			// Limit chart size, either to PackOptions::maxChartSize or maxResolution (if set), whichever is smaller.
+			// If limiting chart size to maxResolution, print a warning, since that may not be desirable to the user.
+			uint32_t maxChartSize = options.maxChartSize;
+			bool warnChartResized = false;
+			if (maxResolution > 0 && (maxChartSize == 0 || maxResolution < maxChartSize)) {
+				maxChartSize = maxResolution - options.padding * 2; // Don't include padding.
+				warnChartResized = true;
+			}
+			if (maxChartSize > 0) {
+				const float realMaxChartSize = (float)maxChartSize - 1.0f; // Aligning to texel centers increases texel footprint by 1.
+				if (extents.x > realMaxChartSize || extents.y > realMaxChartSize) {
+					if (warnChartResized)
+						XA_PRINT("   Resizing chart %u from %gx%g to %ux%u to fit atlas\n", c, extents.x, extents.y, maxChartSize, maxChartSize);
+					scale = realMaxChartSize / max(extents.x, extents.y);
+					for (uint32_t i = 0; i < chart->uniqueVertexCount(); i++) {
+						Vector2 &texcoord = chart->uniqueVertexAt(i);
+						texcoord = min(texcoord * scale, Vector2(realMaxChartSize));
+					}
 				}
-				scale_y = (float(ch) - kEpsilon);
-				divide_y = extents.y;
-				extents.y = float(ch);
 			}
+			// Align to texel centers and add padding offset.
+			extents.x = extents.y = 0.0f;
 			for (uint32_t v = 0; v < chart->uniqueVertexCount(); v++) {
 				Vector2 &texcoord = chart->uniqueVertexAt(v);
-				texcoord.x /= divide_x;
-				texcoord.y /= divide_y;
-				texcoord.x *= scale_x;
-				texcoord.y *= scale_y;
-				XA_ASSERT(isFinite(texcoord.x) && isFinite(texcoord.y));
+				texcoord.x += 0.5f + options.padding;
+				texcoord.y += 0.5f + options.padding;
+				extents = max(extents, texcoord);
 			}
 			chartExtents[c] = extents;
-			// Sort charts by perimeter.
-			chartOrderArray[c] = extents.x + extents.y;
+			chartOrderArray[c] = extents.x + extents.y; // Use perimeter for chart sort key.
 			minChartPerimeter = min(minChartPerimeter, chartOrderArray[c]);
 			maxChartPerimeter = max(maxChartPerimeter, chartOrderArray[c]);
 		}
@@ -7147,9 +7201,14 @@ struct Atlas
 #else
 		const bool createImage = options.createImage;
 #endif
-		BitImage chartBitImage, chartBitImageRotated;
-		int atlasWidth = 0, atlasHeight = 0;
-		const bool resizableAtlas = !(options.resolution > 0 && options.texelsPerUnit > 0.0f);
+		// chartImage: result from conservative rasterization
+		// chartImageBilinear: chartImage plus any texels that would be sampled by bilinear filtering.
+		// chartImagePadding: either chartImage or chartImageBilinear depending on options, with a dilate filter applied options.padding times.
+		// Rotated versions swap x and y.
+		BitImage chartImage, chartImageBilinear, chartImagePadding;
+		BitImage chartImageRotated, chartImageBilinearRotated, chartImagePaddingRotated;
+		Array<Vector2i> atlasSizes;
+		atlasSizes.push_back(Vector2i(0, 0));
 		int progress = 0;
 		for (uint32_t i = 0; i < chartCount; i++) {
 			uint32_t c = ranks[chartCount - i - 1]; // largest chart first
@@ -7167,29 +7226,46 @@ struct Atlas
 			//    V   V   V
 			//    0   1   2
 			XA_PROFILE_START(packChartsRasterize)
-			// Leave room for padding.
-			chartBitImage.resize(ftoi_ceil(chartExtents[c].x) + 1 + options.padding * 2, ftoi_ceil(chartExtents[c].y) + 1 + options.padding * 2, true);
+			// Resize and clear (discard = true) chart images.
+			// Leave room for padding at extents.
+			chartImage.resize(ftoi_ceil(chartExtents[c].x) + options.padding, ftoi_ceil(chartExtents[c].y) + options.padding, true);
 			if (chart->allowRotate)
-				chartBitImageRotated.resize(chartBitImage.height(), chartBitImage.width(), true);
+				chartImageRotated.resize(chartImage.height(), chartImage.width(), true);
+			if (options.bilinear) {
+				chartImageBilinear.resize(chartImage.width(), chartImage.height(), true);
+				if (chart->allowRotate)
+					chartImageBilinearRotated.resize(chartImage.height(), chartImage.width(), true);
+			}
 			// Rasterize chart faces.
 			const uint32_t faceCount = chart->indexCount / 3;
 			for (uint32_t f = 0; f < faceCount; f++) {
-				// Offset vertices by padding.
 				Vector2 vertices[3];
 				for (uint32_t v = 0; v < 3; v++)
-					vertices[v] = chart->vertices[chart->indices[f * 3 + v]] + Vector2(0.5f) + Vector2(float(options.padding));
+					vertices[v] = chart->vertices[chart->indices[f * 3 + v]];
 				DrawTriangleCallbackArgs args;
-				args.chartBitImage = &chartBitImage;
-				args.chartBitImageRotated = chart->allowRotate ? &chartBitImageRotated : nullptr;
-				raster::drawTriangle(Vector2((float)chartBitImage.width(), (float)chartBitImage.height()), vertices, drawTriangleCallback, &args);
-			}
-			// Expand chart by padding pixels. (dilation)
-			BitImage chartBitImageNoPadding(chartBitImage), chartBitImageNoPaddingRotated(chartBitImageRotated);
+				args.chartBitImage = &chartImage;
+				args.chartBitImageRotated = chart->allowRotate ? &chartImageRotated : nullptr;
+				raster::drawTriangle(Vector2((float)chartImage.width(), (float)chartImage.height()), vertices, drawTriangleCallback, &args);
+			}
+			// Expand chart by pixels sampled by bilinear interpolation.
+			if (options.bilinear)
+				bilinearExpand(chart, &chartImage, &chartImageBilinear, chart->allowRotate ? &chartImageBilinearRotated : nullptr);
+			// Expand chart by padding pixels (dilation).
 			if (options.padding > 0) {
+				// Copy into the same BitImage instances for every chart to avoid reallocating BitImage buffers (largest chart is packed first).
 				XA_PROFILE_START(packChartsDilate)
-				chartBitImage.dilate(options.padding);
-				if (chart->allowRotate)
-					chartBitImageRotated.dilate(options.padding);
+				if (options.bilinear)
+					chartImageBilinear.copyTo(chartImagePadding);
+				else
+					chartImage.copyTo(chartImagePadding);
+				chartImagePadding.dilate(options.padding);
+				if (chart->allowRotate) {
+					if (options.bilinear)
+						chartImageBilinearRotated.copyTo(chartImagePaddingRotated);
+					else
+						chartImageRotated.copyTo(chartImagePaddingRotated);
+					chartImagePaddingRotated.dilate(options.padding);
+				}
 				XA_PROFILE_END(packChartsDilate)
 			}
 			XA_PROFILE_END(packChartsRasterize)
@@ -7203,6 +7279,17 @@ struct Atlas
 				}
 			}
 			// Find a location to place the chart in the atlas.
+			BitImage *chartImageToPack, *chartImageToPackRotated;
+			if (options.padding > 0) {
+				chartImageToPack = &chartImagePadding;
+				chartImageToPackRotated = &chartImagePaddingRotated;
+			} else if (options.bilinear) {
+				chartImageToPack = &chartImageBilinear;
+				chartImageToPackRotated = &chartImageBilinearRotated;
+			} else {
+				chartImageToPack = &chartImage;
+				chartImageToPackRotated = &chartImageRotated;
+			}
 			uint32_t currentAtlas = 0;
 			int best_x = 0, best_y = 0;
 			int best_cw = 0, best_ch = 0;
@@ -7210,27 +7297,24 @@ struct Atlas
 			for (;;)
 			{
 				bool firstChartInBitImage = false;
+				XA_UNUSED(firstChartInBitImage);
 				if (currentAtlas + 1 > m_bitImages.size()) {
 					// Chart doesn't fit in the current bitImage, create a new one.
-					BitImage *bi = XA_NEW(MemTag::Default, BitImage);
-					bi->resize(resolution, resolution, true);
+					BitImage *bi = XA_NEW_ARGS(MemTag::Default, BitImage, resolution, resolution);
 					m_bitImages.push_back(bi);
+					atlasSizes.push_back(Vector2i(0, 0));
 					firstChartInBitImage = true;
 					if (createImage)
-						m_atlasImages.push_back(XA_NEW(MemTag::Default, AtlasImage, resolution, resolution));
+						m_atlasImages.push_back(XA_NEW_ARGS(MemTag::Default, AtlasImage, resolution, resolution));
 					// Start positions are per-atlas, so create a new one of those too.
 					chartStartPositions.push_back(Vector2i(0, 0));
 				}
 				XA_PROFILE_START(packChartsFindLocation)
-				const bool foundLocation = findChartLocation(taskScheduler, chartStartPositions[currentAtlas], options.bruteForce, m_bitImages[currentAtlas], &chartBitImage, &chartBitImageRotated, atlasWidth, atlasHeight, &best_x, &best_y, &best_cw, &best_ch, &best_r, options.blockAlign, resizableAtlas, chart->allowRotate);
+				const bool foundLocation = findChartLocation(taskScheduler, chartStartPositions[currentAtlas], options.bruteForce, m_bitImages[currentAtlas], chartImageToPack, chartImageToPackRotated, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, &best_x, &best_y, &best_cw, &best_ch, &best_r, options.blockAlign, maxResolution, chart->allowRotate);
 				XA_PROFILE_END(packChartsFindLocation)
-				if (firstChartInBitImage && !foundLocation) {
-					// Chart doesn't fit in an empty, newly allocated bitImage. texelsPerUnit must be too large for the resolution.
-					XA_ASSERT(true && "chart doesn't fit");
-					break;
-				}
-				if (resizableAtlas) {
-					XA_DEBUG_ASSERT(foundLocation);
+				XA_DEBUG_ASSERT(!(firstChartInBitImage && !foundLocation)); // Chart doesn't fit in an empty, newly allocated bitImage. Shouldn't happen, since charts are resized if they are too big to fit in the atlas.
+				if (maxResolution == 0) {
+					XA_DEBUG_ASSERT(foundLocation); // The atlas isn't limited to a fixed resolution, a chart location should be found on the first attempt.
 					break;
 				}
 				if (foundLocation)
@@ -7241,7 +7325,7 @@ struct Atlas
 			// Update brute force start location.
 			if (options.bruteForce) {
 				// Reset start location if the chart expanded the atlas.
-				if (best_x + best_cw > atlasWidth || best_y + best_ch > atlasHeight) {
+				if (best_x + best_cw > atlasSizes[currentAtlas].x || best_y + best_ch > atlasSizes[currentAtlas].y) {
 					for (uint32_t j = 0; j < chartStartPositions.size(); j++)
 						chartStartPositions[j] = Vector2i(0, 0);
 				}
@@ -7250,28 +7334,37 @@ struct Atlas
 				}
 			}
 			// Update parametric extents.
-			atlasWidth = max(atlasWidth, best_x + best_cw);
-			atlasHeight = max(atlasHeight, best_y + best_ch);
-			if (resizableAtlas) {
-				// Resize bitImage if necessary.
-				if (uint32_t(atlasWidth) > m_bitImages[0]->width() || uint32_t(atlasHeight) > m_bitImages[0]->height()) {
-					m_bitImages[0]->resize(nextPowerOfTwo(uint32_t(atlasWidth)), nextPowerOfTwo(uint32_t(atlasHeight)), false);
+			atlasSizes[currentAtlas].x = max(atlasSizes[currentAtlas].x, best_x + best_cw);
+			atlasSizes[currentAtlas].y = max(atlasSizes[currentAtlas].y, best_y + best_ch);
+			// Resize bitImage if necessary.
+			// If maxResolution > 0, the bitImage is always set to maxResolutionIncludingPadding on creation and doesn't need to be dynamically resized.
+			if (maxResolution == 0) {
+				const uint32_t w = (uint32_t)atlasSizes[currentAtlas].x;
+				const uint32_t h = (uint32_t)atlasSizes[currentAtlas].y;
+				if (w > m_bitImages[0]->width() || h > m_bitImages[0]->height()) {
+					m_bitImages[0]->resize(nextPowerOfTwo(w), nextPowerOfTwo(h), false);
 					if (createImage)
 						m_atlasImages[0]->resize(m_bitImages[0]->width(), m_bitImages[0]->height());
 				}
 			} else {
-				atlasWidth = min((int)options.resolution, atlasWidth);
-				atlasHeight = min((int)options.resolution, atlasHeight);
+				XA_DEBUG_ASSERT(atlasSizes[currentAtlas].x <= (int)maxResolution);
+				XA_DEBUG_ASSERT(atlasSizes[currentAtlas].y <= (int)maxResolution);
 			}
 			XA_PROFILE_START(packChartsBlit)
-			addChart(m_bitImages[currentAtlas], &chartBitImage, &chartBitImageRotated, atlasWidth, atlasHeight, best_x, best_y, best_r);
+			addChart(m_bitImages[currentAtlas], chartImageToPack, chartImageToPackRotated, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, best_x, best_y, best_r);
 			XA_PROFILE_END(packChartsBlit)
 			if (createImage) {
-				m_atlasImages[currentAtlas]->addChart(c, best_r == 0 ? &chartBitImageNoPadding : &chartBitImageNoPaddingRotated, false, atlasWidth, atlasHeight, best_x, best_y);
-				m_atlasImages[currentAtlas]->addChart(c, best_r == 0 ? &chartBitImage : &chartBitImageRotated, true, atlasWidth, atlasHeight, best_x, best_y);
+				if (best_r == 0) {
+					m_atlasImages[currentAtlas]->addChart(c, &chartImage, options.bilinear ? &chartImageBilinear : nullptr, options.padding > 0 ? &chartImagePadding : nullptr, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, best_x, best_y);
+				} else {
+					m_atlasImages[currentAtlas]->addChart(c, &chartImageRotated, options.bilinear ? &chartImageBilinearRotated : nullptr, options.padding > 0 ? &chartImagePaddingRotated : nullptr, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, best_x, best_y);
+				}
 			}
 			chart->atlasIndex = (int32_t)currentAtlas;
-			// Translate and rotate chart texture coordinates.
+			// Modify texture coordinates:
+			//  - rotate if the chart should be rotated
+			//  - translate to chart location
+			//  - translate to remove padding from top and left atlas edges (unless block aligned)
 			for (uint32_t v = 0; v < chart->uniqueVertexCount(); v++) {
 				Vector2 &texcoord = chart->uniqueVertexAt(v);
 				Vector2 t = texcoord;
@@ -7279,8 +7372,12 @@ struct Atlas
 					XA_DEBUG_ASSERT(chart->allowRotate);
 					swap(t.x, t.y);
 				}
-				texcoord.x = best_x + t.x + 0.5f;
-				texcoord.y = best_y + t.y + 0.5f;
+				texcoord.x = best_x + t.x;
+				texcoord.y = best_y + t.y;
+				if (!options.blockAlign) {
+					texcoord.x -= (float)options.padding;
+					texcoord.y -= (float)options.padding;
+				}
 				XA_ASSERT(texcoord.x >= 0 && texcoord.y >= 0);
 				XA_ASSERT(isFinite(texcoord.x) && isFinite(texcoord.y));
 			}
@@ -7293,21 +7390,35 @@ struct Atlas
 				}
 			}
 		}
-		if (resizableAtlas) {
-			m_width = max(0, atlasWidth - (int)options.padding * 2);
-			m_height = max(0, atlasHeight - (int)options.padding * 2);
+		if (options.blockAlign) {
+			if (maxResolution == 0) {
+				m_width = max(0, atlasSizes[0].x);
+				m_height = max(0, atlasSizes[0].y);
+			} else {
+				m_width = m_height = maxResolution;
+			}
 		} else {
-			m_width = m_height = options.resolution;
+			// Remove padding from outer edges.
+			if (maxResolution == 0) {
+				m_width = max(0, atlasSizes[0].x - (int)options.padding * 2);
+				m_height = max(0, atlasSizes[0].y - (int)options.padding * 2);
+			} else {
+				m_width = m_height = maxResolution - (int)options.padding * 2;
+			}
 		}
 		XA_PRINT("   %dx%d resolution\n", m_width, m_height);
 		m_utilization.resize(m_bitImages.size());
 		for (uint32_t i = 0; i < m_utilization.size(); i++) {
-			uint32_t count = 0;
-			for (uint32_t y = 0; y < m_height; y++) {
-				for (uint32_t x = 0; x < m_width; x++)
-					count += m_bitImages[i]->bitAt(x, y);
+			if (m_width == 0 || m_height == 0)
+				m_utilization[i] = 0.0f;
+			else {
+				uint32_t count = 0;
+				for (uint32_t y = 0; y < m_height; y++) {
+					for (uint32_t x = 0; x < m_width; x++)
+						count += m_bitImages[i]->bitAt(x, y);
+				}
+				m_utilization[i] = float(count) / (m_width * m_height);
 			}
-			m_utilization[i] = float(count) / (m_width * m_height);
 			if (m_utilization.size() > 1) {
 				XA_PRINT("   %u: %f%% utilization\n", i, m_utilization[i] * 100.0f);
 			}
@@ -7334,27 +7445,33 @@ private:
 	// is occupied at this point. At the end we have many small charts and a large atlas with sparse holes. Finding those holes randomly is slow. A better approach would be to
 	// start stacking large charts as if they were tetris pieces. Once charts get small try to place them randomly. It may be interesting to try a intermediate strategy, first try
 	// along one axis and then try exhaustively along that axis.
-	bool findChartLocation(TaskScheduler *taskScheduler, const Vector2i &startPosition, bool bruteForce, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, bool resizableAtlas, bool allowRotate)
+	bool findChartLocation(TaskScheduler *taskScheduler, const Vector2i &startPosition, bool bruteForce, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, uint32_t maxResolution, bool allowRotate)
 	{
 		const int attempts = 4096;
 		if (bruteForce || attempts >= w * h)
-			return findChartLocation_bruteForce(taskScheduler, startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, blockAligned, resizableAtlas, allowRotate);
-		return findChartLocation_random(atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, blockAligned, resizableAtlas, allowRotate);
+			return findChartLocation_bruteForce(taskScheduler, startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, blockAligned, maxResolution, allowRotate);
+		return findChartLocation_random(atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, blockAligned, maxResolution, allowRotate);
 	}
 
-	bool findChartLocation_bruteForce(TaskScheduler *taskScheduler, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, bool resizableAtlas, bool allowRotate)
+	bool findChartLocation_bruteForce(TaskScheduler *taskScheduler, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, uint32_t maxResolution, bool allowRotate)
 	{
 		const int stepSize = blockAligned ? 4 : 1;
+		const int chartMinHeight = min(chartBitImage->height(), chartBitImageRotated->height());
 		uint32_t taskCount = 0;
-		for (int y = startPosition.y; y <= h + stepSize; y += stepSize)
+		for (int y = startPosition.y; y <= h + stepSize; y += stepSize) {
+			if (maxResolution > 0 && y > (int)maxResolution - chartMinHeight)
+				break;
 			taskCount++;
-		Array<FindChartLocationBruteForceTaskArgs> taskArgs;
-		taskArgs.resize(taskCount);
+		}
+		m_bruteForceTaskArgs.clear();
+		m_bruteForceTaskArgs.resize(taskCount);
 		TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(taskCount);
 		std::atomic<bool> finished(false); // One of the tasks found a location that doesn't expand the atlas.
 		uint32_t i = 0;
 		for (int y = startPosition.y; y <= h + stepSize; y += stepSize) {
-			FindChartLocationBruteForceTaskArgs &args = taskArgs[i];
+			if (maxResolution > 0 && y > (int)maxResolution - chartMinHeight)
+				break;
+			FindChartLocationBruteForceTaskArgs &args = m_bruteForceTaskArgs[i];
 			args.finished = &finished;
 			args.startPosition = Vector2i(y == startPosition.y ? startPosition.x : 0, y);
 			args.atlasBitImage = atlasBitImage;
@@ -7363,10 +7480,10 @@ private:
 			args.w = w;
 			args.h = h;
 			args.blockAligned = blockAligned;
-			args.resizableAtlas = resizableAtlas;
 			args.allowRotate = allowRotate;
+			args.maxResolution = maxResolution;
 			Task task;
-			task.userData = &taskArgs[i];
+			task.userData = &m_bruteForceTaskArgs[i];
 			task.func = runFindChartLocationBruteForceTask;
 			taskScheduler->run(taskGroup, task);
 			i++;
@@ -7376,7 +7493,7 @@ private:
 		int best_metric = INT_MAX;
 		bool best_insideAtlas = false;
 		for (i = 0; i < taskCount; i++) {
-			FindChartLocationBruteForceTaskArgs &args = taskArgs[i];
+			FindChartLocationBruteForceTaskArgs &args = m_bruteForceTaskArgs[i];
 			if (args.best_metric > best_metric)
 				continue;
 			// A location that doesn't expand the atlas is always preferred.
@@ -7396,7 +7513,7 @@ private:
 		return best_metric != INT_MAX;
 	}
 
-	bool findChartLocation_random(const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int minTrialCount, bool blockAligned, bool resizableAtlas, bool allowRotate)
+	bool findChartLocation_random(const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int minTrialCount, bool blockAligned, uint32_t maxResolution, bool allowRotate)
 	{
 		bool result = false;
 		const int BLOCK_SIZE = 4;
@@ -7410,16 +7527,17 @@ private:
 			// + 1 to extend atlas in case atlas full. We may want to use a higher number to increase probability of extending atlas.
 			int xRange = w + 1;
 			int yRange = h + 1;
-			if (!resizableAtlas) {
-				xRange = min(xRange, (int)atlasBitImage->width() - cw);
-				yRange = min(yRange, (int)atlasBitImage->height() - ch);
+			// Clamp to max resolution.
+			if (maxResolution > 0) {
+				xRange = min(xRange, (int)maxResolution - cw);
+				yRange = min(yRange, (int)maxResolution - ch);
 			}
 			int x = m_rand.getRange(xRange);
 			int y = m_rand.getRange(yRange);
 			if (blockAligned) {
 				x = align(x, BLOCK_SIZE);
 				y = align(y, BLOCK_SIZE);
-				if (!resizableAtlas && (x > (int)atlasBitImage->width() - cw || y > (int)atlasBitImage->height() - ch))
+				if (maxResolution > 0 && (x > (int)maxResolution - cw || y > (int)maxResolution - ch))
 					continue; // Block alignment pushed the chart outside the atlas.
 			}
 			// Early out.
@@ -7475,10 +7593,68 @@ private:
 		}
 	}
 
+	void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated) const
+	{
+		const int xOffsets[] = { -1, 0, 1, -1, 1, -1, 0, 1 };
+		const int yOffsets[] = { -1, -1, -1, 0, 0, 1, 1, 1 };
+		for (uint32_t y = 0; y < source->height(); y++) {
+			for (uint32_t x = 0; x < source->width(); x++) {
+				// Copy pixels from source.
+				if (source->bitAt(x, y))
+					goto setPixel;
+				// Empty pixel. If none of of the surrounding pixels are set, this pixel can't be sampled by bilinear interpolation.
+				{
+					uint32_t s = 0;
+					for (; s < 8; s++) {
+						const int sx = (int)x + xOffsets[s];
+						const int sy = (int)y + yOffsets[s];
+						if (sx < 0 || sy < 0 || sx >= (int)source->width() || sy >= (int)source->height())
+							continue;
+						if (source->bitAt((uint32_t)sx, (uint32_t)sy))
+							break;
+					}
+					if (s == 8)
+						continue;
+				}
+				// If a 2x2 square centered on the pixels centroid intersects the triangle, this pixel will be sampled by bilinear interpolation.
+				// See "Precomputed Global Illumination in Frostbite (GDC 2018)" page 95
+				for (uint32_t f = 0; f < chart->indexCount / 3; f++) {
+					const Vector2 centroid((float)x + 0.5f, (float)y + 0.5f);
+					Vector2 vertices[3];
+					for (uint32_t i = 0; i < 3; i++)
+						vertices[i] = chart->vertices[chart->indices[f * 3 + i]];
+					// Test for triangle vertex in square bounds.
+					for (uint32_t i = 0; i < 3; i++) {
+						const Vector2 &v = vertices[i];
+						if (v.x > centroid.x - 1.0f && v.x < centroid.x + 1.0f && v.y > centroid.y - 1.0f && v.y < centroid.y + 1.0f)
+							goto setPixel;
+					}
+					// Test for triangle edge intersection with square edge.
+					const Vector2 squareVertices[4] = {
+						Vector2(centroid.x - 1.0f, centroid.y - 1.0f),
+						Vector2(centroid.x + 1.0f, centroid.y - 1.0f),
+						Vector2(centroid.x + 1.0f, centroid.y + 1.0f),
+						Vector2(centroid.x - 1.0f, centroid.y + 1.0f)
+					};
+					for (uint32_t i = 0; i < 3; i++) {
+						for (uint32_t j = 0; j < 4; j++) {
+							if (linesIntersect(vertices[i], vertices[(i + 1) % 3], squareVertices[j], squareVertices[(j + 1) % 4], 0.0f))
+								goto setPixel;
+						}
+					}
+				}
+				continue;
+			setPixel:
+				dest->setBitAt(x, y);
+				if (destRotated)
+					destRotated->setBitAt(y, x);
+			}
+		}
+	}
+
 	struct DrawTriangleCallbackArgs
 	{
-		BitImage *chartBitImage;
-		BitImage *chartBitImageRotated;
+		BitImage *chartBitImage, *chartBitImageRotated;
 	};
 
 	static bool drawTriangleCallback(void *param, int x, int y)
@@ -7493,8 +7669,8 @@ private:
 	Array<AtlasImage *> m_atlasImages;
 	Array<float> m_utilization;
 	Array<BitImage *> m_bitImages;
-	BoundingBox2D m_boundingBox;
 	Array<Chart *> m_charts;
+	Array<FindChartLocationBruteForceTaskArgs> m_bruteForceTaskArgs;
 	RadixSort m_radix;
 	uint32_t m_width = 0;
 	uint32_t m_height = 0;
@@ -7534,8 +7710,8 @@ static void DestroyOutputMeshes(Context *ctx)
 	for (int i = 0; i < (int)ctx->atlas.meshCount; i++) {
 		Mesh &mesh = ctx->atlas.meshes[i];
 		for (uint32_t j = 0; j < mesh.chartCount; j++) {
-			if (mesh.chartArray[j].indexArray)
-				XA_FREE(mesh.chartArray[j].indexArray);
+			if (mesh.chartArray[j].faceArray)
+				XA_FREE(mesh.chartArray[j].faceArray);
 		}
 		if (mesh.chartArray)
 			XA_FREE(mesh.chartArray);
@@ -7715,18 +7891,19 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
 #endif
 	// Don't know how many times AddMesh will be called, so progress needs to adjusted each time.
 	if (!ctx->addMeshProgress) {
-		ctx->addMeshProgress = XA_NEW(internal::MemTag::Default, internal::Progress, ProgressCategory::AddMesh, ctx->progressFunc, ctx->progressUserData, 1);
+		ctx->addMeshProgress = XA_NEW_ARGS(internal::MemTag::Default, internal::Progress, ProgressCategory::AddMesh, ctx->progressFunc, ctx->progressUserData, 1);
 	}
 	else {
 		ctx->addMeshProgress->setMaxValue(internal::max(ctx->meshCount + 1, meshCountHint));
 	}
-	bool decoded = (meshDecl.indexCount <= 0);
-	uint32_t indexCount = decoded ? meshDecl.vertexCount : meshDecl.indexCount;
+	XA_PROFILE_START(addMeshCopyData)
+	const bool hasIndices = meshDecl.indexCount > 0;
+	const uint32_t indexCount = hasIndices ? meshDecl.indexCount : meshDecl.vertexCount;
 	XA_PRINT("Adding mesh %d: %u vertices, %u triangles\n", ctx->meshCount, meshDecl.vertexCount, indexCount / 3);
 	// Expecting triangle faces.
 	if ((indexCount % 3) != 0)
 		return AddMeshError::InvalidIndexCount;
-	if (!decoded) {
+	if (hasIndices) {
 		// Check if any index is out of range.
 		for (uint32_t i = 0; i < indexCount; i++) {
 			const uint32_t index = DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i);
@@ -7737,7 +7914,7 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
 	uint32_t meshFlags = internal::MeshFlags::HasFaceGroups | internal::MeshFlags::HasIgnoredFaces;
 	if (meshDecl.vertexNormalData)
 		meshFlags |= internal::MeshFlags::HasNormals;
-	internal::Mesh *mesh = XA_NEW(internal::MemTag::Mesh, internal::Mesh, meshDecl.epsilon, meshDecl.vertexCount, indexCount / 3, meshFlags, ctx->meshCount);
+	internal::Mesh *mesh = XA_NEW_ARGS(internal::MemTag::Mesh, internal::Mesh, meshDecl.epsilon, meshDecl.vertexCount, indexCount / 3, meshFlags, ctx->meshCount);
 	for (uint32_t i = 0; i < meshDecl.vertexCount; i++) {
 		internal::Vector3 normal(0.0f);
 		internal::Vector2 texcoord(0.0f);
@@ -7750,7 +7927,7 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
 	for (uint32_t i = 0; i < indexCount / 3; i++) {
 		uint32_t tri[3];
 		for (int j = 0; j < 3; j++)
-			tri[j] = decoded ? i * 3 + j : DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i * 3 + j);
+			tri[j] = hasIndices ? DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i * 3 + j) : i * 3 + j;
 		bool ignore = false;
 		// Check for degenerate or zero length edges.
 		for (int j = 0; j < 3; j++) {
@@ -7769,10 +7946,37 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
 				break;
 			}
 		}
+		// Ignore faces with any nan vertex attributes.
+		if (!ignore) {
+			for (int j = 0; j < 3; j++) {
+				const internal::Vector3 &pos = mesh->position(tri[j]);
+				if (internal::isNan(pos.x) || internal::isNan(pos.y) || internal::isNan(pos.z)) {
+					XA_PRINT("   NAN position in face: %d\n", i);
+					ignore = true;
+					break;
+				}
+				if (meshDecl.vertexNormalData) {
+					const internal::Vector3 &normal = mesh->normal(tri[j]);
+					if (internal::isNan(normal.x) || internal::isNan(normal.y) || internal::isNan(normal.z)) {
+						XA_PRINT("   NAN normal in face: %d\n", i);
+						ignore = true;
+						break;
+					}
+				}
+				if (meshDecl.vertexUvData) {
+					const internal::Vector2 &uv = mesh->texcoord(tri[j]);
+					if (internal::isNan(uv.x) || internal::isNan(uv.y)) {
+						XA_PRINT("   NAN texture coordinate in face: %d\n", i);
+						ignore = true;
+						break;
+					}
+				}
+			}
+		}
 		const internal::Vector3 &a = mesh->position(tri[0]);
 		const internal::Vector3 &b = mesh->position(tri[1]);
 		const internal::Vector3 &c = mesh->position(tri[2]);
-		// Check for zero area faces. Don't bother if a degenerate or zero length edge was already detected.
+		// Check for zero area faces.
 		float area = 0.0f;
 		if (!ignore) {
 			area = internal::length(internal::cross(b - a, c - a)) * 0.5f;
@@ -7791,6 +7995,7 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh
 			ignore = true;
 		mesh->addFace(tri[0], tri[1], tri[2], ignore);
 	}
+	XA_PROFILE_END(addMeshCopyData)
 	if (ctx->addMeshTaskGroup.value == UINT32_MAX)
 		ctx->addMeshTaskGroup = ctx->taskScheduler->createTaskGroup();
 	AddMeshTaskArgs *taskArgs = XA_NEW(internal::MemTag::Default, AddMeshTaskArgs); // The task frees this.
@@ -7818,11 +8023,13 @@ void AddMeshJoin(Atlas *atlas)
 	ctx->addMeshProgress->~Progress();
 	XA_FREE(ctx->addMeshProgress);
 	ctx->addMeshProgress = nullptr;
+	ctx->paramAtlas.sortChartGroups();
 #if XA_PROFILE
 	XA_PRINT("Added %u meshes\n", ctx->meshCount);
 	internal::s_profile.addMeshReal = clock() - internal::s_profile.addMeshReal;
 #endif
 	XA_PROFILE_PRINT_AND_RESET("   Total (real): ", addMeshReal)
+	XA_PROFILE_PRINT_AND_RESET("      Copy data: ", addMeshCopyData)
 	XA_PROFILE_PRINT_AND_RESET("   Total (thread): ", addMeshThread)
 	XA_PROFILE_PRINT_AND_RESET("      Create colocals: ", addMeshCreateColocals)
 	XA_PROFILE_PRINT_AND_RESET("      Create face groups: ", addMeshCreateFaceGroups)
@@ -7880,8 +8087,13 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl)
 	}
 	internal::UvMeshInstance *meshInstance = XA_NEW(internal::MemTag::Default, internal::UvMeshInstance);
 	meshInstance->texcoords.resize(decl.vertexCount);
-	for (uint32_t i = 0; i < decl.vertexCount; i++)
-		meshInstance->texcoords[i] = *((const internal::Vector2 *)&((const uint8_t *)decl.vertexUvData)[decl.vertexStride * i]);
+	for (uint32_t i = 0; i < decl.vertexCount; i++) {
+		internal::Vector2 texcoord = *((const internal::Vector2 *)&((const uint8_t *)decl.vertexUvData)[decl.vertexStride * i]);
+		// Set nan values to 0.
+		if (internal::isNan(texcoord.x) || internal::isNan(texcoord.y))
+			texcoord.x = texcoord.y = 0.0f;
+		meshInstance->texcoords[i] = texcoord;
+	}
 	meshInstance->rotateCharts = decl.rotateCharts;
 	// See if this is an instance of an already existing mesh.
 	internal::UvMesh *mesh = nullptr;
@@ -7902,13 +8114,12 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl)
 		for (uint32_t i = 0; i < mesh->vertexToChartMap.size(); i++)
 			mesh->vertexToChartMap[i] = UINT32_MAX;
 		// Calculate charts (incident faces).
-		internal::HashMap<internal::Vector2, uint32_t> vertexToFaceMap(internal::MemTag::Default, indexCount);
+		internal::HashMap<internal::Vector2> vertexToFaceMap(internal::MemTag::Default, indexCount); // Face is index / 3
 		const uint32_t faceCount = indexCount / 3;
 		for (uint32_t i = 0; i < indexCount; i++)
-			vertexToFaceMap.add(meshInstance->texcoords[mesh->indices[i]], i / 3);
+			vertexToFaceMap.add(meshInstance->texcoords[mesh->indices[i]]);
 		internal::BitArray faceAssigned(faceCount);
 		faceAssigned.clearAll();
-		internal::Array<uint32_t> chartFaces;
 		for (uint32_t f = 0; f < faceCount; f++) {
 			if (faceAssigned.bitAt(f))
 				continue;
@@ -7917,34 +8128,33 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl)
 			chart->material = decl.faceMaterialData ? decl.faceMaterialData[f] : 0;
 			// Walk incident faces and assign them to the chart.
 			faceAssigned.setBitAt(f);
-			chartFaces.clear();
-			chartFaces.push_back(f);
+			chart->faces.push_back(f);
 			for (;;) {
 				bool newFaceAssigned = false;
-				const uint32_t faceCount2 = chartFaces.size();
+				const uint32_t faceCount2 = chart->faces.size();
 				for (uint32_t f2 = 0; f2 < faceCount2; f2++) {
-					const uint32_t face = chartFaces[f2];
+					const uint32_t face = chart->faces[f2];
 					for (uint32_t i = 0; i < 3; i++) {
 						const internal::Vector2 &texcoord = meshInstance->texcoords[meshInstance->mesh->indices[face * 3 + i]];
-						uint32_t mapFaceIndex = vertexToFaceMap.get(texcoord);
-						while (mapFaceIndex != UINT32_MAX) {
-							const uint32_t face2 = vertexToFaceMap.value(mapFaceIndex);
+						uint32_t mapIndex = vertexToFaceMap.get(texcoord);
+						while (mapIndex != UINT32_MAX) {
+							const uint32_t face2 = mapIndex / 3; // 3 vertices added per face.
 							// Materials must match.
 							if (!faceAssigned.bitAt(face2) && (!decl.faceMaterialData || decl.faceMaterialData[face] == decl.faceMaterialData[face2])) {
 								faceAssigned.setBitAt(face2);
-								chartFaces.push_back(face2);
+								chart->faces.push_back(face2);
 								newFaceAssigned = true;
 							}
-							mapFaceIndex = vertexToFaceMap.getNext(mapFaceIndex);
+							mapIndex = vertexToFaceMap.getNext(mapIndex);
 						}
 					}
 				}
 				if (!newFaceAssigned)
 					break;
 			}
-			for (uint32_t i = 0; i < chartFaces.size(); i++) {
+			for (uint32_t i = 0; i < chart->faces.size(); i++) {
 				for (uint32_t j = 0; j < 3; j++) {
-					const uint32_t vertex = meshInstance->mesh->indices[chartFaces[i] * 3 + j];
+					const uint32_t vertex = meshInstance->mesh->indices[chart->faces[i] * 3 + j];
 					chart->indices.push_back(vertex);
 					mesh->vertexToChartMap[vertex] = mesh->charts.size();
 				}
@@ -8019,11 +8229,14 @@ void ComputeCharts(Atlas *atlas, ChartOptions chartOptions)
 	XA_PRINT("   %u charts\n", chartCount);
 	XA_PROFILE_PRINT_AND_RESET("   Total (real): ", computeChartsReal)
 	XA_PROFILE_PRINT_AND_RESET("   Total (thread): ", computeChartsThread)
-	XA_PROFILE_PRINT_AND_RESET("      Atlas builder: ", atlasBuilder)
-	XA_PROFILE_PRINT_AND_RESET("         Init: ", atlasBuilderInit)
-	XA_PROFILE_PRINT_AND_RESET("         Create initial charts: ", atlasBuilderCreateInitialCharts)
-	XA_PROFILE_PRINT_AND_RESET("         Grow charts: ", atlasBuilderGrowCharts)
-	XA_PROFILE_PRINT_AND_RESET("         Merge charts: ", atlasBuilderMergeCharts)
+	XA_PROFILE_PRINT_AND_RESET("      Build atlas: ", buildAtlas)
+	XA_PROFILE_PRINT_AND_RESET("         Init: ", buildAtlasInit)
+	XA_PROFILE_PRINT_AND_RESET("         Place seeds: ", buildAtlasPlaceSeeds)
+	XA_PROFILE_PRINT_AND_RESET("         Relocate seeds: ", buildAtlasRelocateSeeds)
+	XA_PROFILE_PRINT_AND_RESET("         Reset charts: ", buildAtlasResetCharts)
+	XA_PROFILE_PRINT_AND_RESET("         Grow charts: ", buildAtlasGrowCharts)
+	XA_PROFILE_PRINT_AND_RESET("         Merge charts: ", buildAtlasMergeCharts)
+	XA_PROFILE_PRINT_AND_RESET("         Fill holes: ", buildAtlasFillHoles)
 	XA_PROFILE_PRINT_AND_RESET("      Create chart meshes (real): ", createChartMeshesReal)
 	XA_PROFILE_PRINT_AND_RESET("      Create chart meshes (thread): ", createChartMeshesThread)
 	XA_PROFILE_PRINT_AND_RESET("         Fix t-junctions: ", fixChartMeshTJunctions)
@@ -8087,7 +8300,7 @@ void ParameterizeCharts(Atlas *atlas, ParameterizeFunc func)
 	XA_PRINT("   %u planar charts, %u ortho charts, %u other\n", planarChartsCount, orthoChartsCount, chartCount - (planarChartsCount + orthoChartsCount));
 	if (chartsDeletedCount > 0) {
 		XA_PRINT("   %u charts deleted due to invalid parameterizations, %u new charts added\n", chartsDeletedCount, chartsAddedCount);
-		XA_PRINT("   %u charts\n", ctx->paramAtlas.chartCount());
+		XA_PRINT("   %u charts\n", chartCount);
 	}
 	uint32_t chartIndex = 0, invalidParamCount = 0;
 	for (uint32_t i = 0; i < ctx->meshCount; i++) {
@@ -8192,16 +8405,15 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 	}
 	atlas->meshCount = 0;
 	// Pack charts.
+	XA_PROFILE_START(packChartsAddCharts)
 	internal::pack::Atlas packAtlas;
 	if (!ctx->uvMeshInstances.isEmpty()) {
 		for (uint32_t i = 0; i < ctx->uvMeshInstances.size(); i++)
 			packAtlas.addUvMeshCharts(ctx->uvMeshInstances[i]);
 	}
-	else if (ctx->paramAtlas.chartCount() > 0) {
-		ctx->paramAtlas.restoreOriginalChartTexcoords();
-		for (uint32_t i = 0; i < ctx->paramAtlas.chartCount(); i++)
-			packAtlas.addChart(ctx->paramAtlas.chartAt(i));
-	}
+	else
+		packAtlas.addCharts(ctx->taskScheduler, &ctx->paramAtlas);
+	XA_PROFILE_END(packChartsAddCharts)
 	XA_PROFILE_START(packCharts)
 	if (!packAtlas.packCharts(ctx->taskScheduler, packOptions, ctx->progressFunc, ctx->progressUserData))
 		return;
@@ -8220,9 +8432,12 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 	if (packOptions.createImage) {
 		atlas->image = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, atlas->atlasCount * atlas->width * atlas->height);
 		for (uint32_t i = 0; i < atlas->atlasCount; i++)
-			packAtlas.getImages()[i]->copyTo(&atlas->image[atlas->width * atlas->height * i], atlas->width, atlas->height);
+			packAtlas.getImages()[i]->copyTo(&atlas->image[atlas->width * atlas->height * i], atlas->width, atlas->height, packOptions.blockAlign ? 0 : packOptions.padding);
 	}
 	XA_PROFILE_PRINT_AND_RESET("   Total: ", packCharts)
+	XA_PROFILE_PRINT_AND_RESET("      Add charts (real): ", packChartsAddCharts)
+	XA_PROFILE_PRINT_AND_RESET("      Add charts (thread): ", packChartsAddChartsThread)
+	XA_PROFILE_PRINT_AND_RESET("         Restore texcoords: ", packChartsAddChartsRestoreTexcoords)
 	XA_PROFILE_PRINT_AND_RESET("      Rasterize: ", packChartsRasterize)
 	XA_PROFILE_PRINT_AND_RESET("      Dilate (padding): ", packChartsDilate)
 	XA_PROFILE_PRINT_AND_RESET("      Find location (real): ", packChartsFindLocation)
@@ -8230,6 +8445,7 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 	XA_PROFILE_PRINT_AND_RESET("      Blit: ", packChartsBlit)
 	XA_PRINT_MEM_USAGE
 	XA_PRINT("Building output meshes\n");
+	XA_PROFILE_START(buildOutputMeshes)
 	int progress = 0;
 	if (ctx->progressFunc) {
 		if (!ctx->progressFunc(ProgressCategory::BuildOutputMeshes, 0, ctx->progressUserData))
@@ -8265,8 +8481,7 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 			outputMesh.chartArray = XA_ALLOC_ARRAY(internal::MemTag::Default, Chart, outputMesh.chartCount);
 			XA_PRINT("   mesh %u: %u vertices, %u triangles, %u charts\n", i, outputMesh.vertexCount, outputMesh.indexCount / 3, outputMesh.chartCount);
 			// Copy mesh data.
-			uint32_t firstVertex = 0;
-			uint32_t meshChartIndex = 0;
+			uint32_t firstVertex = 0, meshChartIndex = 0;
 			for (uint32_t cg = 0; cg < ctx->paramAtlas.chartGroupCount(i); cg++) {
 				const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, cg);
 				if (chartGroup->isVertexMap()) {
@@ -8315,16 +8530,14 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 						outputChart->flags = 0;
 						if (chart->paramQuality().boundaryIntersection || chart->paramQuality().flippedTriangleCount > 0)
 							outputChart->flags |= ChartFlags::Invalid;
-						outputChart->indexCount = mesh->faceCount() * 3;
-						outputChart->indexArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->indexCount);
-						for (uint32_t f = 0; f < mesh->faceCount(); f++) {
-							for (uint32_t j = 0; j < 3; j++)
-								outputChart->indexArray[3 * f + j] = firstVertex + mesh->vertexAt(f * 3 + j);
-						}
+						outputChart->faceCount = mesh->faceCount();
+						outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount);
+						for (uint32_t f = 0; f < outputChart->faceCount; f++)
+							outputChart->faceArray[f] = chartGroup->mapFaceToSourceFace(chart->mapFaceToSourceFace(f));
 						outputChart->material = 0;
 						meshChartIndex++;
 						chartIndex++;
-						firstVertex += chart->mesh()->vertexCount();
+						firstVertex += mesh->vertexCount();
 					}
 				}
 			}
@@ -8378,10 +8591,11 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 				const internal::pack::Chart *chart = packAtlas.getChart(chartIndex);
 				XA_DEBUG_ASSERT(chart->atlasIndex >= 0);
 				outputChart->atlasIndex = (uint32_t)chart->atlasIndex;
-				outputChart->indexCount = chart->indexCount;
-				outputChart->indexArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->indexCount);
+				outputChart->faceCount = chart->faces.size();
+				outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount);
 				outputChart->material = chart->material;
-				memcpy(outputChart->indexArray, chart->indices, chart->indexCount * sizeof(uint32_t));
+				for (uint32_t f = 0; f < outputChart->faceCount; f++)
+					outputChart->faceArray[f] = chart->faces[f];
 				chartIndex++;
 			}
 			if (ctx->progressFunc) {
@@ -8396,6 +8610,8 @@ void PackCharts(Atlas *atlas, PackOptions packOptions)
 	}
 	if (ctx->progressFunc && progress != 100)
 		ctx->progressFunc(ProgressCategory::BuildOutputMeshes, 100, ctx->progressUserData);
+	XA_PROFILE_END(buildOutputMeshes)
+	XA_PROFILE_PRINT_AND_RESET("   Total: ", buildOutputMeshes)
 	XA_PRINT_MEM_USAGE
 }
 
@@ -8430,9 +8646,10 @@ void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progress
 	ctx->progressUserData = progressUserData;
 }
 
-void SetRealloc(ReallocFunc reallocFunc)
+void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc)
 {
 	internal::s_realloc = reallocFunc;
+	internal::s_free = freeFunc;
 }
 
 void SetPrint(PrintFunc print, bool verbose)
diff --git a/thirdparty/xatlas/xatlas.h b/thirdparty/xatlas/xatlas.h
index c123e800b4..7be165e7e5 100644
--- a/thirdparty/xatlas/xatlas.h
+++ b/thirdparty/xatlas/xatlas.h
@@ -48,8 +48,8 @@ struct Chart
 {
 	uint32_t atlasIndex; // Sub-atlas index.
 	uint32_t flags;
-	uint32_t *indexArray;
-	uint32_t indexCount;
+	uint32_t *faceArray;
+	uint32_t faceCount;
 	uint32_t material;
 };
 
@@ -73,9 +73,10 @@ struct Mesh
 	uint32_t vertexCount;
 };
 
-static const uint32_t kImageChartIndexMask = 0x3FFFFFFF;
-static const uint32_t kImageHasChartIndexBit = 0x40000000;
-static const uint32_t kImageIsPaddingBit = 0x80000000;
+static const uint32_t kImageChartIndexMask = 0x1FFFFFFF;
+static const uint32_t kImageHasChartIndexBit = 0x80000000;
+static const uint32_t kImageIsBilinearBit = 0x40000000;
+static const uint32_t kImageIsPaddingBit = 0x20000000;
 
 // Empty on creation. Populated after charts are packed.
 struct Atlas
@@ -173,7 +174,6 @@ struct ChartOptions
 	float textureSeamMetricWeight = 0.5f;
 
 	float maxThreshold = 2.0f; // If total of all metrics * weights > maxThreshold, don't grow chart. Lower values result in more charts.
-	uint32_t growFaceCount = 32; // Grow this many faces at a time.
 	uint32_t maxIterations = 1; // Number of iterations of the chart growing and seeding phases. Higher values result in better charts.
 };
 
@@ -188,12 +188,24 @@ void ParameterizeCharts(Atlas *atlas, ParameterizeFunc func = nullptr);
 
 struct PackOptions
 {
+	// Leave space around charts for texels that would be sampled by bilinear filtering.
+	bool bilinear = true;
+
+	// Align charts to 4x4 blocks. Also improves packing speed, since there are fewer possible chart locations to consider.
+	bool blockAlign = false;
+
 	// Slower, but gives the best result. If false, use random chart placement.
 	bool bruteForce = false;
 
 	// Create Atlas::image
 	bool createImage = false;
 
+	// Charts larger than this will be scaled down. 0 means no limit.
+	uint32_t maxChartSize = 0;
+
+	// Number of pixels to pad charts with.
+	uint32_t padding = 0;
+
 	// Unit to texel scale. e.g. a 1x1 quad with texelsPerUnit of 32 will take up approximately 32x32 texels in the atlas.
 	// If 0, an estimated value will be calculated to approximately match the given resolution.
 	// If resolution is also 0, the estimated value will approximately match a 1024x1024 atlas.
@@ -203,15 +215,6 @@ struct PackOptions
 	// If not 0, and texelsPerUnit is not 0, generate one or more atlases with that exact resolution.
 	// If not 0, and texelsPerUnit is 0, texelsPerUnit is estimated to approximately match the resolution.
 	uint32_t resolution = 0;
-
-	// Charts larger than this will be scaled down.
-	uint32_t maxChartSize = 1024;
-
-	// Align charts to 4x4 blocks. Also improves packing speed, since there are fewer possible chart locations to consider.
-	bool blockAlign = false;
-
-	// Number of pixels to pad charts with.
-	uint32_t padding = 0;
 };
 
 // Call after ParameterizeCharts. Can be called multiple times to re-pack charts with different options.
@@ -240,7 +243,8 @@ void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc = nullptr, void
 
 // Custom memory allocation.
 typedef void *(*ReallocFunc)(void *, size_t);
-void SetRealloc(ReallocFunc reallocFunc);
+typedef void (*FreeFunc)(void *);
+void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc = nullptr);
 
 // Custom print function.
 typedef int (*PrintFunc)(const char *, ...);