6 files changed, 405 insertions, 45 deletions
diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp
index f8aad7b49c..b1f7b359c1 100644
--- a/thirdparty/meshoptimizer/clusterizer.cpp
+++ b/thirdparty/meshoptimizer/clusterizer.cpp
@@ -368,8 +368,7 @@ static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const
 	}
 
 	// split axis is one where the variance is largest
-	unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1
-	                                                                                      : 2;
+	unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 : 2;
 
 	float split = mean[axis];
 	size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
index e44b99ce52..a420eb1098 100644
--- a/thirdparty/meshoptimizer/meshoptimizer.h
+++ b/thirdparty/meshoptimizer/meshoptimizer.h
@@ -278,9 +278,30 @@ MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t verte
  * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
  * Each 32-bit component is decoded in isolation; stride must be divisible by 4.
  */
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size);
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size);
-MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride);
+
+/**
+ * Vertex buffer filter encoders
+ * These functions can be used to encode data in a format that the corresponding meshopt_decodeFilter* functions can decode
+ * 
+ * meshopt_encodeFilterOct encodes unit vectors with K-bit (K <= 16) signed X/Y as an output.
+ * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is.
+ * Input data must contain 4 floats for every vector (count*4 total).
+ * 
+ * meshopt_encodeFilterQuat encodes unit quaternions with K-bit (4 <= K <= 16) component encoding.
+ * Each component is stored as a 16-bit integer; stride must be equal to 8.
+ * Input data must contain 4 floats for every quaternion (count*4 total).
+ * 
+ * meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24).
+ * Exponent is shared between all components of a given vector as defined by stride; stride must be divisible by 4.
+ * Input data must contain stride/4 floats for every vector (count*stride/4 total).
+ * When individual (scalar) encoding is desired, simply pass stride=4 and adjust count accordingly.
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data);
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data);
 
 /**
  * Experimental: Mesh simplifier
@@ -305,7 +326,7 @@ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* d
 
 /**
  * Experimental: Mesh simplifier (sloppy)
- * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance
+ * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
  * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error.
  * Returns the number of indices after simplification, with destination containing new index data
  * The resulting index buffer references vertices from the original vertex buffer.
diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch
new file mode 100644
index 0000000000..54132a6c86
--- /dev/null
+++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify-distance-only-metric.patch
@@ -0,0 +1,176 @@
+diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
+index 0f10ebef4b..cf5db4e119 100644
+--- a/thirdparty/meshoptimizer/simplifier.cpp
++++ b/thirdparty/meshoptimizer/simplifier.cpp
+@@ -20,7 +20,7 @@
+ #define TRACESTATS(i) (void)0
+ #endif
+ 
+-#define ATTRIBUTES 8
++#define ATTRIBUTES 3
+ 
+ // This work is based on:
+ // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
+@@ -445,6 +445,7 @@ struct Collapse
+ 		float error;
+ 		unsigned int errorui;
+ 	};
++	float distance_error;
+ };
+ 
+ static float normalize(Vector3& v)
+@@ -525,6 +526,34 @@ static float quadricError(const Quadric& Q, const Vector3& v)
+ 	return fabsf(r) * s;
+ }
+ 
++static float quadricErrorNoAttributes(const Quadric& Q, const Vector3& v)
++{
++	float rx = Q.b0;
++	float ry = Q.b1;
++	float rz = Q.b2;
++
++	rx += Q.a10 * v.y;
++	ry += Q.a21 * v.z;
++	rz += Q.a20 * v.x;
++
++	rx *= 2;
++	ry *= 2;
++	rz *= 2;
++
++	rx += Q.a00 * v.x;
++	ry += Q.a11 * v.y;
++	rz += Q.a22 * v.z;
++
++	float r = Q.c;
++	r += rx * v.x;
++	r += ry * v.y;
++	r += rz * v.z;
++
++	float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
++
++	return fabsf(r) * s;
++}
++
+ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
+ {
+ 	float aw = a * w;
+@@ -680,7 +709,7 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3
+ }
+ #endif
+ 
+-static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
++static void fillFaceQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
+ {
+ 	for (size_t i = 0; i < index_count; i += 3)
+ 	{
+@@ -690,6 +719,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+ 
+ 		Quadric Q;
+ 		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
++		quadricAdd(vertex_no_attrib_quadrics[remap[i0]], Q);
++		quadricAdd(vertex_no_attrib_quadrics[remap[i1]], Q);
++		quadricAdd(vertex_no_attrib_quadrics[remap[i2]], Q);
+ 
+ #if ATTRIBUTES
+ 		quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
+@@ -700,7 +732,7 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+ 	}
+ }
+ 
+-static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
++static void fillEdgeQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
+ {
+ 	for (size_t i = 0; i < index_count; i += 3)
+ 	{
+@@ -744,6 +776,9 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+ 
+ 			quadricAdd(vertex_quadrics[remap[i0]], Q);
+ 			quadricAdd(vertex_quadrics[remap[i1]], Q);
++
++			quadricAdd(vertex_no_attrib_quadrics[remap[i0]], Q);
++			quadricAdd(vertex_no_attrib_quadrics[remap[i1]], Q);
+ 		}
+ 	}
+ }
+@@ -848,7 +883,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices
+ 	return collapse_count;
+ }
+ 
+-static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap)
++static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const Quadric* vertex_no_attrib_quadrics, const unsigned int* remap)
+ {
+ 	for (size_t i = 0; i < collapse_count; ++i)
+ 	{
+@@ -868,10 +903,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
+ 		float ei = quadricError(qi, vertex_positions[i1]);
+ 		float ej = quadricError(qj, vertex_positions[j1]);
+ 
++		const Quadric& naqi = vertex_no_attrib_quadrics[remap[i0]];
++		const Quadric& naqj = vertex_no_attrib_quadrics[remap[j0]];
++
+ 		// pick edge direction with minimal error
+ 		c.v0 = ei <= ej ? i0 : j0;
+ 		c.v1 = ei <= ej ? i1 : j1;
+ 		c.error = ei <= ej ? ei : ej;
++		c.distance_error = ei <= ej ? quadricErrorNoAttributes(naqi, vertex_positions[i1]) :  quadricErrorNoAttributes(naqj, vertex_positions[j1]);
+ 	}
+ }
+ 
+@@ -968,7 +1007,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse
+ 	}
+ }
+ 
+-static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
++static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
+ {
+ 	size_t edge_collapses = 0;
+ 	size_t triangle_collapses = 0;
+@@ -1030,6 +1069,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
+ 		assert(collapse_remap[r1] == r1);
+ 
+ 		quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
++		quadricAdd(vertex_no_attrib_quadrics[r1], vertex_no_attrib_quadrics[r0]);
+ 
+ 		if (vertex_kind[i0] == Kind_Complex)
+ 		{
+@@ -1067,7 +1107,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
+ 		triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2;
+ 		edge_collapses++;
+ 
+-		result_error = result_error < c.error ? c.error : result_error;
++		result_error = result_error < c.distance_error ? c.distance_error : result_error;
+ 	}
+ 
+ #if TRACE
+@@ -1455,9 +1495,11 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
+ 
+ 	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
+ 	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
++	Quadric* vertex_no_attrib_quadrics = allocator.allocate<Quadric>(vertex_count);
++	memset(vertex_no_attrib_quadrics, 0, vertex_count * sizeof(Quadric));
+ 
+-	fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap);
+-	fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
++	fillFaceQuadrics(vertex_quadrics, vertex_no_attrib_quadrics, indices, index_count, vertex_positions, remap);
++	fillEdgeQuadrics(vertex_quadrics, vertex_no_attrib_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
+ 
+ 	if (result != indices)
+ 		memcpy(result, indices, index_count * sizeof(unsigned int));
+@@ -1488,7 +1530,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
+ 		if (edge_collapse_count == 0)
+ 			break;
+ 
+-		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap);
++		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, vertex_no_attrib_quadrics, remap);
+ 
+ #if TRACE > 1
+ 		dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind);
+@@ -1507,7 +1549,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
+ 		printf("pass %d: ", int(pass_count++));
+ #endif
+ 
+-		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
++		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, vertex_no_attrib_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
+ 
+ 		// no edges can be collapsed any more due to hitting the error limit or triangle collapse limit
+ 		if (collapses == 0)
diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
index 0f10ebef4b..ccc99edb1a 100644
--- a/thirdparty/meshoptimizer/simplifier.cpp
+++ b/thirdparty/meshoptimizer/simplifier.cpp
@@ -20,7 +20,7 @@
 #define TRACESTATS(i) (void)0
 #endif
 
-#define ATTRIBUTES 8
+#define ATTRIBUTES 3
 
 // This work is based on:
 // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
@@ -358,7 +358,7 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
 
 #if TRACE
 	printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n",
-	       int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3]));
+	    int(stats[0]), int(stats[1]), int(stats[2]), int(stats[3]));
 #endif
 }
 
@@ -445,6 +445,7 @@ struct Collapse
 		float error;
 		unsigned int errorui;
 	};
+	float distance_error;
 };
 
 static float normalize(Vector3& v)
@@ -525,6 +526,34 @@ static float quadricError(const Quadric& Q, const Vector3& v)
 	return fabsf(r) * s;
 }
 
+static float quadricErrorNoAttributes(const Quadric& Q, const Vector3& v)
+{
+	float rx = Q.b0;
+	float ry = Q.b1;
+	float rz = Q.b2;
+
+	rx += Q.a10 * v.y;
+	ry += Q.a21 * v.z;
+	rz += Q.a20 * v.x;
+
+	rx *= 2;
+	ry *= 2;
+	rz *= 2;
+
+	rx += Q.a00 * v.x;
+	ry += Q.a11 * v.y;
+	rz += Q.a22 * v.z;
+
+	float r = Q.c;
+	r += rx * v.x;
+	r += ry * v.y;
+	r += rz * v.z;
+
+	float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
+
+	return fabsf(r) * s;
+}
+
 static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w)
 {
 	float aw = a * w;
@@ -680,7 +709,7 @@ static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3
 }
 #endif
 
-static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
+static void fillFaceQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
 {
 	for (size_t i = 0; i < index_count; i += 3)
 	{
@@ -690,6 +719,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
 
 		Quadric Q;
 		quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
+		quadricAdd(vertex_no_attrib_quadrics[remap[i0]], Q);
+		quadricAdd(vertex_no_attrib_quadrics[remap[i1]], Q);
+		quadricAdd(vertex_no_attrib_quadrics[remap[i2]], Q);
 
 #if ATTRIBUTES
 		quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
@@ -700,7 +732,7 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
 	}
 }
 
-static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
+static void fillEdgeQuadrics(Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback)
 {
 	for (size_t i = 0; i < index_count; i += 3)
 	{
@@ -744,6 +776,9 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
 
 			quadricAdd(vertex_quadrics[remap[i0]], Q);
 			quadricAdd(vertex_quadrics[remap[i1]], Q);
+
+			quadricAdd(vertex_no_attrib_quadrics[remap[i0]], Q);
+			quadricAdd(vertex_no_attrib_quadrics[remap[i1]], Q);
 		}
 	}
 }
@@ -848,7 +883,7 @@ static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices
 	return collapse_count;
 }
 
-static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap)
+static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const Quadric* vertex_no_attrib_quadrics, const unsigned int* remap)
 {
 	for (size_t i = 0; i < collapse_count; ++i)
 	{
@@ -868,10 +903,14 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
 		float ei = quadricError(qi, vertex_positions[i1]);
 		float ej = quadricError(qj, vertex_positions[j1]);
 
+		const Quadric& naqi = vertex_no_attrib_quadrics[remap[i0]];
+		const Quadric& naqj = vertex_no_attrib_quadrics[remap[j0]];
+
 		// pick edge direction with minimal error
 		c.v0 = ei <= ej ? i0 : j0;
 		c.v1 = ei <= ej ? i1 : j1;
 		c.error = ei <= ej ? ei : ej;
+		c.distance_error = ei <= ej ? quadricErrorNoAttributes(naqi, vertex_positions[i1]) :  quadricErrorNoAttributes(naqj, vertex_positions[j1]);
 	}
 }
 
@@ -968,7 +1007,7 @@ static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapse
 	}
 }
 
-static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
+static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, Quadric* vertex_no_attrib_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, const Vector3* vertex_positions, const EdgeAdjacency& adjacency, size_t triangle_collapse_goal, float error_limit, float& result_error)
 {
 	size_t edge_collapses = 0;
 	size_t triangle_collapses = 0;
@@ -1030,6 +1069,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
 		assert(collapse_remap[r1] == r1);
 
 		quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
+		quadricAdd(vertex_no_attrib_quadrics[r1], vertex_no_attrib_quadrics[r0]);
 
 		if (vertex_kind[i0] == Kind_Complex)
 		{
@@ -1067,15 +1107,15 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
 		triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2;
 		edge_collapses++;
 
-		result_error = result_error < c.error ? c.error : result_error;
+		result_error = result_error < c.distance_error ? c.distance_error : result_error;
 	}
 
 #if TRACE
 	float error_goal_perfect = edge_collapse_goal < collapse_count ? collapses[collapse_order[edge_collapse_goal]].error : 0.f;
 
 	printf("removed %d triangles, error %e (goal %e); evaluated %d/%d collapses (done %d, skipped %d, invalid %d)\n",
-		int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect),
-		int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2]));
+	    int(triangle_collapses), sqrtf(result_error), sqrtf(error_goal_perfect),
+	    int(stats[0]), int(collapse_count), int(edge_collapses), int(stats[1]), int(stats[2]));
 #endif
 
 	return edge_collapses;
@@ -1433,7 +1473,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
 		kinds[vertex_kind[i]] += remap[i] == i;
 
 	printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n",
-	       int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked]));
+	    int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked]));
 #endif
 
 	Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
@@ -1455,9 +1495,11 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
 
 	Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
 	memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
+	Quadric* vertex_no_attrib_quadrics = allocator.allocate<Quadric>(vertex_count);
+	memset(vertex_no_attrib_quadrics, 0, vertex_count * sizeof(Quadric));
 
-	fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap);
-	fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
+	fillFaceQuadrics(vertex_quadrics, vertex_no_attrib_quadrics, indices, index_count, vertex_positions, remap);
+	fillEdgeQuadrics(vertex_quadrics, vertex_no_attrib_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
 
 	if (result != indices)
 		memcpy(result, indices, index_count * sizeof(unsigned int));
@@ -1488,7 +1530,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
 		if (edge_collapse_count == 0)
 			break;
 
-		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap);
+		rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, vertex_no_attrib_quadrics, remap);
 
 #if TRACE > 1
 		dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind);
@@ -1507,7 +1549,7 @@ size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned
 		printf("pass %d: ", int(pass_count++));
 #endif
 
-		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
+		size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, vertex_no_attrib_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, vertex_positions, adjacency, triangle_collapse_goal, error_limit, result_error);
 
 		// no edges can be collapsed any more due to hitting the error limit or triangle collapse limit
 		if (collapses == 0)
@@ -1607,9 +1649,9 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind
 
 #if TRACE
 		printf("pass %d (%s): grid size %d, triangles %d, %s\n",
-		       pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
-		       grid_size, int(triangles),
-		       (triangles <= target_index_count / 3) ? "under" : "over");
+		    pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
+		    grid_size, int(triangles),
+		    (triangles <= target_index_count / 3) ? "under" : "over");
 #endif
 
 		float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles));
@@ -1736,9 +1778,9 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos
 
 #if TRACE
 		printf("pass %d (%s): grid size %d, vertices %d, %s\n",
-		       pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
-		       grid_size, int(vertices),
-		       (vertices <= target_vertex_count) ? "under" : "over");
+		    pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary",
+		    grid_size, int(vertices),
+		    (vertices <= target_vertex_count) ? "under" : "over");
 #endif
 
 		float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices));
diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp
index 5f3ec204ab..7925ea862c 100644
--- a/thirdparty/meshoptimizer/vertexcodec.cpp
+++ b/thirdparty/meshoptimizer/vertexcodec.cpp
@@ -77,6 +77,8 @@
 #endif
 
 #ifdef SIMD_WASM
+#undef __DEPRECATED
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
 #include <wasm_simd128.h>
 #endif
 
@@ -1028,7 +1030,7 @@ static unsigned int getCpuFeatures()
 	return cpuinfo[2];
 }
 
-unsigned int cpuid = getCpuFeatures();
+static unsigned int cpuid = getCpuFeatures();
 #endif
 
 } // namespace meshopt
diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp
index 39946f46ed..606a280aa9 100644
--- a/thirdparty/meshoptimizer/vertexfilter.cpp
+++ b/thirdparty/meshoptimizer/vertexfilter.cpp
@@ -52,6 +52,7 @@
 #endif
 
 #ifdef SIMD_WASM
+#undef __DEPRECATED
 #include <wasm_simd128.h>
 #endif
 
@@ -160,7 +161,8 @@ static void decodeFilterExp(unsigned int* data, size_t count)
 #endif
 
 #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
-template <typename T> static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride)
+template <typename T>
+static void dispatchSimd(void (*process)(T*, size_t), T* data, size_t count, size_t stride)
 {
 	assert(stride <= 4);
 
@@ -791,52 +793,170 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
 
 } // namespace meshopt
 
-void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size)
+void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride)
 {
 	using namespace meshopt;
 
-	assert(vertex_size == 4 || vertex_size == 8);
+	assert(stride == 4 || stride == 8);
 
 #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
-	if (vertex_size == 4)
-		dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), vertex_count, 4);
+	if (stride == 4)
+		dispatchSimd(decodeFilterOctSimd, static_cast<signed char*>(buffer), count, 4);
 	else
-		dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), vertex_count, 4);
+		dispatchSimd(decodeFilterOctSimd, static_cast<short*>(buffer), count, 4);
 #else
-	if (vertex_size == 4)
-		decodeFilterOct(static_cast<signed char*>(buffer), vertex_count);
+	if (stride == 4)
+		decodeFilterOct(static_cast<signed char*>(buffer), count);
 	else
-		decodeFilterOct(static_cast<short*>(buffer), vertex_count);
+		decodeFilterOct(static_cast<short*>(buffer), count);
 #endif
 }
 
-void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size)
+void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride)
 {
 	using namespace meshopt;
 
-	assert(vertex_size == 8);
-	(void)vertex_size;
+	assert(stride == 8);
+	(void)stride;
 
 #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
-	dispatchSimd(decodeFilterQuatSimd, static_cast<short*>(buffer), vertex_count, 4);
+	dispatchSimd(decodeFilterQuatSimd, static_cast<short*>(buffer), count, 4);
 #else
-	decodeFilterQuat(static_cast<short*>(buffer), vertex_count);
+	decodeFilterQuat(static_cast<short*>(buffer), count);
 #endif
 }
 
-void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size)
+void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride)
 {
 	using namespace meshopt;
 
-	assert(vertex_size % 4 == 0);
+	assert(stride > 0 && stride % 4 == 0);
 
 #if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
-	dispatchSimd(decodeFilterExpSimd, static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4), 1);
+	dispatchSimd(decodeFilterExpSimd, static_cast<unsigned int*>(buffer), count * (stride / 4), 1);
 #else
-	decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4));
+	decodeFilterExp(static_cast<unsigned int*>(buffer), count * (stride / 4));
 #endif
 }
 
+void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data)
+{
+	assert(stride == 4 || stride == 8);
+	assert(bits >= 1 && bits <= 16);
+
+	signed char* d8 = static_cast<signed char*>(destination);
+	short* d16 = static_cast<short*>(destination);
+
+	int bytebits = int(stride * 2);
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		const float* n = &data[i * 4];
+
+		// octahedral encoding of a unit vector
+		float nx = n[0], ny = n[1], nz = n[2], nw = n[3];
+		float nl = fabsf(nx) + fabsf(ny) + fabsf(nz);
+		float ns = nl == 0.f ? 0.f : 1.f / nl;
+
+		nx *= ns;
+		ny *= ns;
+
+		float u = (nz >= 0.f) ? nx : (1 - fabsf(ny)) * (nx >= 0.f ? 1.f : -1.f);
+		float v = (nz >= 0.f) ? ny : (1 - fabsf(nx)) * (ny >= 0.f ? 1.f : -1.f);
+
+		int fu = meshopt_quantizeSnorm(u, bits);
+		int fv = meshopt_quantizeSnorm(v, bits);
+		int fo = meshopt_quantizeSnorm(1.f, bits);
+		int fw = meshopt_quantizeSnorm(nw, bytebits);
+
+		if (stride == 4)
+		{
+			d8[i * 4 + 0] = (signed char)(fu);
+			d8[i * 4 + 1] = (signed char)(fv);
+			d8[i * 4 + 2] = (signed char)(fo);
+			d8[i * 4 + 3] = (signed char)(fw);
+		}
+		else
+		{
+			d16[i * 4 + 0] = short(fu);
+			d16[i * 4 + 1] = short(fv);
+			d16[i * 4 + 2] = short(fo);
+			d16[i * 4 + 3] = short(fw);
+		}
+	}
+}
+
+void meshopt_encodeFilterQuat(void* destination_, size_t count, size_t stride, int bits, const float* data)
+{
+	assert(stride == 8);
+	assert(bits >= 4 && bits <= 16);
+	(void)stride;
+
+	short* destination = static_cast<short*>(destination_);
+
+	const float scaler = sqrtf(2.f);
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		const float* q = &data[i * 4];
+		short* d = &destination[i * 4];
+
+		// establish maximum quaternion component
+		int qc = 0;
+		qc = fabsf(q[1]) > fabsf(q[qc]) ? 1 : qc;
+		qc = fabsf(q[2]) > fabsf(q[qc]) ? 2 : qc;
+		qc = fabsf(q[3]) > fabsf(q[qc]) ? 3 : qc;
+
+		// we use double-cover properties to discard the sign
+		float sign = q[qc] < 0.f ? -1.f : 1.f;
+
+		// note: we always encode a cyclical swizzle to be able to recover the order via rotation
+		d[0] = short(meshopt_quantizeSnorm(q[(qc + 1) & 3] * scaler * sign, bits));
+		d[1] = short(meshopt_quantizeSnorm(q[(qc + 2) & 3] * scaler * sign, bits));
+		d[2] = short(meshopt_quantizeSnorm(q[(qc + 3) & 3] * scaler * sign, bits));
+		d[3] = short((meshopt_quantizeSnorm(1.f, bits) & ~3) | qc);
+	}
+}
+
+void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, int bits, const float* data)
+{
+	assert(stride > 0 && stride % 4 == 0);
+	assert(bits >= 1 && bits <= 24);
+
+	unsigned int* destination = static_cast<unsigned int*>(destination_);
+	size_t stride_float = stride / sizeof(float);
+
+	for (size_t i = 0; i < count; ++i)
+	{
+		const float* v = &data[i * stride_float];
+		unsigned int* d = &destination[i * stride_float];
+
+		// use maximum exponent to encode values; this guarantees that the mantissa lies in [-1, 1]
+		int exp = -100;
+
+		for (size_t j = 0; j < stride_float; ++j)
+		{
+			int e;
+			frexp(v[j], &e);
+
+			exp = (exp < e) ? e : exp;
+		}
+
+		// note that we additionally scale the mantissa to make it a K-bit signed integer (K-1 bits for magnitude)
+		exp -= (bits - 1);
+
+		// compute renormalized rounded mantissa for each component
+		int mmask = (1 << 24) - 1;
+
+		for (size_t j = 0; j < stride_float; ++j)
+		{
+			int m = int(ldexp(v[j], -exp) + (v[j] >= 0 ? 0.5f : -0.5f));
+
+			d[j] = (m & mmask) | (unsigned(exp) << 24);
+		}
+	}
+}
+
 #undef SIMD_SSE
 #undef SIMD_NEON
 #undef SIMD_WASM