summaryrefslogtreecommitdiff
path: root/thirdparty/meshoptimizer
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/meshoptimizer')
-rw-r--r--thirdparty/meshoptimizer/LICENSE.md2
-rw-r--r--thirdparty/meshoptimizer/clusterizer.cpp616
-rw-r--r--thirdparty/meshoptimizer/indexgenerator.cpp206
-rw-r--r--thirdparty/meshoptimizer/meshoptimizer.h109
-rw-r--r--thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch262
-rw-r--r--thirdparty/meshoptimizer/simplifier.cpp170
-rw-r--r--thirdparty/meshoptimizer/vertexcodec.cpp14
7 files changed, 1285 insertions, 94 deletions
diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md
index 4fcd766d22..3c52415f62 100644
--- a/thirdparty/meshoptimizer/LICENSE.md
+++ b/thirdparty/meshoptimizer/LICENSE.md
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2016-2020 Arseny Kapoulkine
+Copyright (c) 2016-2021 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp
index f7d88c5136..f8aad7b49c 100644
--- a/thirdparty/meshoptimizer/clusterizer.cpp
+++ b/thirdparty/meshoptimizer/clusterizer.cpp
@@ -2,6 +2,7 @@
#include "meshoptimizer.h"
#include <assert.h>
+#include <float.h>
#include <math.h>
#include <string.h>
@@ -12,6 +13,68 @@
namespace meshopt
{
+// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet
+const size_t kMeshletMaxVertices = 255;
+
+// A reasonable limit is around 2*max_vertices or less
+const size_t kMeshletMaxTriangles = 512;
+
+struct TriangleAdjacency2
+{
+ unsigned int* counts;
+ unsigned int* offsets;
+ unsigned int* data;
+};
+
+static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
+{
+ size_t face_count = index_count / 3;
+
+ // allocate arrays
+ adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
+ adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
+ adjacency.data = allocator.allocate<unsigned int>(index_count);
+
+ // fill triangle counts
+ memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
+
+ for (size_t i = 0; i < index_count; ++i)
+ {
+ assert(indices[i] < vertex_count);
+
+ adjacency.counts[indices[i]]++;
+ }
+
+ // fill offset table
+ unsigned int offset = 0;
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ adjacency.offsets[i] = offset;
+ offset += adjacency.counts[i];
+ }
+
+ assert(offset == index_count);
+
+ // fill triangle data
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+
+ adjacency.data[adjacency.offsets[a]++] = unsigned(i);
+ adjacency.data[adjacency.offsets[b]++] = unsigned(i);
+ adjacency.data[adjacency.offsets[c]++] = unsigned(i);
+ }
+
+ // fix offsets that have been disturbed by the previous pass
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ assert(adjacency.offsets[i] >= adjacency.counts[i]);
+
+ adjacency.offsets[i] -= adjacency.counts[i];
+ }
+}
+
static void computeBoundingSphere(float result[4], const float points[][3], size_t count)
{
assert(count > 0);
@@ -82,13 +145,310 @@ static void computeBoundingSphere(float result[4], const float points[][3], size
result[3] = radius;
}
+struct Cone
+{
+ float px, py, pz;
+ float nx, ny, nz;
+};
+
+static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius)
+{
+ float cone = 1.f - spread * cone_weight;
+ float cone_clamped = cone < 1e-3f ? 1e-3f : cone;
+
+ return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped;
+}
+
+static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count)
+{
+ Cone result = acc;
+
+ float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count);
+
+ result.px *= center_scale;
+ result.py *= center_scale;
+ result.pz *= center_scale;
+
+ float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz;
+ float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length);
+
+ result.nx *= axis_scale;
+ result.ny *= axis_scale;
+ result.nz *= axis_scale;
+
+ return result;
+}
+
+static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ (void)vertex_count;
+
+ size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
+ size_t face_count = index_count / 3;
+
+ float mesh_area = 0;
+
+ for (size_t i = 0; i < face_count; ++i)
+ {
+ unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ const float* p0 = vertex_positions + vertex_stride_float * a;
+ const float* p1 = vertex_positions + vertex_stride_float * b;
+ const float* p2 = vertex_positions + vertex_stride_float * c;
+
+ float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]};
+ float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]};
+
+ float normalx = p10[1] * p20[2] - p10[2] * p20[1];
+ float normaly = p10[2] * p20[0] - p10[0] * p20[2];
+ float normalz = p10[0] * p20[1] - p10[1] * p20[0];
+
+ float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz);
+ float invarea = (area == 0.f) ? 0.f : 1.f / area;
+
+ triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f;
+ triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f;
+ triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f;
+
+ triangles[i].nx = normalx * invarea;
+ triangles[i].ny = normaly * invarea;
+ triangles[i].nz = normalz * invarea;
+
+ mesh_area += area;
+ }
+
+ return mesh_area;
+}
+
+static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles)
+{
+ size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3;
+
+ // fill 4b padding with 0
+ while (offset & 3)
+ meshlet_triangles[offset++] = 0;
+}
+
+static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles)
+{
+ unsigned char& av = used[a];
+ unsigned char& bv = used[b];
+ unsigned char& cv = used[c];
+
+ bool result = false;
+
+ unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
+
+ if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+ {
+ meshlets[meshlet_offset] = meshlet;
+
+ for (size_t j = 0; j < meshlet.vertex_count; ++j)
+ used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff;
+
+ finishMeshlet(meshlet, meshlet_triangles);
+
+ meshlet.vertex_offset += meshlet.vertex_count;
+ meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding
+ meshlet.vertex_count = 0;
+ meshlet.triangle_count = 0;
+
+ result = true;
+ }
+
+ if (av == 0xff)
+ {
+ av = (unsigned char)meshlet.vertex_count;
+ meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a;
+ }
+
+ if (bv == 0xff)
+ {
+ bv = (unsigned char)meshlet.vertex_count;
+ meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b;
+ }
+
+ if (cv == 0xff)
+ {
+ cv = (unsigned char)meshlet.vertex_count;
+ meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c;
+ }
+
+ meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av;
+ meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv;
+ meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv;
+ meshlet.triangle_count++;
+
+ return result;
+}
+
+struct KDNode
+{
+ union
+ {
+ float split;
+ unsigned int index;
+ };
+
+ // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point)
+ // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children
+ unsigned int axis : 2;
+ unsigned int children : 30;
+};
+
+static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot)
+{
+ size_t m = 0;
+
+ // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot
+ for (size_t i = 0; i < count; ++i)
+ {
+ float v = points[indices[i] * stride + axis];
+
+ // swap(m, i) unconditionally
+ unsigned int t = indices[m];
+ indices[m] = indices[i];
+ indices[i] = t;
+
+ // when v >= pivot, we swap i with m without advancing it, preserving invariants
+ m += v < pivot;
+ }
+
+ return m;
+}
+
+static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count)
+{
+ assert(offset + count <= node_count);
+ (void)node_count;
+
+ KDNode& result = nodes[offset];
+
+ result.index = indices[0];
+ result.axis = 3;
+ result.children = unsigned(count - 1);
+
+ // all remaining points are stored in nodes immediately following the leaf
+ for (size_t i = 1; i < count; ++i)
+ {
+ KDNode& tail = nodes[offset + i];
+
+ tail.index = indices[i];
+ tail.axis = 3;
+ tail.children = ~0u >> 2; // bogus value to prevent misuse
+ }
+
+ return offset + count;
+}
+
+static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size)
+{
+ assert(count > 0);
+ assert(offset < node_count);
+
+ if (count <= leaf_size)
+ return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+
+ float mean[3] = {};
+ float vars[3] = {};
+ float runc = 1, runs = 1;
+
+ // gather statistics on the points in the subtree using Welford's algorithm
+ for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc)
+ {
+ const float* point = points + indices[i] * stride;
+
+ for (int k = 0; k < 3; ++k)
+ {
+ float delta = point[k] - mean[k];
+ mean[k] += delta * runs;
+ vars[k] += delta * (point[k] - mean[k]);
+ }
+ }
+
+ // split axis is one where the variance is largest
+ unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1
+ : 2;
+
+ float split = mean[axis];
+ size_t middle = kdtreePartition(indices, count, points, stride, axis, split);
+
+ // when the partition is degenerate simply consolidate the points into a single node
+ if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2)
+ return kdtreeBuildLeaf(offset, nodes, node_count, indices, count);
+
+ KDNode& result = nodes[offset];
+
+ result.split = split;
+ result.axis = axis;
+
+ // left subtree is right after our node
+ size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size);
+
+ // distance to the right subtree is represented explicitly
+ result.children = unsigned(next_offset - offset - 1);
+
+ return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size);
+}
+
+static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit)
+{
+ const KDNode& node = nodes[root];
+
+ if (node.axis == 3)
+ {
+ // leaf
+ for (unsigned int i = 0; i <= node.children; ++i)
+ {
+ unsigned int index = nodes[root + i].index;
+
+ if (emitted_flags[index])
+ continue;
+
+ const float* point = points + index * stride;
+
+ float distance2 =
+ (point[0] - position[0]) * (point[0] - position[0]) +
+ (point[1] - position[1]) * (point[1] - position[1]) +
+ (point[2] - position[2]) * (point[2] - position[2]);
+ float distance = sqrtf(distance2);
+
+ if (distance < limit)
+ {
+ result = index;
+ limit = distance;
+ }
+ }
+ }
+ else
+ {
+ // branch; we order recursion to process the node that search position is in first
+ float delta = position[node.axis] - node.split;
+ unsigned int first = (delta <= 0) ? 0 : node.children;
+ unsigned int second = first ^ node.children;
+
+ kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit);
+
+ // only process the other node if it can have a match based on closest distance so far
+ if (fabsf(delta) <= limit)
+ kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit);
+ }
+}
+
} // namespace meshopt
size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles)
{
+ using namespace meshopt;
+
assert(index_count % 3 == 0);
- assert(max_vertices >= 3);
- assert(max_triangles >= 1);
+ assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+ assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+ assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+ (void)kMeshletMaxVertices;
+ (void)kMeshletMaxTriangles;
// meshlet construction is limited by max vertices and max triangles per meshlet
// the worst case is that the input is an unindexed stream since this equally stresses both limits
@@ -100,77 +460,226 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_
return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles;
}
-size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
{
+ using namespace meshopt;
+
assert(index_count % 3 == 0);
- assert(max_vertices >= 3);
- assert(max_triangles >= 1);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+ assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+ assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
meshopt_Allocator allocator;
- meshopt_Meshlet meshlet;
- memset(&meshlet, 0, sizeof(meshlet));
+ TriangleAdjacency2 adjacency = {};
+ buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
+
+ unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
+ memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
+
+ size_t face_count = index_count / 3;
+
+ unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
+ memset(emitted_flags, 0, face_count);
+
+ // for each triangle, precompute centroid & normal to use for scoring
+ Cone* triangles = allocator.allocate<Cone>(face_count);
+ float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+
+ // assuming each meshlet is a square patch, expected radius is sqrt(expected area)
+ float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f;
+ float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f;
+
+ // build a kd-tree for nearest neighbor lookup
+ unsigned int* kdindices = allocator.allocate<unsigned int>(face_count);
+ for (size_t i = 0; i < face_count; ++i)
+ kdindices[i] = unsigned(i);
- assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0]));
- assert(max_triangles <= sizeof(meshlet.indices) / 3);
+ KDNode* nodes = allocator.allocate<KDNode>(face_count * 2);
+ kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8);
// index of the vertex in the meshlet, 0xff if the vertex isn't used
unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
memset(used, -1, vertex_count);
- size_t offset = 0;
+ meshopt_Meshlet meshlet = {};
+ size_t meshlet_offset = 0;
- for (size_t i = 0; i < index_count; i += 3)
- {
- unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+ Cone meshlet_cone_acc = {};
- unsigned char& av = used[a];
- unsigned char& bv = used[b];
- unsigned char& cv = used[c];
+ for (;;)
+ {
+ unsigned int best_triangle = ~0u;
+ unsigned int best_extra = 5;
+ float best_score = FLT_MAX;
- unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff);
+ Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count);
- if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles)
+ for (size_t i = 0; i < meshlet.vertex_count; ++i)
{
- destination[offset++] = meshlet;
+ unsigned int index = meshlet_vertices[meshlet.vertex_offset + i];
+
+ unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+ size_t neighbours_size = adjacency.counts[index];
+
+ for (size_t j = 0; j < neighbours_size; ++j)
+ {
+ unsigned int triangle = neighbours[j];
+ assert(!emitted_flags[triangle]);
+
+ unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff);
+
+ // triangles that don't add new vertices to meshlets are max. priority
+ if (extra != 0)
+ {
+ // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets
+ if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1)
+ extra = 0;
+
+ extra++;
+ }
+
+ // since topology-based priority is always more important than the score, we can skip scoring in some cases
+ if (extra > best_extra)
+ continue;
+
+ const Cone& tri_cone = triangles[triangle];
+
+ float distance2 =
+ (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) +
+ (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) +
+ (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz);
- for (size_t j = 0; j < meshlet.vertex_count; ++j)
- used[meshlet.vertices[j]] = 0xff;
+ float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz;
- memset(&meshlet, 0, sizeof(meshlet));
+ float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius);
+
+ // note that topology-based priority is always more important than the score
+ // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost
+ if (extra < best_extra || score < best_score)
+ {
+ best_triangle = triangle;
+ best_extra = extra;
+ best_score = score;
+ }
+ }
}
- if (av == 0xff)
+ if (best_triangle == ~0u)
{
- av = meshlet.vertex_count;
- meshlet.vertices[meshlet.vertex_count++] = a;
+ float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz};
+ unsigned int index = ~0u;
+ float limit = FLT_MAX;
+
+ kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit);
+
+ best_triangle = index;
}
- if (bv == 0xff)
+ if (best_triangle == ~0u)
+ break;
+
+ unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds
+ if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles))
{
- bv = meshlet.vertex_count;
- meshlet.vertices[meshlet.vertex_count++] = b;
+ meshlet_offset++;
+ memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc));
}
- if (cv == 0xff)
+ live_triangles[a]--;
+ live_triangles[b]--;
+ live_triangles[c]--;
+
+ // remove emitted triangle from adjacency data
+ // this makes sure that we spend less time traversing these lists on subsequent iterations
+ for (size_t k = 0; k < 3; ++k)
{
- cv = meshlet.vertex_count;
- meshlet.vertices[meshlet.vertex_count++] = c;
+ unsigned int index = indices[best_triangle * 3 + k];
+
+ unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
+ size_t neighbours_size = adjacency.counts[index];
+
+ for (size_t i = 0; i < neighbours_size; ++i)
+ {
+ unsigned int tri = neighbours[i];
+
+ if (tri == best_triangle)
+ {
+ neighbours[i] = neighbours[neighbours_size - 1];
+ adjacency.counts[index]--;
+ break;
+ }
+ }
}
- meshlet.indices[meshlet.triangle_count][0] = av;
- meshlet.indices[meshlet.triangle_count][1] = bv;
- meshlet.indices[meshlet.triangle_count][2] = cv;
- meshlet.triangle_count++;
+ // update aggregated meshlet cone data for scoring subsequent triangles
+ meshlet_cone_acc.px += triangles[best_triangle].px;
+ meshlet_cone_acc.py += triangles[best_triangle].py;
+ meshlet_cone_acc.pz += triangles[best_triangle].pz;
+ meshlet_cone_acc.nx += triangles[best_triangle].nx;
+ meshlet_cone_acc.ny += triangles[best_triangle].ny;
+ meshlet_cone_acc.nz += triangles[best_triangle].nz;
+
+ emitted_flags[best_triangle] = 1;
+ }
+
+ if (meshlet.triangle_count)
+ {
+ finishMeshlet(meshlet, meshlet_triangles);
+
+ meshlets[meshlet_offset++] = meshlet;
+ }
+
+ assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+ return meshlet_offset;
+}
+
+size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+
+ assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices);
+ assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles);
+ assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned
+
+ meshopt_Allocator allocator;
+
+ // index of the vertex in the meshlet, 0xff if the vertex isn't used
+ unsigned char* used = allocator.allocate<unsigned char>(vertex_count);
+ memset(used, -1, vertex_count);
+
+ meshopt_Meshlet meshlet = {};
+ size_t meshlet_offset = 0;
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2];
+ assert(a < vertex_count && b < vertex_count && c < vertex_count);
+
+ // appends triangle to the meshlet and writes previous meshlet to the output if full
+ meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles);
}
if (meshlet.triangle_count)
- destination[offset++] = meshlet;
+ {
+ finishMeshlet(meshlet, meshlet_triangles);
- assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+ meshlets[meshlet_offset++] = meshlet;
+ }
- return offset;
+ assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles));
+ return meshlet_offset;
}
meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
@@ -178,18 +687,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
using namespace meshopt;
assert(index_count % 3 == 0);
+ assert(index_count / 3 <= kMeshletMaxTriangles);
assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
- assert(index_count / 3 <= 256);
-
(void)vertex_count;
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
// compute triangle normals and gather triangle corners
- float normals[256][3];
- float corners[256][3][3];
+ float normals[kMeshletMaxTriangles][3];
+ float corners[kMeshletMaxTriangles][3][3];
size_t triangles = 0;
for (size_t i = 0; i < index_count; i += 3)
@@ -327,25 +835,23 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t
return bounds;
}
-meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
{
+ using namespace meshopt;
+
+ assert(triangle_count <= kMeshletMaxTriangles);
assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
assert(vertex_positions_stride % sizeof(float) == 0);
- unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])];
+ unsigned int indices[kMeshletMaxTriangles * 3];
- for (size_t i = 0; i < meshlet->triangle_count; ++i)
+ for (size_t i = 0; i < triangle_count * 3; ++i)
{
- unsigned int a = meshlet->vertices[meshlet->indices[i][0]];
- unsigned int b = meshlet->vertices[meshlet->indices[i][1]];
- unsigned int c = meshlet->vertices[meshlet->indices[i][2]];
-
- assert(a < vertex_count && b < vertex_count && c < vertex_count);
+ unsigned int index = meshlet_vertices[meshlet_triangles[i]];
+ assert(index < vertex_count);
- indices[i * 3 + 0] = a;
- indices[i * 3 + 1] = b;
- indices[i * 3 + 2] = c;
+ indices[i] = index;
}
- return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
+ return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride);
}
diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp
index aa4a30efa4..f60db0dc4f 100644
--- a/thirdparty/meshoptimizer/indexgenerator.cpp
+++ b/thirdparty/meshoptimizer/indexgenerator.cpp
@@ -4,6 +4,8 @@
#include <assert.h>
#include <string.h>
+// This work is based on:
+// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010
namespace meshopt
{
@@ -83,10 +85,49 @@ struct VertexStreamHasher
}
};
+struct EdgeHasher
+{
+ const unsigned int* remap;
+
+ size_t hash(unsigned long long edge) const
+ {
+ unsigned int e0 = unsigned(edge >> 32);
+ unsigned int e1 = unsigned(edge);
+
+ unsigned int h1 = remap[e0];
+ unsigned int h2 = remap[e1];
+
+ const unsigned int m = 0x5bd1e995;
+
+ // MurmurHash64B finalizer
+ h1 ^= h2 >> 18;
+ h1 *= m;
+ h2 ^= h1 >> 22;
+ h2 *= m;
+ h1 ^= h2 >> 17;
+ h1 *= m;
+ h2 ^= h1 >> 19;
+ h2 *= m;
+
+ return h2;
+ }
+
+ bool equal(unsigned long long lhs, unsigned long long rhs) const
+ {
+ unsigned int l0 = unsigned(lhs >> 32);
+ unsigned int l1 = unsigned(lhs);
+
+ unsigned int r0 = unsigned(rhs >> 32);
+ unsigned int r1 = unsigned(rhs);
+
+ return remap[l0] == remap[r0] && remap[l1] == remap[r1];
+ }
+};
+
static size_t hashBuckets(size_t count)
{
size_t buckets = 1;
- while (buckets < count)
+ while (buckets < count + count / 4)
buckets *= 2;
return buckets;
@@ -119,6 +160,26 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, c
return 0;
}
+static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator)
+{
+ VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride};
+
+ size_t vertex_table_size = hashBuckets(vertex_count);
+ unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size);
+ memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int));
+
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ unsigned int index = unsigned(i);
+ unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u);
+
+ if (*entry == ~0u)
+ *entry = index;
+
+ remap[index] = *entry;
+ }
+}
+
} // namespace meshopt
size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size)
@@ -345,3 +406,146 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns
destination[i] = remap[index];
}
}
+
+void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ meshopt_Allocator allocator;
+
+ static const int next[4] = {1, 2, 0, 1};
+
+ // build position remap: for each vertex, which other (canonical) vertex does it map to?
+ unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+
+ // build edge set; this stores all triangle edges but we can look these up by any other wedge
+ EdgeHasher edge_hasher = {remap};
+
+ size_t edge_table_size = hashBuckets(index_count);
+ unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+ unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size);
+
+ memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
+ memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int));
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ for (int e = 0; e < 3; ++e)
+ {
+ unsigned int i0 = indices[i + e];
+ unsigned int i1 = indices[i + next[e]];
+ unsigned int i2 = indices[i + next[e + 1]];
+ assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count);
+
+ unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
+ unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+ if (*entry == ~0ull)
+ {
+ *entry = edge;
+
+ // store vertex opposite to the edge
+ edge_vertex_table[entry - edge_table] = i2;
+ }
+ }
+ }
+
+ // build resulting index buffer: 6 indices for each input triangle
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int patch[6];
+
+ for (int e = 0; e < 3; ++e)
+ {
+ unsigned int i0 = indices[i + e];
+ unsigned int i1 = indices[i + next[e]];
+ assert(i0 < vertex_count && i1 < vertex_count);
+
+ // note: this refers to the opposite edge!
+ unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+ unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+ patch[e * 2 + 0] = i0;
+ patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table];
+ }
+
+ memcpy(destination + i * 2, patch, sizeof(patch));
+ }
+}
+
+void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+ assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+ assert(vertex_positions_stride % sizeof(float) == 0);
+
+ meshopt_Allocator allocator;
+
+ static const int next[3] = {1, 2, 0};
+
+ // build position remap: for each vertex, which other (canonical) vertex does it map to?
+ unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator);
+
+ // build edge set; this stores all triangle edges but we can look these up by any other wedge
+ EdgeHasher edge_hasher = {remap};
+
+ size_t edge_table_size = hashBuckets(index_count);
+ unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size);
+ memset(edge_table, -1, edge_table_size * sizeof(unsigned long long));
+
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ for (int e = 0; e < 3; ++e)
+ {
+ unsigned int i0 = indices[i + e];
+ unsigned int i1 = indices[i + next[e]];
+ assert(i0 < vertex_count && i1 < vertex_count);
+
+ unsigned long long edge = ((unsigned long long)i0 << 32) | i1;
+ unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+ if (*entry == ~0ull)
+ *entry = edge;
+ }
+ }
+
+ // build resulting index buffer: 12 indices for each input triangle
+ for (size_t i = 0; i < index_count; i += 3)
+ {
+ unsigned int patch[12];
+
+ for (int e = 0; e < 3; ++e)
+ {
+ unsigned int i0 = indices[i + e];
+ unsigned int i1 = indices[i + next[e]];
+ assert(i0 < vertex_count && i1 < vertex_count);
+
+ // note: this refers to the opposite edge!
+ unsigned long long edge = ((unsigned long long)i1 << 32) | i0;
+ unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull);
+
+ // use the same edge if opposite edge doesn't exist (border)
+ oppe = (oppe == ~0ull) ? edge : oppe;
+
+ // triangle index (0, 1, 2)
+ patch[e] = i0;
+
+ // opposite edge (3, 4; 5, 6; 7, 8)
+ patch[3 + e * 2 + 0] = unsigned(oppe);
+ patch[3 + e * 2 + 1] = unsigned(oppe >> 32);
+
+ // dominant vertex (9, 10, 11)
+ patch[9 + e] = remap[i0];
+ }
+
+ memcpy(destination + i * 4, patch, sizeof(patch));
+ }
+}
diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
index 1714000384..e44b99ce52 100644
--- a/thirdparty/meshoptimizer/meshoptimizer.h
+++ b/thirdparty/meshoptimizer/meshoptimizer.h
@@ -1,7 +1,7 @@
/**
- * meshoptimizer - version 0.15
+ * meshoptimizer - version 0.16
*
- * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
+ * Copyright (C) 2016-2021, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at https://github.com/zeux/meshoptimizer
*
* This library is distributed under the MIT License. See notice at the end of this file.
@@ -12,7 +12,7 @@
#include <stddef.h>
/* Version macro; major * 1000 + minor * 10 + patch */
-#define MESHOPTIMIZER_VERSION 150 /* 0.15 */
+#define MESHOPTIMIZER_VERSION 160 /* 0.16 */
/* If no API is defined, assume default */
#ifndef MESHOPTIMIZER_API
@@ -98,6 +98,35 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati
MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
/**
+ * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology
+ * Each triangle is converted into a 6-vertex patch with the following layout:
+ * - 0, 2, 4: original triangle vertices
+ * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40
+ * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY.
+ * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count*2 elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
+ * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement
+ * Each triangle is converted into a 12-vertex patch with the following layout:
+ * - 0, 1, 2: original triangle vertices
+ * - 3, 4: opposing edge for edge 0, 1
+ * - 5, 6: opposing edge for edge 1, 2
+ * - 7, 8: opposing edge for edge 2, 0
+ * - 9, 10, 11: dominant vertices for corners 0, 1, 2
+ * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping.
+ * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details.
+ *
+ * destination must contain enough space for the resulting index buffer (index_count*4 elements)
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ */
+MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+
+/**
* Vertex transform cache optimizer
* Reorders indices to reduce the number of GPU vertex shader invocations
* If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually.
@@ -270,6 +299,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
/**
+ * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex)
+ */
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count);
+
+/**
* Experimental: Mesh simplifier (sloppy)
* Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance
* The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error.
@@ -373,22 +407,31 @@ MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetc
struct meshopt_Meshlet
{
- unsigned int vertices[64];
- unsigned char indices[126][3];
- unsigned char triangle_count;
- unsigned char vertex_count;
+ /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */
+ unsigned int vertex_offset;
+ unsigned int triangle_offset;
+
+ /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */
+ unsigned int vertex_count;
+ unsigned int triangle_count;
};
/**
* Experimental: Meshlet builder
* Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer
* The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers.
- * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
+ * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters.
+ * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first.
*
- * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
- * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126)
+ * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound
+ * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices
+ * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
+ * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512)
+ * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency
*/
-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
+MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles);
struct meshopt_Bounds
@@ -426,10 +469,10 @@ struct meshopt_Bounds
* to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable.
*
* vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer
- * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size)
+ * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size)
*/
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
-MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
/**
* Experimental: Spatial sorter
@@ -513,6 +556,10 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices,
template <typename T>
inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count);
template <typename T>
+inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+template <typename T>
+inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
+template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count);
template <typename T>
inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count);
@@ -547,7 +594,9 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size
template <typename T>
inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size);
template <typename T>
-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
+inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight);
+template <typename T>
+inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles);
template <typename T>
inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
template <typename T>
@@ -762,6 +811,24 @@ inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indi
}
template <typename T>
+inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count * 2);
+
+ meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+}
+
+template <typename T>
+inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+ meshopt_IndexAdapter<T> out(destination, 0, index_count * 4);
+
+ meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride);
+}
+
+template <typename T>
inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count)
{
meshopt_IndexAdapter<T> in(0, indices, index_count);
@@ -908,11 +975,19 @@ inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices
}
template <typename T>
-inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
+inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight)
+{
+ meshopt_IndexAdapter<T> in(0, indices, index_count);
+
+ return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight);
+}
+
+template <typename T>
+inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles)
{
meshopt_IndexAdapter<T> in(0, indices, index_count);
- return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles);
+ return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles);
}
template <typename T>
@@ -934,7 +1009,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_
#endif
/**
- * Copyright (c) 2016-2020 Arseny Kapoulkine
+ * Copyright (c) 2016-2021 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
new file mode 100644
index 0000000000..cf648b0da3
--- /dev/null
+++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch
@@ -0,0 +1,262 @@
+diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h
+index fe8d349731..e44b99ce52 100644
+--- a/thirdparty/meshoptimizer/meshoptimizer.h
++++ b/thirdparty/meshoptimizer/meshoptimizer.h
+@@ -298,6 +298,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver
+ */
+ MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
+
++/**
++ * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex)
++ */
++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count);
++
+ /**
+ * Experimental: Mesh simplifier (sloppy)
+ * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance
+diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
+index b2cb589462..059cabb055 100644
+--- a/thirdparty/meshoptimizer/simplifier.cpp
++++ b/thirdparty/meshoptimizer/simplifier.cpp
+@@ -20,6 +20,8 @@
+ #define TRACESTATS(i) (void)0
+ #endif
+
++#define ATTRIBUTES 8
++
+ // This work is based on:
+ // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
+ // Michael Garland. Quadric-based polygonal surface simplification. 1999
+@@ -358,6 +360,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
+ struct Vector3
+ {
+ float x, y, z;
++
++#if ATTRIBUTES
++ float a[ATTRIBUTES];
++#endif
+ };
+
+ static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
+@@ -414,6 +420,13 @@ struct Quadric
+ float a10, a20, a21;
+ float b0, b1, b2, c;
+ float w;
++
++#if ATTRIBUTES
++ float gx[ATTRIBUTES];
++ float gy[ATTRIBUTES];
++ float gz[ATTRIBUTES];
++ float gw[ATTRIBUTES];
++#endif
+ };
+
+ struct Collapse
+@@ -456,6 +469,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
+ Q.b2 += R.b2;
+ Q.c += R.c;
+ Q.w += R.w;
++
++#if ATTRIBUTES
++ for (int k = 0; k < ATTRIBUTES; ++k)
++ {
++ Q.gx[k] += R.gx[k];
++ Q.gy[k] += R.gy[k];
++ Q.gz[k] += R.gz[k];
++ Q.gw[k] += R.gw[k];
++ }
++#endif
+ }
+
+ static float quadricError(const Quadric& Q, const Vector3& v)
+@@ -481,6 +504,17 @@ static float quadricError(const Quadric& Q, const Vector3& v)
+ r += ry * v.y;
+ r += rz * v.z;
+
++#if ATTRIBUTES
++ // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr
++ for (int k = 0; k < ATTRIBUTES; ++k)
++ {
++ float a = v.a[k];
++
++ r += a * a * Q.w;
++ r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]);
++ }
++#endif
++
+ float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
+
+ return fabsf(r) * s;
+@@ -504,6 +538,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
+ Q.b2 = c * dw;
+ Q.c = d * dw;
+ Q.w = w;
++
++#if ATTRIBUTES
++ memset(Q.gx, 0, sizeof(Q.gx));
++ memset(Q.gy, 0, sizeof(Q.gy));
++ memset(Q.gz, 0, sizeof(Q.gz));
++ memset(Q.gw, 0, sizeof(Q.gw));
++#endif
+ }
+
+ static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
+@@ -556,6 +597,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
+ quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
+ }
+
++#if ATTRIBUTES
++static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w)
++{
++ // for each attribute we want to encode the following function into the quadric:
++ // (eval(pos) - attr)^2
++ // where eval(pos) interpolates attribute across the triangle like so:
++ // eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw
++ // where gx/gy/gz/gw are gradients
++ Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
++ Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
++
++ // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows:
++ // v = (d11 * d20 - d01 * d21) / denom
++ // w = (d00 * d21 - d01 * d20) / denom
++ // u = 1 - v - w
++ // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj)
++ const Vector3& v0 = p10;
++ const Vector3& v1 = p20;
++ float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z;
++ float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
++ float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z;
++ float denom = d00 * d11 - d01 * d01;
++ float denomr = denom == 0 ? 0.f : 1.f / denom;
++
++ // precompute gradient factors
++ // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes
++ float gx1 = (d11 * v0.x - d01 * v1.x) * denomr;
++ float gx2 = (d00 * v1.x - d01 * v0.x) * denomr;
++ float gy1 = (d11 * v0.y - d01 * v1.y) * denomr;
++ float gy2 = (d00 * v1.y - d01 * v0.y) * denomr;
++ float gz1 = (d11 * v0.z - d01 * v1.z) * denomr;
++ float gz2 = (d00 * v1.z - d01 * v0.z) * denomr;
++
++ for (int k = 0; k < ATTRIBUTES; ++k)
++ {
++ float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k];
++
++ // compute gradient of eval(pos) for x/y/z/w
++ // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w
++ float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0);
++ float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0);
++ float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0);
++ float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz;
++
++ // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K
++ // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields
++ Q.a00 += w * (gx * gx);
++ Q.a11 += w * (gy * gy);
++ Q.a22 += w * (gz * gz);
++
++ Q.a10 += w * (gy * gx);
++ Q.a20 += w * (gz * gx);
++ Q.a21 += w * (gz * gy);
++
++ Q.b0 += w * (gx * gw);
++ Q.b1 += w * (gy * gw);
++ Q.b2 += w * (gz * gw);
++
++ Q.c += w * (gw * gw);
++
++ // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError
++ Q.gx[k] = w * gx;
++ Q.gy[k] = w * gy;
++ Q.gz[k] = w * gz;
++ Q.gw[k] = w * gw;
++
++#if TRACE > 2
++ printf("attr%d: %e %e %e\n",
++ k,
++ (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0),
++ (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1),
++ (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2)
++ );
++#endif
++ }
++}
++#endif
++
+ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
+ {
+ for (size_t i = 0; i < index_count; i += 3)
+@@ -567,6 +686,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
+ Quadric Q;
+ quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
+
++#if ATTRIBUTES
++ quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
++#endif
+ quadricAdd(vertex_quadrics[remap[i0]], Q);
+ quadricAdd(vertex_quadrics[remap[i1]], Q);
+ quadricAdd(vertex_quadrics[remap[i2]], Q);
+@@ -1259,13 +1381,19 @@ unsigned int* meshopt_simplifyDebugLoopBack = 0;
+ #endif
+
+ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
++{
++ return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0);
++}
++
++size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count)
+ {
+ using namespace meshopt;
+
+ assert(index_count % 3 == 0);
+- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
+- assert(vertex_positions_stride % sizeof(float) == 0);
++ assert(vertex_stride > 0 && vertex_stride <= 256);
++ assert(vertex_stride % sizeof(float) == 0);
+ assert(target_index_count <= index_count);
++ assert(attribute_count <= ATTRIBUTES);
+
+ meshopt_Allocator allocator;
+
+@@ -1279,7 +1407,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ // build position remap that maps each vertex to the one with identical position
+ unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
+ unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
+- buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator);
++ buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator);
+
+ // classify vertices; vertex kind determines collapse rules, see kCanCollapse
+ unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
+@@ -1303,7 +1431,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+ #endif
+
+ Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
+- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
++ rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride);
++
++#if ATTRIBUTES
++ for (size_t i = 0; i < vertex_count; ++i)
++ {
++ memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a));
++
++ for (size_t k = 0; k < attribute_count; ++k)
++ {
++ float a = attributes[i * attribute_count + k];
++
++ vertex_positions[i].a[k] = a * attribute_weights[k];
++ }
++ }
++#endif
+
+ Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
+ memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
+@@ -1395,7 +1537,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
+
+ // result_error is quadratic; we need to remap it back to linear
+ if (out_result_error)
++ {
+ *out_result_error = sqrtf(result_error);
++ }
+
+ return result_count;
+ }
diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp
index 942db14461..0f10ebef4b 100644
--- a/thirdparty/meshoptimizer/simplifier.cpp
+++ b/thirdparty/meshoptimizer/simplifier.cpp
@@ -20,6 +20,8 @@
#define TRACESTATS(i) (void)0
#endif
+#define ATTRIBUTES 8
+
// This work is based on:
// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997
// Michael Garland. Quadric-based polygonal surface simplification. 1999
@@ -118,8 +120,13 @@ struct PositionHasher
{
const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float);
+ // scramble bits to make sure that integer coordinates have entropy in lower bits
+ unsigned int x = key[0] ^ (key[0] >> 17);
+ unsigned int y = key[1] ^ (key[1] >> 17);
+ unsigned int z = key[2] ^ (key[2] >> 17);
+
// Optimized Spatial Hashing for Collision Detection of Deformable Objects
- return (key[0] * 73856093) ^ (key[1] * 19349663) ^ (key[2] * 83492791);
+ return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
}
bool equal(unsigned int lhs, unsigned int rhs) const
@@ -131,7 +138,7 @@ struct PositionHasher
static size_t hashBuckets2(size_t count)
{
size_t buckets = 1;
- while (buckets < count)
+ while (buckets < count + count / 4)
buckets *= 2;
return buckets;
@@ -358,6 +365,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
struct Vector3
{
float x, y, z;
+
+#if ATTRIBUTES
+ float a[ATTRIBUTES];
+#endif
};
static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride)
@@ -414,6 +425,13 @@ struct Quadric
float a10, a20, a21;
float b0, b1, b2, c;
float w;
+
+#if ATTRIBUTES
+ float gx[ATTRIBUTES];
+ float gy[ATTRIBUTES];
+ float gz[ATTRIBUTES];
+ float gw[ATTRIBUTES];
+#endif
};
struct Collapse
@@ -456,6 +474,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
Q.b2 += R.b2;
Q.c += R.c;
Q.w += R.w;
+
+#if ATTRIBUTES
+ for (int k = 0; k < ATTRIBUTES; ++k)
+ {
+ Q.gx[k] += R.gx[k];
+ Q.gy[k] += R.gy[k];
+ Q.gz[k] += R.gz[k];
+ Q.gw[k] += R.gw[k];
+ }
+#endif
}
static float quadricError(const Quadric& Q, const Vector3& v)
@@ -481,6 +509,17 @@ static float quadricError(const Quadric& Q, const Vector3& v)
r += ry * v.y;
r += rz * v.z;
+#if ATTRIBUTES
+ // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr
+ for (int k = 0; k < ATTRIBUTES; ++k)
+ {
+ float a = v.a[k];
+
+ r += a * a * Q.w;
+ r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]);
+ }
+#endif
+
float s = Q.w == 0.f ? 0.f : 1.f / Q.w;
return fabsf(r) * s;
@@ -504,6 +543,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
Q.b2 = c * dw;
Q.c = d * dw;
Q.w = w;
+
+#if ATTRIBUTES
+ memset(Q.gx, 0, sizeof(Q.gx));
+ memset(Q.gy, 0, sizeof(Q.gy));
+ memset(Q.gz, 0, sizeof(Q.gz));
+ memset(Q.gw, 0, sizeof(Q.gw));
+#endif
}
static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
@@ -556,6 +602,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3
quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight);
}
+#if ATTRIBUTES
+static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w)
+{
+ // for each attribute we want to encode the following function into the quadric:
+ // (eval(pos) - attr)^2
+ // where eval(pos) interpolates attribute across the triangle like so:
+ // eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw
+ // where gx/gy/gz/gw are gradients
+ Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
+ Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
+
+ // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows:
+ // v = (d11 * d20 - d01 * d21) / denom
+ // w = (d00 * d21 - d01 * d20) / denom
+ // u = 1 - v - w
+ // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj)
+ const Vector3& v0 = p10;
+ const Vector3& v1 = p20;
+ float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z;
+ float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
+ float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z;
+ float denom = d00 * d11 - d01 * d01;
+ float denomr = denom == 0 ? 0.f : 1.f / denom;
+
+ // precompute gradient factors
+ // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes
+ float gx1 = (d11 * v0.x - d01 * v1.x) * denomr;
+ float gx2 = (d00 * v1.x - d01 * v0.x) * denomr;
+ float gy1 = (d11 * v0.y - d01 * v1.y) * denomr;
+ float gy2 = (d00 * v1.y - d01 * v0.y) * denomr;
+ float gz1 = (d11 * v0.z - d01 * v1.z) * denomr;
+ float gz2 = (d00 * v1.z - d01 * v0.z) * denomr;
+
+ for (int k = 0; k < ATTRIBUTES; ++k)
+ {
+ float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k];
+
+ // compute gradient of eval(pos) for x/y/z/w
+ // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w
+ float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0);
+ float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0);
+ float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0);
+ float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz;
+
+ // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K
+ // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields
+ Q.a00 += w * (gx * gx);
+ Q.a11 += w * (gy * gy);
+ Q.a22 += w * (gz * gz);
+
+ Q.a10 += w * (gy * gx);
+ Q.a20 += w * (gz * gx);
+ Q.a21 += w * (gz * gy);
+
+ Q.b0 += w * (gx * gw);
+ Q.b1 += w * (gy * gw);
+ Q.b2 += w * (gz * gw);
+
+ Q.c += w * (gw * gw);
+
+ // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError
+ Q.gx[k] = w * gx;
+ Q.gy[k] = w * gy;
+ Q.gz[k] = w * gz;
+ Q.gw[k] = w * gw;
+
+#if TRACE > 2
+ printf("attr%d: %e %e %e\n",
+ k,
+ (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0),
+ (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1),
+ (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2)
+ );
+#endif
+ }
+}
+#endif
+
static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
{
for (size_t i = 0; i < index_count; i += 3)
@@ -567,6 +691,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
Quadric Q;
quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f);
+#if ATTRIBUTES
+ quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w);
+#endif
quadricAdd(vertex_quadrics[remap[i0]], Q);
quadricAdd(vertex_quadrics[remap[i1]], Q);
quadricAdd(vertex_quadrics[remap[i2]], Q);
@@ -1038,7 +1165,7 @@ struct IdHasher
struct TriangleHasher
{
- unsigned int* indices;
+ const unsigned int* indices;
size_t hash(unsigned int i) const
{
@@ -1253,19 +1380,26 @@ static float interpolate(float y, float x0, float y0, float x1, float y1, float
} // namespace meshopt
#ifndef NDEBUG
-unsigned char* meshopt_simplifyDebugKind = 0;
-unsigned int* meshopt_simplifyDebugLoop = 0;
-unsigned int* meshopt_simplifyDebugLoopBack = 0;
+// Note: this is only exposed for debug visualization purposes; do *not* use these in debug builds
+MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = 0;
+MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0;
+MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0;
#endif
size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
{
+ return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0);
+}
+
+size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count)
+{
using namespace meshopt;
assert(index_count % 3 == 0);
- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256);
- assert(vertex_positions_stride % sizeof(float) == 0);
+ assert(vertex_stride > 0 && vertex_stride <= 256);
+ assert(vertex_stride % sizeof(float) == 0);
assert(target_index_count <= index_count);
+ assert(attribute_count <= ATTRIBUTES);
meshopt_Allocator allocator;
@@ -1279,7 +1413,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
// build position remap that maps each vertex to the one with identical position
unsigned int* remap = allocator.allocate<unsigned int>(vertex_count);
unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count);
- buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator);
+ buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator);
// classify vertices; vertex kind determines collapse rules, see kCanCollapse
unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count);
@@ -1303,7 +1437,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
#endif
Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride);
+ rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride);
+
+#if ATTRIBUTES
+ for (size_t i = 0; i < vertex_count; ++i)
+ {
+ memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a));
+
+ for (size_t k = 0; k < attribute_count; ++k)
+ {
+ float a = attributes[i * attribute_count + k];
+
+ vertex_positions[i].a[k] = a * attribute_weights[k];
+ }
+ }
+#endif
Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count);
memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric));
@@ -1395,7 +1543,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices,
// result_error is quadratic; we need to remap it back to linear
if (out_result_error)
+ {
*out_result_error = sqrtf(result_error);
+ }
return result_count;
}
diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp
index 2cbfaac367..5f3ec204ab 100644
--- a/thirdparty/meshoptimizer/vertexcodec.cpp
+++ b/thirdparty/meshoptimizer/vertexcodec.cpp
@@ -710,18 +710,12 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1)
SIMD_TARGET
static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1)
{
- v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3);
-
- uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull;
- uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull;
+ // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00
+ const uint64_t magic = 0x000103070f1f3f80ull;
// TODO: This can use v8x16_bitmask in the future
- uint64_t mask_2 = mask_1a | mask_1b;
- uint64_t mask_4 = mask_2 | (mask_2 >> 16);
- uint64_t mask_8 = mask_4 | (mask_4 >> 8);
-
- mask0 = uint8_t(mask_8);
- mask1 = uint8_t(mask_8 >> 32);
+ mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56);
+ mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56);
}
SIMD_TARGET