diff options
Diffstat (limited to 'thirdparty/meshoptimizer')
-rw-r--r-- | thirdparty/meshoptimizer/LICENSE.md | 2 | ||||
-rw-r--r-- | thirdparty/meshoptimizer/clusterizer.cpp | 616 | ||||
-rw-r--r-- | thirdparty/meshoptimizer/indexgenerator.cpp | 206 | ||||
-rw-r--r-- | thirdparty/meshoptimizer/meshoptimizer.h | 109 | ||||
-rw-r--r-- | thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch | 262 | ||||
-rw-r--r-- | thirdparty/meshoptimizer/simplifier.cpp | 170 | ||||
-rw-r--r-- | thirdparty/meshoptimizer/vertexcodec.cpp | 14 |
7 files changed, 1285 insertions, 94 deletions
diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md index 4fcd766d22..3c52415f62 100644 --- a/thirdparty/meshoptimizer/LICENSE.md +++ b/thirdparty/meshoptimizer/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2016-2020 Arseny Kapoulkine +Copyright (c) 2016-2021 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp index f7d88c5136..f8aad7b49c 100644 --- a/thirdparty/meshoptimizer/clusterizer.cpp +++ b/thirdparty/meshoptimizer/clusterizer.cpp @@ -2,6 +2,7 @@ #include "meshoptimizer.h" #include <assert.h> +#include <float.h> #include <math.h> #include <string.h> @@ -12,6 +13,68 @@ namespace meshopt { +// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet +const size_t kMeshletMaxVertices = 255; + +// A reasonable limit is around 2*max_vertices or less +const size_t kMeshletMaxTriangles = 512; + +struct TriangleAdjacency2 +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill triangle counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill triangle data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = unsigned(i); + adjacency.data[adjacency.offsets[b]++] = unsigned(i); + adjacency.data[adjacency.offsets[c]++] = unsigned(i); + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + static void computeBoundingSphere(float result[4], const float points[][3], size_t count) { assert(count > 0); @@ -82,13 +145,310 @@ static void computeBoundingSphere(float result[4], const float points[][3], size result[3] = radius; } +struct Cone +{ + float px, py, pz; + float nx, ny, nz; +}; + +static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) +{ + float cone = 1.f - spread * cone_weight; + float cone_clamped = cone < 1e-3f ? 1e-3f : cone; + + return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped; +} + +static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) +{ + Cone result = acc; + + float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count); + + result.px *= center_scale; + result.py *= center_scale; + result.pz *= center_scale; + + float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz; + float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length); + + result.nx *= axis_scale; + result.ny *= axis_scale; + result.nz *= axis_scale; + + return result; +} + +static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + (void)vertex_count; + + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + size_t face_count = index_count / 3; + + float mesh_area = 0; + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* p0 = vertex_positions + vertex_stride_float * a; + const float* p1 = vertex_positions + vertex_stride_float * b; + const float* p2 = vertex_positions + vertex_stride_float * c; + + float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; + + float normalx = p10[1] * p20[2] - p10[2] * p20[1]; + float normaly = p10[2] * p20[0] - p10[0] * p20[2]; + float normalz = p10[0] * p20[1] - p10[1] * p20[0]; + + float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); + float invarea = (area == 0.f) ? 0.f : 1.f / area; + + triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f; + triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f; + triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f; + + triangles[i].nx = normalx * invarea; + triangles[i].ny = normaly * invarea; + triangles[i].nz = normalz * invarea; + + mesh_area += area; + } + + return mesh_area; +} + +static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles) +{ + size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3; + + // fill 4b padding with 0 + while (offset & 3) + meshlet_triangles[offset++] = 0; +} + +static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles) +{ + unsigned char& av = used[a]; + unsigned char& bv = used[b]; + unsigned char& cv = used[c]; + + bool result = false; + + unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + + if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + { + meshlets[meshlet_offset] = meshlet; + + for (size_t j = 0; j < meshlet.vertex_count; ++j) + used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff; + + finishMeshlet(meshlet, meshlet_triangles); + + meshlet.vertex_offset += meshlet.vertex_count; + meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding + meshlet.vertex_count = 0; + meshlet.triangle_count = 0; + + result = true; + } + + if (av == 0xff) + { + av = (unsigned char)meshlet.vertex_count; + meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a; + } + + if (bv == 0xff) + { + bv = (unsigned char)meshlet.vertex_count; + meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b; + } + + if (cv == 0xff) + { + cv = (unsigned char)meshlet.vertex_count; + meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c; + } + + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv; + meshlet.triangle_count++; + + return result; +} + +struct KDNode +{ + union + { + float split; + unsigned int index; + }; + + // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point) + // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children + unsigned int axis : 2; + unsigned int children : 30; +}; + +static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot) +{ + size_t m = 0; + + // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot + for (size_t i = 0; i < count; ++i) + { + float v = points[indices[i] * stride + axis]; + + // swap(m, i) unconditionally + unsigned int t = indices[m]; + indices[m] = indices[i]; + indices[i] = t; + + // when v >= pivot, we swap i with m without advancing it, preserving invariants + m += v < pivot; + } + + return m; +} + +static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count) +{ + assert(offset + count <= node_count); + (void)node_count; + + KDNode& result = nodes[offset]; + + result.index = indices[0]; + result.axis = 3; + result.children = unsigned(count - 1); + + // all remaining points are stored in nodes immediately following the leaf + for (size_t i = 1; i < count; ++i) + { + KDNode& tail = nodes[offset + i]; + + tail.index = indices[i]; + tail.axis = 3; + tail.children = ~0u >> 2; // bogus value to prevent misuse + } + + return offset + count; +} + +static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size) +{ + assert(count > 0); + assert(offset < node_count); + + if (count <= leaf_size) + return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); + + float mean[3] = {}; + float vars[3] = {}; + float runc = 1, runs = 1; + + // gather statistics on the points in the subtree using Welford's algorithm + for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc) + { + const float* point = points + indices[i] * stride; + + for (int k = 0; k < 3; ++k) + { + float delta = point[k] - mean[k]; + mean[k] += delta * runs; + vars[k] += delta * (point[k] - mean[k]); + } + } + + // split axis is one where the variance is largest + unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 + : 2; + + float split = mean[axis]; + size_t middle = kdtreePartition(indices, count, points, stride, axis, split); + + // when the partition is degenerate simply consolidate the points into a single node + if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2) + return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); + + KDNode& result = nodes[offset]; + + result.split = split; + result.axis = axis; + + // left subtree is right after our node + size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size); + + // distance to the right subtree is represented explicitly + result.children = unsigned(next_offset - offset - 1); + + return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size); +} + +static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit) +{ + const KDNode& node = nodes[root]; + + if (node.axis == 3) + { + // leaf + for (unsigned int i = 0; i <= node.children; ++i) + { + unsigned int index = nodes[root + i].index; + + if (emitted_flags[index]) + continue; + + const float* point = points + index * stride; + + float distance2 = + (point[0] - position[0]) * (point[0] - position[0]) + + (point[1] - position[1]) * (point[1] - position[1]) + + (point[2] - position[2]) * (point[2] - position[2]); + float distance = sqrtf(distance2); + + if (distance < limit) + { + result = index; + limit = distance; + } + } + } + else + { + // branch; we order recursion to process the node that search position is in first + float delta = position[node.axis] - node.split; + unsigned int first = (delta <= 0) ? 0 : node.children; + unsigned int second = first ^ node.children; + + kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit); + + // only process the other node if it can have a match based on closest distance so far + if (fabsf(delta) <= limit) + kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit); + } +} + } // namespace meshopt size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) { + using namespace meshopt; + assert(index_count % 3 == 0); - assert(max_vertices >= 3); - assert(max_triangles >= 1); + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); + assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + + (void)kMeshletMaxVertices; + (void)kMeshletMaxTriangles; // meshlet construction is limited by max vertices and max triangles per meshlet // the worst case is that the input is an unindexed stream since this equally stresses both limits @@ -100,77 +460,226 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_ return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles; } -size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) { + using namespace meshopt; + assert(index_count % 3 == 0); - assert(max_vertices >= 3); - assert(max_triangles >= 1); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); + assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned meshopt_Allocator allocator; - meshopt_Meshlet meshlet; - memset(&meshlet, 0, sizeof(meshlet)); + TriangleAdjacency2 adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + size_t face_count = index_count / 3; + + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + // for each triangle, precompute centroid & normal to use for scoring + Cone* triangles = allocator.allocate<Cone>(face_count); + float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride); + + // assuming each meshlet is a square patch, expected radius is sqrt(expected area) + float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f; + float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f; + + // build a kd-tree for nearest neighbor lookup + unsigned int* kdindices = allocator.allocate<unsigned int>(face_count); + for (size_t i = 0; i < face_count; ++i) + kdindices[i] = unsigned(i); - assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0])); - assert(max_triangles <= sizeof(meshlet.indices) / 3); + KDNode* nodes = allocator.allocate<KDNode>(face_count * 2); + kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8); // index of the vertex in the meshlet, 0xff if the vertex isn't used unsigned char* used = allocator.allocate<unsigned char>(vertex_count); memset(used, -1, vertex_count); - size_t offset = 0; + meshopt_Meshlet meshlet = {}; + size_t meshlet_offset = 0; - for (size_t i = 0; i < index_count; i += 3) - { - unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; - assert(a < vertex_count && b < vertex_count && c < vertex_count); + Cone meshlet_cone_acc = {}; - unsigned char& av = used[a]; - unsigned char& bv = used[b]; - unsigned char& cv = used[c]; + for (;;) + { + unsigned int best_triangle = ~0u; + unsigned int best_extra = 5; + float best_score = FLT_MAX; - unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); - if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + for (size_t i = 0; i < meshlet.vertex_count; ++i) { - destination[offset++] = meshlet; + unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t j = 0; j < neighbours_size; ++j) + { + unsigned int triangle = neighbours[j]; + assert(!emitted_flags[triangle]); + + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); + + // triangles that don't add new vertices to meshlets are max. priority + if (extra != 0) + { + // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets + if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) + extra = 0; + + extra++; + } + + // since topology-based priority is always more important than the score, we can skip scoring in some cases + if (extra > best_extra) + continue; + + const Cone& tri_cone = triangles[triangle]; + + float distance2 = + (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) + + (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) + + (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz); - for (size_t j = 0; j < meshlet.vertex_count; ++j) - used[meshlet.vertices[j]] = 0xff; + float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz; - memset(&meshlet, 0, sizeof(meshlet)); + float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); + + // note that topology-based priority is always more important than the score + // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost + if (extra < best_extra || score < best_score) + { + best_triangle = triangle; + best_extra = extra; + best_score = score; + } + } } - if (av == 0xff) + if (best_triangle == ~0u) { - av = meshlet.vertex_count; - meshlet.vertices[meshlet.vertex_count++] = a; + float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; + unsigned int index = ~0u; + float limit = FLT_MAX; + + kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit); + + best_triangle = index; } - if (bv == 0xff) + if (best_triangle == ~0u) + break; + + unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds + if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles)) { - bv = meshlet.vertex_count; - meshlet.vertices[meshlet.vertex_count++] = b; + meshlet_offset++; + memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc)); } - if (cv == 0xff) + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // remove emitted triangle from adjacency data + // this makes sure that we spend less time traversing these lists on subsequent iterations + for (size_t k = 0; k < 3; ++k) { - cv = meshlet.vertex_count; - meshlet.vertices[meshlet.vertex_count++] = c; + unsigned int index = indices[best_triangle * 3 + k]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t i = 0; i < neighbours_size; ++i) + { + unsigned int tri = neighbours[i]; + + if (tri == best_triangle) + { + neighbours[i] = neighbours[neighbours_size - 1]; + adjacency.counts[index]--; + break; + } + } } - meshlet.indices[meshlet.triangle_count][0] = av; - meshlet.indices[meshlet.triangle_count][1] = bv; - meshlet.indices[meshlet.triangle_count][2] = cv; - meshlet.triangle_count++; + // update aggregated meshlet cone data for scoring subsequent triangles + meshlet_cone_acc.px += triangles[best_triangle].px; + meshlet_cone_acc.py += triangles[best_triangle].py; + meshlet_cone_acc.pz += triangles[best_triangle].pz; + meshlet_cone_acc.nx += triangles[best_triangle].nx; + meshlet_cone_acc.ny += triangles[best_triangle].ny; + meshlet_cone_acc.nz += triangles[best_triangle].nz; + + emitted_flags[best_triangle] = 1; + } + + if (meshlet.triangle_count) + { + finishMeshlet(meshlet, meshlet_triangles); + + meshlets[meshlet_offset++] = meshlet; + } + + assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + return meshlet_offset; +} + +size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); + assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + + meshopt_Allocator allocator; + + // index of the vertex in the meshlet, 0xff if the vertex isn't used + unsigned char* used = allocator.allocate<unsigned char>(vertex_count); + memset(used, -1, vertex_count); + + meshopt_Meshlet meshlet = {}; + size_t meshlet_offset = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + // appends triangle to the meshlet and writes previous meshlet to the output if full + meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles); } if (meshlet.triangle_count) - destination[offset++] = meshlet; + { + finishMeshlet(meshlet, meshlet_triangles); - assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + meshlets[meshlet_offset++] = meshlet; + } - return offset; + assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + return meshlet_offset; } meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) @@ -178,18 +687,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t using namespace meshopt; assert(index_count % 3 == 0); + assert(index_count / 3 <= kMeshletMaxTriangles); assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); - assert(index_count / 3 <= 256); - (void)vertex_count; size_t vertex_stride_float = vertex_positions_stride / sizeof(float); // compute triangle normals and gather triangle corners - float normals[256][3]; - float corners[256][3][3]; + float normals[kMeshletMaxTriangles][3]; + float corners[kMeshletMaxTriangles][3][3]; size_t triangles = 0; for (size_t i = 0; i < index_count; i += 3) @@ -327,25 +835,23 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t return bounds; } -meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { + using namespace meshopt; + + assert(triangle_count <= kMeshletMaxTriangles); assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); - unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])]; + unsigned int indices[kMeshletMaxTriangles * 3]; - for (size_t i = 0; i < meshlet->triangle_count; ++i) + for (size_t i = 0; i < triangle_count * 3; ++i) { - unsigned int a = meshlet->vertices[meshlet->indices[i][0]]; - unsigned int b = meshlet->vertices[meshlet->indices[i][1]]; - unsigned int c = meshlet->vertices[meshlet->indices[i][2]]; - - assert(a < vertex_count && b < vertex_count && c < vertex_count); + unsigned int index = meshlet_vertices[meshlet_triangles[i]]; + assert(index < vertex_count); - indices[i * 3 + 0] = a; - indices[i * 3 + 1] = b; - indices[i * 3 + 2] = c; + indices[i] = index; } - return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); + return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); } diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp index aa4a30efa4..f60db0dc4f 100644 --- a/thirdparty/meshoptimizer/indexgenerator.cpp +++ b/thirdparty/meshoptimizer/indexgenerator.cpp @@ -4,6 +4,8 @@ #include <assert.h> #include <string.h> +// This work is based on: +// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010 namespace meshopt { @@ -83,10 +85,49 @@ struct VertexStreamHasher } }; +struct EdgeHasher +{ + const unsigned int* remap; + + size_t hash(unsigned long long edge) const + { + unsigned int e0 = unsigned(edge >> 32); + unsigned int e1 = unsigned(edge); + + unsigned int h1 = remap[e0]; + unsigned int h2 = remap[e1]; + + const unsigned int m = 0x5bd1e995; + + // MurmurHash64B finalizer + h1 ^= h2 >> 18; + h1 *= m; + h2 ^= h1 >> 22; + h2 *= m; + h1 ^= h2 >> 17; + h1 *= m; + h2 ^= h1 >> 19; + h2 *= m; + + return h2; + } + + bool equal(unsigned long long lhs, unsigned long long rhs) const + { + unsigned int l0 = unsigned(lhs >> 32); + unsigned int l1 = unsigned(lhs); + + unsigned int r0 = unsigned(rhs >> 32); + unsigned int r1 = unsigned(rhs); + + return remap[l0] == remap[r0] && remap[l1] == remap[r1]; + } +}; + static size_t hashBuckets(size_t count) { size_t buckets = 1; - while (buckets < count) + while (buckets < count + count / 4) buckets *= 2; return buckets; @@ -119,6 +160,26 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, c return 0; } +static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +{ + VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride}; + + size_t vertex_table_size = hashBuckets(vertex_count); + unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size); + memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int index = unsigned(i); + unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } +} + } // namespace meshopt size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) @@ -345,3 +406,146 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns destination[i] = remap[index]; } } + +void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + static const int next[4] = {1, 2, 0, 1}; + + // build position remap: for each vertex, which other (canonical) vertex does it map to? + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator); + + // build edge set; this stores all triangle edges but we can look these up by any other wedge + EdgeHasher edge_hasher = {remap}; + + size_t edge_table_size = hashBuckets(index_count); + unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size); + unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size); + + memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); + memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; i += 3) + { + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + unsigned int i2 = indices[i + next[e + 1]]; + assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count); + + unsigned long long edge = ((unsigned long long)i0 << 32) | i1; + unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + if (*entry == ~0ull) + { + *entry = edge; + + // store vertex opposite to the edge + edge_vertex_table[entry - edge_table] = i2; + } + } + } + + // build resulting index buffer: 6 indices for each input triangle + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int patch[6]; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + assert(i0 < vertex_count && i1 < vertex_count); + + // note: this refers to the opposite edge! + unsigned long long edge = ((unsigned long long)i1 << 32) | i0; + unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + patch[e * 2 + 0] = i0; + patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table]; + } + + memcpy(destination + i * 2, patch, sizeof(patch)); + } +} + +void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + static const int next[3] = {1, 2, 0}; + + // build position remap: for each vertex, which other (canonical) vertex does it map to? + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator); + + // build edge set; this stores all triangle edges but we can look these up by any other wedge + EdgeHasher edge_hasher = {remap}; + + size_t edge_table_size = hashBuckets(index_count); + unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size); + memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); + + for (size_t i = 0; i < index_count; i += 3) + { + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + assert(i0 < vertex_count && i1 < vertex_count); + + unsigned long long edge = ((unsigned long long)i0 << 32) | i1; + unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + if (*entry == ~0ull) + *entry = edge; + } + } + + // build resulting index buffer: 12 indices for each input triangle + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int patch[12]; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + assert(i0 < vertex_count && i1 < vertex_count); + + // note: this refers to the opposite edge! + unsigned long long edge = ((unsigned long long)i1 << 32) | i0; + unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + // use the same edge if opposite edge doesn't exist (border) + oppe = (oppe == ~0ull) ? edge : oppe; + + // triangle index (0, 1, 2) + patch[e] = i0; + + // opposite edge (3, 4; 5, 6; 7, 8) + patch[3 + e * 2 + 0] = unsigned(oppe); + patch[3 + e * 2 + 1] = unsigned(oppe >> 32); + + // dominant vertex (9, 10, 11) + patch[9 + e] = remap[i0]; + } + + memcpy(destination + i * 4, patch, sizeof(patch)); + } +} diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index 1714000384..e44b99ce52 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -1,7 +1,7 @@ /** - * meshoptimizer - version 0.15 + * meshoptimizer - version 0.16 * - * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2016-2021, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer * * This library is distributed under the MIT License. See notice at the end of this file. @@ -12,7 +12,7 @@ #include <stddef.h> /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 150 /* 0.15 */ +#define MESHOPTIMIZER_VERSION 160 /* 0.16 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -98,6 +98,35 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); /** + * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology + * Each triangle is converted into a 6-vertex patch with the following layout: + * - 0, 2, 4: original triangle vertices + * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40 + * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY. + * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering. + * + * destination must contain enough space for the resulting index buffer (index_count*2 elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement + * Each triangle is converted into a 12-vertex patch with the following layout: + * - 0, 1, 2: original triangle vertices + * - 3, 4: opposing edge for edge 0, 1 + * - 5, 6: opposing edge for edge 1, 2 + * - 7, 8: opposing edge for edge 2, 0 + * - 9, 10, 11: dominant vertices for corners 0, 1, 2 + * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping. + * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details. + * + * destination must contain enough space for the resulting index buffer (index_count*4 elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** * Vertex transform cache optimizer * Reorders indices to reduce the number of GPU vertex shader invocations * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. @@ -270,6 +299,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); /** + * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); + +/** * Experimental: Mesh simplifier (sloppy) * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance * The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error. @@ -373,22 +407,31 @@ MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetc struct meshopt_Meshlet { - unsigned int vertices[64]; - unsigned char indices[126][3]; - unsigned char triangle_count; - unsigned char vertex_count; + /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */ + unsigned int vertex_offset; + unsigned int triangle_offset; + + /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */ + unsigned int vertex_count; + unsigned int triangle_count; }; /** * Experimental: Meshlet builder * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. - * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters. + * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first. * - * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound - * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126) + * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound + * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices + * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512) + * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); struct meshopt_Bounds @@ -426,10 +469,10 @@ struct meshopt_Bounds * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. * * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer - * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size) + * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) */ MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); -MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); /** * Experimental: Spatial sorter @@ -513,6 +556,10 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, template <typename T> inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); template <typename T> +inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> +inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); template <typename T> inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); @@ -547,7 +594,9 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size template <typename T> inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); template <typename T> -inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +template <typename T> +inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); template <typename T> inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template <typename T> @@ -762,6 +811,24 @@ inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indi } template <typename T> +inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count * 2); + + meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> +inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count * 4); + + meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) { meshopt_IndexAdapter<T> in(0, indices, index_count); @@ -908,11 +975,19 @@ inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices } template <typename T> -inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight); +} + +template <typename T> +inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) { meshopt_IndexAdapter<T> in(0, indices, index_count); - return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles); + return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); } template <typename T> @@ -934,7 +1009,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_ #endif /** - * Copyright (c) 2016-2020 Arseny Kapoulkine + * Copyright (c) 2016-2021 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch new file mode 100644 index 0000000000..cf648b0da3 --- /dev/null +++ b/thirdparty/meshoptimizer/patches/attribute-aware-simplify.patch @@ -0,0 +1,262 @@ +diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h +index fe8d349731..e44b99ce52 100644 +--- a/thirdparty/meshoptimizer/meshoptimizer.h ++++ b/thirdparty/meshoptimizer/meshoptimizer.h +@@ -298,6 +298,11 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver + */ + MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error); + ++/** ++ * Experimental: Mesh simplifier with attribute metric; attributes follow xyz position data atm (vertex data must contain 3 + attribute_count floats per vertex) ++ */ ++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* result_error, const float* attributes, const float* attribute_weights, size_t attribute_count); ++ + /** + * Experimental: Mesh simplifier (sloppy) + * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance +diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp +index b2cb589462..059cabb055 100644 +--- a/thirdparty/meshoptimizer/simplifier.cpp ++++ b/thirdparty/meshoptimizer/simplifier.cpp +@@ -20,6 +20,8 @@ + #define TRACESTATS(i) (void)0 + #endif + ++#define ATTRIBUTES 8 ++ + // This work is based on: + // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 + // Michael Garland. Quadric-based polygonal surface simplification. 1999 +@@ -358,6 +360,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned + struct Vector3 + { + float x, y, z; ++ ++#if ATTRIBUTES ++ float a[ATTRIBUTES]; ++#endif + }; + + static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +@@ -414,6 +420,13 @@ struct Quadric + float a10, a20, a21; + float b0, b1, b2, c; + float w; ++ ++#if ATTRIBUTES ++ float gx[ATTRIBUTES]; ++ float gy[ATTRIBUTES]; ++ float gz[ATTRIBUTES]; ++ float gw[ATTRIBUTES]; ++#endif + }; + + struct Collapse +@@ -456,6 +469,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R) + Q.b2 += R.b2; + Q.c += R.c; + Q.w += R.w; ++ ++#if ATTRIBUTES ++ for (int k = 0; k < ATTRIBUTES; ++k) ++ { ++ Q.gx[k] += R.gx[k]; ++ Q.gy[k] += R.gy[k]; ++ Q.gz[k] += R.gz[k]; ++ Q.gw[k] += R.gw[k]; ++ } ++#endif + } + + static float quadricError(const Quadric& Q, const Vector3& v) +@@ -481,6 +504,17 @@ static float quadricError(const Quadric& Q, const Vector3& v) + r += ry * v.y; + r += rz * v.z; + ++#if ATTRIBUTES ++ // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr ++ for (int k = 0; k < ATTRIBUTES; ++k) ++ { ++ float a = v.a[k]; ++ ++ r += a * a * Q.w; ++ r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]); ++ } ++#endif ++ + float s = Q.w == 0.f ? 0.f : 1.f / Q.w; + + return fabsf(r) * s; +@@ -504,6 +538,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo + Q.b2 = c * dw; + Q.c = d * dw; + Q.w = w; ++ ++#if ATTRIBUTES ++ memset(Q.gx, 0, sizeof(Q.gx)); ++ memset(Q.gy, 0, sizeof(Q.gy)); ++ memset(Q.gz, 0, sizeof(Q.gz)); ++ memset(Q.gw, 0, sizeof(Q.gw)); ++#endif + } + + static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) +@@ -556,6 +597,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 + quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); + } + ++#if ATTRIBUTES ++static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w) ++{ ++ // for each attribute we want to encode the following function into the quadric: ++ // (eval(pos) - attr)^2 ++ // where eval(pos) interpolates attribute across the triangle like so: ++ // eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw ++ // where gx/gy/gz/gw are gradients ++ Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; ++ Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; ++ ++ // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows: ++ // v = (d11 * d20 - d01 * d21) / denom ++ // w = (d00 * d21 - d01 * d20) / denom ++ // u = 1 - v - w ++ // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj) ++ const Vector3& v0 = p10; ++ const Vector3& v1 = p20; ++ float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z; ++ float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; ++ float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z; ++ float denom = d00 * d11 - d01 * d01; ++ float denomr = denom == 0 ? 0.f : 1.f / denom; ++ ++ // precompute gradient factors ++ // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes ++ float gx1 = (d11 * v0.x - d01 * v1.x) * denomr; ++ float gx2 = (d00 * v1.x - d01 * v0.x) * denomr; ++ float gy1 = (d11 * v0.y - d01 * v1.y) * denomr; ++ float gy2 = (d00 * v1.y - d01 * v0.y) * denomr; ++ float gz1 = (d11 * v0.z - d01 * v1.z) * denomr; ++ float gz2 = (d00 * v1.z - d01 * v0.z) * denomr; ++ ++ for (int k = 0; k < ATTRIBUTES; ++k) ++ { ++ float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k]; ++ ++ // compute gradient of eval(pos) for x/y/z/w ++ // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w ++ float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0); ++ float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0); ++ float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0); ++ float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz; ++ ++ // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K ++ // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields ++ Q.a00 += w * (gx * gx); ++ Q.a11 += w * (gy * gy); ++ Q.a22 += w * (gz * gz); ++ ++ Q.a10 += w * (gy * gx); ++ Q.a20 += w * (gz * gx); ++ Q.a21 += w * (gz * gy); ++ ++ Q.b0 += w * (gx * gw); ++ Q.b1 += w * (gy * gw); ++ Q.b2 += w * (gz * gw); ++ ++ Q.c += w * (gw * gw); ++ ++ // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError ++ Q.gx[k] = w * gx; ++ Q.gy[k] = w * gy; ++ Q.gz[k] = w * gz; ++ Q.gw[k] = w * gw; ++ ++#if TRACE > 2 ++ printf("attr%d: %e %e %e\n", ++ k, ++ (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0), ++ (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1), ++ (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2) ++ ); ++#endif ++ } ++} ++#endif ++ + static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) + { + for (size_t i = 0; i < index_count; i += 3) +@@ -567,6 +686,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic + Quadric Q; + quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); + ++#if ATTRIBUTES ++ quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w); ++#endif + quadricAdd(vertex_quadrics[remap[i0]], Q); + quadricAdd(vertex_quadrics[remap[i1]], Q); + quadricAdd(vertex_quadrics[remap[i2]], Q); +@@ -1259,13 +1381,19 @@ unsigned int* meshopt_simplifyDebugLoopBack = 0; + #endif + + size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) ++{ ++ return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0); ++} ++ ++size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) + { + using namespace meshopt; + + assert(index_count % 3 == 0); +- assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); +- assert(vertex_positions_stride % sizeof(float) == 0); ++ assert(vertex_stride > 0 && vertex_stride <= 256); ++ assert(vertex_stride % sizeof(float) == 0); + assert(target_index_count <= index_count); ++ assert(attribute_count <= ATTRIBUTES); + + meshopt_Allocator allocator; + +@@ -1279,7 +1407,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + // build position remap that maps each vertex to the one with identical position + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count); +- buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); ++ buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator); + + // classify vertices; vertex kind determines collapse rules, see kCanCollapse + unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count); +@@ -1303,7 +1431,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + #endif + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); +- rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); ++ rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride); ++ ++#if ATTRIBUTES ++ for (size_t i = 0; i < vertex_count; ++i) ++ { ++ memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a)); ++ ++ for (size_t k = 0; k < attribute_count; ++k) ++ { ++ float a = attributes[i * attribute_count + k]; ++ ++ vertex_positions[i].a[k] = a * attribute_weights[k]; ++ } ++ } ++#endif + + Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count); + memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); +@@ -1395,7 +1537,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + + // result_error is quadratic; we need to remap it back to linear + if (out_result_error) ++ { + *out_result_error = sqrtf(result_error); ++ } + + return result_count; + } diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index 942db14461..0f10ebef4b 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -20,6 +20,8 @@ #define TRACESTATS(i) (void)0 #endif +#define ATTRIBUTES 8 + // This work is based on: // Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 // Michael Garland. Quadric-based polygonal surface simplification. 1999 @@ -118,8 +120,13 @@ struct PositionHasher { const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float); + // scramble bits to make sure that integer coordinates have entropy in lower bits + unsigned int x = key[0] ^ (key[0] >> 17); + unsigned int y = key[1] ^ (key[1] >> 17); + unsigned int z = key[2] ^ (key[2] >> 17); + // Optimized Spatial Hashing for Collision Detection of Deformable Objects - return (key[0] * 73856093) ^ (key[1] * 19349663) ^ (key[2] * 83492791); + return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791); } bool equal(unsigned int lhs, unsigned int rhs) const @@ -131,7 +138,7 @@ struct PositionHasher static size_t hashBuckets2(size_t count) { size_t buckets = 1; - while (buckets < count) + while (buckets < count + count / 4) buckets *= 2; return buckets; @@ -358,6 +365,10 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned struct Vector3 { float x, y, z; + +#if ATTRIBUTES + float a[ATTRIBUTES]; +#endif }; static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) @@ -414,6 +425,13 @@ struct Quadric float a10, a20, a21; float b0, b1, b2, c; float w; + +#if ATTRIBUTES + float gx[ATTRIBUTES]; + float gy[ATTRIBUTES]; + float gz[ATTRIBUTES]; + float gw[ATTRIBUTES]; +#endif }; struct Collapse @@ -456,6 +474,16 @@ static void quadricAdd(Quadric& Q, const Quadric& R) Q.b2 += R.b2; Q.c += R.c; Q.w += R.w; + +#if ATTRIBUTES + for (int k = 0; k < ATTRIBUTES; ++k) + { + Q.gx[k] += R.gx[k]; + Q.gy[k] += R.gy[k]; + Q.gz[k] += R.gz[k]; + Q.gw[k] += R.gw[k]; + } +#endif } static float quadricError(const Quadric& Q, const Vector3& v) @@ -481,6 +509,17 @@ static float quadricError(const Quadric& Q, const Vector3& v) r += ry * v.y; r += rz * v.z; +#if ATTRIBUTES + // see quadricUpdateAttributes for general derivation; here we need to add the parts of (eval(pos) - attr)^2 that depend on attr + for (int k = 0; k < ATTRIBUTES; ++k) + { + float a = v.a[k]; + + r += a * a * Q.w; + r -= 2 * a * (v.x * Q.gx[k] + v.y * Q.gy[k] + v.z * Q.gz[k] + Q.gw[k]); + } +#endif + float s = Q.w == 0.f ? 0.f : 1.f / Q.w; return fabsf(r) * s; @@ -504,6 +543,13 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo Q.b2 = c * dw; Q.c = d * dw; Q.w = w; + +#if ATTRIBUTES + memset(Q.gx, 0, sizeof(Q.gx)); + memset(Q.gy, 0, sizeof(Q.gy)); + memset(Q.gz, 0, sizeof(Q.gz)); + memset(Q.gw, 0, sizeof(Q.gw)); +#endif } static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) @@ -556,6 +602,84 @@ static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3 quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); } +#if ATTRIBUTES +static void quadricUpdateAttributes(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float w) +{ + // for each attribute we want to encode the following function into the quadric: + // (eval(pos) - attr)^2 + // where eval(pos) interpolates attribute across the triangle like so: + // eval(pos) = pos.x * gx + pos.y * gy + pos.z * gz + gw + // where gx/gy/gz/gw are gradients + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + + // we compute gradients using barycentric coordinates; barycentric coordinates can be computed as follows: + // v = (d11 * d20 - d01 * d21) / denom + // w = (d00 * d21 - d01 * d20) / denom + // u = 1 - v - w + // here v0, v1 are triangle edge vectors, v2 is a vector from point to triangle corner, and dij = dot(vi, vj) + const Vector3& v0 = p10; + const Vector3& v1 = p20; + float d00 = v0.x * v0.x + v0.y * v0.y + v0.z * v0.z; + float d01 = v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; + float d11 = v1.x * v1.x + v1.y * v1.y + v1.z * v1.z; + float denom = d00 * d11 - d01 * d01; + float denomr = denom == 0 ? 0.f : 1.f / denom; + + // precompute gradient factors + // these are derived by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w and factoring out common factors that are shared between attributes + float gx1 = (d11 * v0.x - d01 * v1.x) * denomr; + float gx2 = (d00 * v1.x - d01 * v0.x) * denomr; + float gy1 = (d11 * v0.y - d01 * v1.y) * denomr; + float gy2 = (d00 * v1.y - d01 * v0.y) * denomr; + float gz1 = (d11 * v0.z - d01 * v1.z) * denomr; + float gz2 = (d00 * v1.z - d01 * v0.z) * denomr; + + for (int k = 0; k < ATTRIBUTES; ++k) + { + float a0 = p0.a[k], a1 = p1.a[k], a2 = p2.a[k]; + + // compute gradient of eval(pos) for x/y/z/w + // the formulas below are obtained by directly computing derivative of eval(pos) = a0 * u + a1 * v + a2 * w + float gx = gx1 * (a1 - a0) + gx2 * (a2 - a0); + float gy = gy1 * (a1 - a0) + gy2 * (a2 - a0); + float gz = gz1 * (a1 - a0) + gz2 * (a2 - a0); + float gw = a0 - p0.x * gx - p0.y * gy - p0.z * gz; + + // quadric encodes (eval(pos)-attr)^2; this means that the resulting expansion needs to compute, for example, pos.x * pos.y * K + // since quadrics already encode factors for pos.x * pos.y, we can accumulate almost everything in basic quadric fields + Q.a00 += w * (gx * gx); + Q.a11 += w * (gy * gy); + Q.a22 += w * (gz * gz); + + Q.a10 += w * (gy * gx); + Q.a20 += w * (gz * gx); + Q.a21 += w * (gz * gy); + + Q.b0 += w * (gx * gw); + Q.b1 += w * (gy * gw); + Q.b2 += w * (gz * gw); + + Q.c += w * (gw * gw); + + // the only remaining sum components are ones that depend on attr; these will be addded during error evaluation, see quadricError + Q.gx[k] = w * gx; + Q.gy[k] = w * gy; + Q.gz[k] = w * gz; + Q.gw[k] = w * gw; + +#if TRACE > 2 + printf("attr%d: %e %e %e\n", + k, + (gx * p0.x + gy * p0.y + gz * p0.z + gw - a0), + (gx * p1.x + gy * p1.y + gz * p1.z + gw - a1), + (gx * p2.x + gy * p2.y + gz * p2.z + gw - a2) + ); +#endif + } +} +#endif + static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) { for (size_t i = 0; i < index_count; i += 3) @@ -567,6 +691,9 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic Quadric Q; quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); +#if ATTRIBUTES + quadricUpdateAttributes(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], Q.w); +#endif quadricAdd(vertex_quadrics[remap[i0]], Q); quadricAdd(vertex_quadrics[remap[i1]], Q); quadricAdd(vertex_quadrics[remap[i2]], Q); @@ -1038,7 +1165,7 @@ struct IdHasher struct TriangleHasher { - unsigned int* indices; + const unsigned int* indices; size_t hash(unsigned int i) const { @@ -1253,19 +1380,26 @@ static float interpolate(float y, float x0, float y0, float x1, float y1, float } // namespace meshopt #ifndef NDEBUG -unsigned char* meshopt_simplifyDebugKind = 0; -unsigned int* meshopt_simplifyDebugLoop = 0; -unsigned int* meshopt_simplifyDebugLoopBack = 0; +// Note: this is only exposed for debug visualization purposes; do *not* use these in debug builds +MESHOPTIMIZER_API unsigned char* meshopt_simplifyDebugKind = 0; +MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoop = 0; +MESHOPTIMIZER_API unsigned int* meshopt_simplifyDebugLoopBack = 0; #endif size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error) { + return meshopt_simplifyWithAttributes(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, target_index_count, target_error, out_result_error, 0, 0, 0); +} + +size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_data, size_t vertex_count, size_t vertex_stride, size_t target_index_count, float target_error, float* out_result_error, const float* attributes, const float* attribute_weights, size_t attribute_count) +{ using namespace meshopt; assert(index_count % 3 == 0); - assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); - assert(vertex_positions_stride % sizeof(float) == 0); + assert(vertex_stride > 0 && vertex_stride <= 256); + assert(vertex_stride % sizeof(float) == 0); assert(target_index_count <= index_count); + assert(attribute_count <= ATTRIBUTES); meshopt_Allocator allocator; @@ -1279,7 +1413,7 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, // build position remap that maps each vertex to the one with identical position unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count); - buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); + buildPositionRemap(remap, wedge, vertex_data, vertex_count, vertex_stride, allocator); // classify vertices; vertex kind determines collapse rules, see kCanCollapse unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count); @@ -1303,7 +1437,21 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, #endif Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); - rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + rescalePositions(vertex_positions, vertex_data, vertex_count, vertex_stride); + +#if ATTRIBUTES + for (size_t i = 0; i < vertex_count; ++i) + { + memset(vertex_positions[i].a, 0, sizeof(vertex_positions[i].a)); + + for (size_t k = 0; k < attribute_count; ++k) + { + float a = attributes[i * attribute_count + k]; + + vertex_positions[i].a[k] = a * attribute_weights[k]; + } + } +#endif Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count); memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); @@ -1395,7 +1543,9 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, // result_error is quadratic; we need to remap it back to linear if (out_result_error) + { *out_result_error = sqrtf(result_error); + } return result_count; } diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp index 2cbfaac367..5f3ec204ab 100644 --- a/thirdparty/meshoptimizer/vertexcodec.cpp +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -710,18 +710,12 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) SIMD_TARGET static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) { - v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3); - - uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull; - uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull; + // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 + const uint64_t magic = 0x000103070f1f3f80ull; // TODO: This can use v8x16_bitmask in the future - uint64_t mask_2 = mask_1a | mask_1b; - uint64_t mask_4 = mask_2 | (mask_2 >> 16); - uint64_t mask_8 = mask_4 | (mask_4 >> 8); - - mask0 = uint8_t(mask_8); - mask1 = uint8_t(mask_8 >> 32); + mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56); + mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56); } SIMD_TARGET |