diff options
Diffstat (limited to 'thirdparty')
26 files changed, 13873 insertions, 3289 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index d011832210..3962a83597 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -357,6 +357,16 @@ File extracted from upstream release tarball: - Added 2 files `godot_core_mbedtls_platform.{c,h}` providing configuration for light bundling with core. +## meshoptimizer + +- Upstream: https://github.com/zeux/meshoptimizer +- Version: 0.15(2020) +- License: MIT + +File extracted from upstream release tarball: + +- Files in src/ go to thirdparty/meshoptimizer + ## miniupnpc @@ -413,7 +423,7 @@ Collection of single-file libraries used in Godot components. * License: Apache 2.0 - `open-simplex-noise.{c,h}` * Upstream: https://github.com/smcameron/open-simplex-noise-in-c - * Version: git (0fef0dbedd76f767da7e3c894822729d0f07e54d, 2020) + custom changes + * Version: git (826f1dd1724e6fb3ff45f58e48c0fbae864c3403, 2020) + custom changes * License: Unlicense - `pcg.{cpp,h}` * Upstream: http://www.pcg-random.org @@ -671,7 +681,7 @@ File extracted from upstream release tarball: ## xatlas - Upstream: https://github.com/jpcy/xatlas -- Version: git (470576d3516f7e6d8b4554e7c941194a935969fd, 2020) +- Version: git (5571fc7ef0d06832947c0a935ccdcf083f7a9264, 2020) - License: MIT Files extracted from upstream source: diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md new file mode 100644 index 0000000000..4fcd766d22 --- /dev/null +++ b/thirdparty/meshoptimizer/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016-2020 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/thirdparty/meshoptimizer/allocator.cpp b/thirdparty/meshoptimizer/allocator.cpp new file mode 100644 index 0000000000..da7cc540b2 --- /dev/null +++ b/thirdparty/meshoptimizer/allocator.cpp @@ -0,0 +1,8 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*)) +{ + meshopt_Allocator::Storage::allocate = allocate; + meshopt_Allocator::Storage::deallocate = deallocate; +} diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp new file mode 100644 index 0000000000..f7d88c5136 --- /dev/null +++ b/thirdparty/meshoptimizer/clusterizer.cpp @@ -0,0 +1,351 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <math.h> +#include <string.h> + +// This work is based on: +// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016 +// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016 +// Jack Ritter. An Efficient Bounding Sphere. 1990 +namespace meshopt +{ + +static void computeBoundingSphere(float result[4], const float points[][3], size_t count) +{ + assert(count > 0); + + // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates + size_t pmin[3] = {0, 0, 0}; + size_t pmax[3] = {0, 0, 0}; + + for (size_t i = 0; i < count; ++i) + { + const float* p = points[i]; + + for (int axis = 0; axis < 3; ++axis) + { + pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; + pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? i : pmax[axis]; + } + } + + // find the pair of points with largest distance + float paxisd2 = 0; + int paxis = 0; + + for (int axis = 0; axis < 3; ++axis) + { + const float* p1 = points[pmin[axis]]; + const float* p2 = points[pmax[axis]]; + + float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]); + + if (d2 > paxisd2) + { + paxisd2 = d2; + paxis = axis; + } + } + + // use the longest segment as the initial sphere diameter + const float* p1 = points[pmin[paxis]]; + const float* p2 = points[pmax[paxis]]; + + float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2}; + float radius = sqrtf(paxisd2) / 2; + + // iteratively adjust the sphere up until all points fit + for (size_t i = 0; i < count; ++i) + { + const float* p = points[i]; + float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); + + if (d2 > radius * radius) + { + float d = sqrtf(d2); + assert(d > 0); + + float k = 0.5f + (radius / d) / 2; + + center[0] = center[0] * k + p[0] * (1 - k); + center[1] = center[1] * k + p[1] * (1 - k); + center[2] = center[2] * k + p[2] * (1 - k); + radius = (radius + d) / 2; + } + } + + result[0] = center[0]; + result[1] = center[1]; + result[2] = center[2]; + result[3] = radius; +} + +} // namespace meshopt + +size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) +{ + assert(index_count % 3 == 0); + assert(max_vertices >= 3); + assert(max_triangles >= 1); + + // meshlet construction is limited by max vertices and max triangles per meshlet + // the worst case is that the input is an unindexed stream since this equally stresses both limits + // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle + size_t max_vertices_conservative = max_vertices - 2; + size_t meshlet_limit_vertices = (index_count + max_vertices_conservative - 1) / max_vertices_conservative; + size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles; + + return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles; +} + +size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + assert(index_count % 3 == 0); + assert(max_vertices >= 3); + assert(max_triangles >= 1); + + meshopt_Allocator allocator; + + meshopt_Meshlet meshlet; + memset(&meshlet, 0, sizeof(meshlet)); + + assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0])); + assert(max_triangles <= sizeof(meshlet.indices) / 3); + + // index of the vertex in the meshlet, 0xff if the vertex isn't used + unsigned char* used = allocator.allocate<unsigned char>(vertex_count); + memset(used, -1, vertex_count); + + size_t offset = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + unsigned char& av = used[a]; + unsigned char& bv = used[b]; + unsigned char& cv = used[c]; + + unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + + if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + { + destination[offset++] = meshlet; + + for (size_t j = 0; j < meshlet.vertex_count; ++j) + used[meshlet.vertices[j]] = 0xff; + + memset(&meshlet, 0, sizeof(meshlet)); + } + + if (av == 0xff) + { + av = meshlet.vertex_count; + meshlet.vertices[meshlet.vertex_count++] = a; + } + + if (bv == 0xff) + { + bv = meshlet.vertex_count; + meshlet.vertices[meshlet.vertex_count++] = b; + } + + if (cv == 0xff) + { + cv = meshlet.vertex_count; + meshlet.vertices[meshlet.vertex_count++] = c; + } + + meshlet.indices[meshlet.triangle_count][0] = av; + meshlet.indices[meshlet.triangle_count][1] = bv; + meshlet.indices[meshlet.triangle_count][2] = cv; + meshlet.triangle_count++; + } + + if (meshlet.triangle_count) + destination[offset++] = meshlet; + + assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + + return offset; +} + +meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + assert(index_count / 3 <= 256); + + (void)vertex_count; + + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + // compute triangle normals and gather triangle corners + float normals[256][3]; + float corners[256][3][3]; + size_t triangles = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* p0 = vertex_positions + vertex_stride_float * a; + const float* p1 = vertex_positions + vertex_stride_float * b; + const float* p2 = vertex_positions + vertex_stride_float * c; + + float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; + + float normalx = p10[1] * p20[2] - p10[2] * p20[1]; + float normaly = p10[2] * p20[0] - p10[0] * p20[2]; + float normalz = p10[0] * p20[1] - p10[1] * p20[0]; + + float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); + + // no need to include degenerate triangles - they will be invisible anyway + if (area == 0.f) + continue; + + // record triangle normals & corners for future use; normal and corner 0 define a plane equation + normals[triangles][0] = normalx / area; + normals[triangles][1] = normaly / area; + normals[triangles][2] = normalz / area; + memcpy(corners[triangles][0], p0, 3 * sizeof(float)); + memcpy(corners[triangles][1], p1, 3 * sizeof(float)); + memcpy(corners[triangles][2], p2, 3 * sizeof(float)); + triangles++; + } + + meshopt_Bounds bounds = {}; + + // degenerate cluster, no valid triangles => trivial reject (cone data is 0) + if (triangles == 0) + return bounds; + + // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well + float psphere[4] = {}; + computeBoundingSphere(psphere, corners[0], triangles * 3); + + float center[3] = {psphere[0], psphere[1], psphere[2]}; + + // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis + float nsphere[4] = {}; + computeBoundingSphere(nsphere, normals, triangles); + + float axis[3] = {nsphere[0], nsphere[1], nsphere[2]}; + float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); + float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength; + + axis[0] *= invaxislength; + axis[1] *= invaxislength; + axis[2] *= invaxislength; + + // compute a tight cone around all normals, mindp = cos(angle/2) + float mindp = 1.f; + + for (size_t i = 0; i < triangles; ++i) + { + float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2]; + + mindp = (dp < mindp) ? dp : mindp; + } + + // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones + bounds.center[0] = center[0]; + bounds.center[1] = center[1]; + bounds.center[2] = center[2]; + bounds.radius = psphere[3]; + + // degenerate cluster, normal cone is larger than a hemisphere => trivial accept + // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable + // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful + if (mindp <= 0.1f) + { + bounds.cone_cutoff = 1; + bounds.cone_cutoff_s8 = 127; + return bounds; + } + + float maxt = 0; + + // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles + for (size_t i = 0; i < triangles; ++i) + { + // dot(center-t*axis-corner, trinormal) = 0 + // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0 + float cx = center[0] - corners[i][0][0]; + float cy = center[1] - corners[i][0][1]; + float cz = center[2] - corners[i][0][2]; + + float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2]; + float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2]; + + // dn should be larger than mindp cutoff above + assert(dn > 0.f); + float t = dc / dn; + + maxt = (t > maxt) ? t : maxt; + } + + // cone apex should be in the negative half-space of all cluster triangles by construction + bounds.cone_apex[0] = center[0] - axis[0] * maxt; + bounds.cone_apex[1] = center[1] - axis[1] * maxt; + bounds.cone_apex[2] = center[2] - axis[2] * maxt; + + // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis + bounds.cone_axis[0] = axis[0]; + bounds.cone_axis[1] = axis[1]; + bounds.cone_axis[2] = axis[2]; + + // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone + // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a)) + bounds.cone_cutoff = sqrtf(1 - mindp * mindp); + + // quantize axis & cutoff to 8-bit SNORM format + bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8)); + bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8)); + bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8)); + + // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error + float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]); + float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]); + float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]); + + // note that we need to round this up instead of rounding to nearest, hence +1 + int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1); + + bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 127 : (signed char)(cone_cutoff_s8); + + return bounds; +} + +meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])]; + + for (size_t i = 0; i < meshlet->triangle_count; ++i) + { + unsigned int a = meshlet->vertices[meshlet->indices[i][0]]; + unsigned int b = meshlet->vertices[meshlet->indices[i][1]]; + unsigned int c = meshlet->vertices[meshlet->indices[i][2]]; + + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + indices[i * 3 + 0] = a; + indices[i * 3 + 1] = b; + indices[i * 3 + 2] = c; + } + + return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); +} diff --git a/thirdparty/meshoptimizer/indexcodec.cpp b/thirdparty/meshoptimizer/indexcodec.cpp new file mode 100644 index 0000000000..eeb541e5be --- /dev/null +++ b/thirdparty/meshoptimizer/indexcodec.cpp @@ -0,0 +1,752 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include <stdio.h> +#endif + +// This work is based on: +// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 +// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 2014 +namespace meshopt +{ + +const unsigned char kIndexHeader = 0xe0; +const unsigned char kSequenceHeader = 0xd0; + +static int gEncodeIndexVersion = 0; + +typedef unsigned int VertexFifo[16]; +typedef unsigned int EdgeFifo[16][2]; + +static const unsigned int kTriangleIndexOrder[3][3] = { + {0, 1, 2}, + {1, 2, 0}, + {2, 0, 1}, +}; + +static const unsigned char kCodeAuxEncodingTable[16] = { + 0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69, + 0, 0, // last two entries aren't used for encoding +}; + +static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next) +{ + (void)a; + + return (b == next) ? 1 : (c == next) ? 2 : 0; +} + +static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset) +{ + for (int i = 0; i < 16; ++i) + { + size_t index = (offset - 1 - i) & 15; + + unsigned int e0 = fifo[index][0]; + unsigned int e1 = fifo[index][1]; + + if (e0 == a && e1 == b) + return (i << 2) | 0; + if (e0 == b && e1 == c) + return (i << 2) | 1; + if (e0 == c && e1 == a) + return (i << 2) | 2; + } + + return -1; +} + +static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset) +{ + fifo[offset][0] = a; + fifo[offset][1] = b; + offset = (offset + 1) & 15; +} + +static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset) +{ + for (int i = 0; i < 16; ++i) + { + size_t index = (offset - 1 - i) & 15; + + if (fifo[index] == v) + return i; + } + + return -1; +} + +static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1) +{ + fifo[offset] = v; + offset = (offset + cond) & 15; +} + +static void encodeVByte(unsigned char*& data, unsigned int v) +{ + // encode 32-bit value in up to 5 7-bit groups + do + { + *data++ = (v & 127) | (v > 127 ? 128 : 0); + v >>= 7; + } while (v); +} + +static unsigned int decodeVByte(const unsigned char*& data) +{ + unsigned char lead = *data++; + + // fast path: single byte + if (lead < 128) + return lead; + + // slow path: up to 4 extra bytes + // note that this loop always terminates, which is important for malformed data + unsigned int result = lead & 127; + unsigned int shift = 7; + + for (int i = 0; i < 4; ++i) + { + unsigned char group = *data++; + result |= (group & 127) << shift; + shift += 7; + + if (group < 128) + break; + } + + return result; +} + +static void encodeIndex(unsigned char*& data, unsigned int index, unsigned int last) +{ + unsigned int d = index - last; + unsigned int v = (d << 1) ^ (int(d) >> 31); + + encodeVByte(data, v); +} + +static unsigned int decodeIndex(const unsigned char*& data, unsigned int last) +{ + unsigned int v = decodeVByte(data); + unsigned int d = (v >> 1) ^ -int(v & 1); + + return last + d; +} + +static int getCodeAuxIndex(unsigned char v, const unsigned char* table) +{ + for (int i = 0; i < 16; ++i) + if (table[i] == v) + return i; + + return -1; +} + +static void writeTriangle(void* destination, size_t offset, size_t index_size, unsigned int a, unsigned int b, unsigned int c) +{ + if (index_size == 2) + { + static_cast<unsigned short*>(destination)[offset + 0] = (unsigned short)(a); + static_cast<unsigned short*>(destination)[offset + 1] = (unsigned short)(b); + static_cast<unsigned short*>(destination)[offset + 2] = (unsigned short)(c); + } + else + { + static_cast<unsigned int*>(destination)[offset + 0] = a; + static_cast<unsigned int*>(destination)[offset + 1] = b; + static_cast<unsigned int*>(destination)[offset + 2] = c; + } +} + +#if TRACE +static size_t sortTop16(unsigned char dest[16], size_t stats[256]) +{ + size_t destsize = 0; + + for (size_t i = 0; i < 256; ++i) + { + size_t j = 0; + for (; j < destsize; ++j) + { + if (stats[i] >= stats[dest[j]]) + { + if (destsize < 16) + destsize++; + + memmove(&dest[j + 1], &dest[j], destsize - 1 - j); + dest[j] = (unsigned char)i; + break; + } + } + + if (j == destsize && destsize < 16) + { + dest[destsize] = (unsigned char)i; + destsize++; + } + } + + return destsize; +} +#endif + +} // namespace meshopt + +size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + +#if TRACE + size_t codestats[256] = {}; + size_t codeauxstats[256] = {}; +#endif + + // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table + if (buffer_size < 1 + index_count / 3 + 16) + return 0; + + int version = gEncodeIndexVersion; + + buffer[0] = (unsigned char)(kIndexHeader | version); + + EdgeFifo edgefifo; + memset(edgefifo, -1, sizeof(edgefifo)); + + VertexFifo vertexfifo; + memset(vertexfifo, -1, sizeof(vertexfifo)); + + size_t edgefifooffset = 0; + size_t vertexfifooffset = 0; + + unsigned int next = 0; + unsigned int last = 0; + + unsigned char* code = buffer + 1; + unsigned char* data = code + index_count / 3; + unsigned char* data_safe_end = buffer + buffer_size - 16; + + int fecmax = version >= 1 ? 13 : 15; + + // use static encoding table; it's possible to pack the result and then build an optimal table and repack + // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set + const unsigned char* codeaux_table = kCodeAuxEncodingTable; + + for (size_t i = 0; i < index_count; i += 3) + { + // make sure we have enough space to write a triangle + // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index + // after this we can be sure we can write without extra bounds checks + if (data > data_safe_end) + return 0; + + int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset); + + if (fer >= 0 && (fer >> 2) < 15) + { + const unsigned int* order = kTriangleIndexOrder[fer & 3]; + + unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; + + // encode edge index and vertex fifo index, next or free index + int fe = fer >> 2; + int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); + + int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? (next++, 0) : 15; + + if (fec == 15 && version >= 1) + { + // encode last-1 and last+1 to optimize strip-like sequences + if (c + 1 == last) + fec = 13, last = c; + if (c == last + 1) + fec = 14, last = c; + } + + *code++ = (unsigned char)((fe << 4) | fec); + +#if TRACE + codestats[code[-1]]++; +#endif + + // note that we need to update the last index since free indices are delta-encoded + if (fec == 15) + encodeIndex(data, c, last), last = c; + + // we only need to push third vertex since first two are likely already in the vertex fifo + if (fec == 0 || fec >= fecmax) + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + // we only need to push two new edges to edge fifo since the third one is already there + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next); + const unsigned int* order = kTriangleIndexOrder[rotation]; + + unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; + + // if a/b/c are 0/1/2, we emit a reset code + bool reset = false; + + if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1) + { + reset = true; + next = 0; + + // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future + // this makes sure next continues to get incremented instead of being stuck + memset(vertexfifo, -1, sizeof(vertexfifo)); + } + + int fb = getVertexFifo(vertexfifo, b, vertexfifooffset); + int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); + + // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a + int fea = (a == next) ? (next++, 0) : 15; + int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; + int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? (next++, 0) : 15; + + // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise + unsigned char codeaux = (unsigned char)((feb << 4) | fec); + int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table); + + // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15 + if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset) + { + *code++ = (unsigned char)((15 << 4) | codeauxindex); + } + else + { + *code++ = (unsigned char)((15 << 4) | 14 | fea); + *data++ = codeaux; + } + +#if TRACE + codestats[code[-1]]++; + codeauxstats[codeaux]++; +#endif + + // note that we need to update the last index since free indices are delta-encoded + if (fea == 15) + encodeIndex(data, a, last), last = a; + + if (feb == 15) + encodeIndex(data, b, last), last = b; + + if (fec == 15) + encodeIndex(data, c, last), last = c; + + // only push vertices that weren't already in fifo + if (fea == 0 || fea == 15) + pushVertexFifo(vertexfifo, a, vertexfifooffset); + + if (feb == 0 || feb == 15) + pushVertexFifo(vertexfifo, b, vertexfifooffset); + + if (fec == 0 || fec == 15) + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + + // make sure we have enough space to write codeaux table + if (data > data_safe_end) + return 0; + + // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding + // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data + // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input + for (size_t i = 0; i < 16; ++i) + { + // decoder assumes that table entries never refer to separately encoded indices + assert((codeaux_table[i] & 0xf) != 0xf && (codeaux_table[i] >> 4) != 0xf); + + *data++ = codeaux_table[i]; + } + + // since we encode restarts as codeaux without a table reference, we need to make sure 00 is encoded as a table reference + assert(codeaux_table[0] == 0); + + assert(data >= buffer + index_count / 3 + 16); + assert(data <= buffer + buffer_size); + +#if TRACE + unsigned char codetop[16], codeauxtop[16]; + size_t codetopsize = sortTop16(codetop, codestats); + size_t codeauxtopsize = sortTop16(codeauxtop, codeauxstats); + + size_t sumcode = 0, sumcodeaux = 0; + for (size_t i = 0; i < 256; ++i) + sumcode += codestats[i], sumcodeaux += codeauxstats[i]; + + size_t acccode = 0, acccodeaux = 0; + + printf("code\t\t\t\t\tcodeaux\n"); + + for (size_t i = 0; i < codetopsize && i < codeauxtopsize; ++i) + { + acccode += codestats[codetop[i]]; + acccodeaux += codeauxstats[codeauxtop[i]]; + + printf("%2d: %02x = %d (%.1f%% ..%.1f%%)\t\t%2d: %02x = %d (%.1f%% ..%.1f%%)\n", + int(i), codetop[i], int(codestats[codetop[i]]), double(codestats[codetop[i]]) / double(sumcode) * 100, double(acccode) / double(sumcode) * 100, + int(i), codeauxtop[i], int(codeauxstats[codeauxtop[i]]), double(codeauxstats[codeauxtop[i]]) / double(sumcodeaux) * 100, double(acccodeaux) / double(sumcodeaux) * 100); + } +#endif + + return data - buffer; +} + +size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count) +{ + assert(index_count % 3 == 0); + + // compute number of bits required for each index + unsigned int vertex_bits = 1; + + while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) + vertex_bits++; + + // worst-case encoding is 2 header bytes + 3 varint-7 encoded index deltas + unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7; + + return 1 + (index_count / 3) * (2 + 3 * vertex_groups) + 16; +} + +void meshopt_encodeIndexVersion(int version) +{ + assert(unsigned(version) <= 1); + + meshopt::gEncodeIndexVersion = version; +} + +int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(index_size == 2 || index_size == 4); + + // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table + if (buffer_size < 1 + index_count / 3 + 16) + return -2; + + if ((buffer[0] & 0xf0) != kIndexHeader) + return -1; + + int version = buffer[0] & 0x0f; + if (version > 1) + return -1; + + EdgeFifo edgefifo; + memset(edgefifo, -1, sizeof(edgefifo)); + + VertexFifo vertexfifo; + memset(vertexfifo, -1, sizeof(vertexfifo)); + + size_t edgefifooffset = 0; + size_t vertexfifooffset = 0; + + unsigned int next = 0; + unsigned int last = 0; + + int fecmax = version >= 1 ? 13 : 15; + + // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end + const unsigned char* code = buffer + 1; + const unsigned char* data = code + index_count / 3; + const unsigned char* data_safe_end = buffer + buffer_size - 16; + + const unsigned char* codeaux_table = data_safe_end; + + for (size_t i = 0; i < index_count; i += 3) + { + // make sure we have enough data to read for a triangle + // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index + // after this we can be sure we can read without extra bounds checks + if (data > data_safe_end) + return -2; + + unsigned char codetri = *code++; + + if (codetri < 0xf0) + { + int fe = codetri >> 4; + + // fifo reads are wrapped around 16 entry buffer + unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; + unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; + + int fec = codetri & 15; + + // note: this is the most common path in the entire decoder + // inside this if we try to stay branchless (by using cmov/etc.) since these aren't predictable + if (fec < fecmax) + { + // fifo reads are wrapped around 16 entry buffer + unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; + unsigned int c = (fec == 0) ? next : cf; + + int fec0 = fec == 0; + next += fec0; + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); + + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + unsigned int c = 0; + + // fec - (fec ^ 3) decodes 13, 14 into -1, 1 + // note that we need to update the last index since free indices are delta-encoded + last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + else + { + // fast path: read codeaux from the table + if (codetri < 0xfe) + { + unsigned char codeaux = codeaux_table[codetri & 15]; + + // note: table can't contain feb/fec=15 + int feb = codeaux >> 4; + int fec = codeaux & 15; + + // fifo reads are wrapped around 16 entry buffer + // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior + unsigned int a = next++; + + unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; + unsigned int b = (feb == 0) ? next : bf; + + int feb0 = feb == 0; + next += feb0; + + unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15]; + unsigned int c = (fec == 0) ? next : cf; + + int fec0 = fec == 0; + next += fec0; + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, a, vertexfifooffset); + pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0); + pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); + + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + // slow path: read a full byte for codeaux instead of using a table lookup + unsigned char codeaux = *data++; + + int fea = codetri == 0xfe ? 0 : 15; + int feb = codeaux >> 4; + int fec = codeaux & 15; + + // reset: codeaux is 0 but encoded as not-a-table + if (codeaux == 0) + next = 0; + + // fifo reads are wrapped around 16 entry buffer + // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior + unsigned int a = (fea == 0) ? next++ : 0; + unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15]; + unsigned int c = (fec == 0) ? next++ : vertexfifo[(vertexfifooffset - fec) & 15]; + + // note that we need to update the last index since free indices are delta-encoded + if (fea == 15) + last = a = decodeIndex(data, last); + + if (feb == 15) + last = b = decodeIndex(data, last); + + if (fec == 15) + last = c = decodeIndex(data, last); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, a, vertexfifooffset); + pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15)); + pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15)); + + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + } + + // we should've read all data bytes and stopped at the boundary between data and codeaux table + if (data != data_safe_end) + return -3; + + return 0; +} + +size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +{ + using namespace meshopt; + + // the minimum valid encoding is header, 1 byte per index and a 4-byte tail + if (buffer_size < 1 + index_count + 4) + return 0; + + int version = gEncodeIndexVersion; + + buffer[0] = (unsigned char)(kSequenceHeader | version); + + unsigned int last[2] = {}; + unsigned int current = 0; + + unsigned char* data = buffer + 1; + unsigned char* data_safe_end = buffer + buffer_size - 4; + + for (size_t i = 0; i < index_count; ++i) + { + // make sure we have enough data to write + // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end + // after this we can be sure we can write without extra bounds checks + if (data >= data_safe_end) + return 0; + + unsigned int index = indices[i]; + + // this is a heuristic that switches between baselines when the delta grows too large + // we want the encoded delta to fit into one byte (7 bits), but 2 bits are used for sign and baseline index + // for now we immediately switch the baseline when delta grows too large - this can be adjusted arbitrarily + int cd = int(index - last[current]); + current ^= ((cd < 0 ? -cd : cd) >= 30); + + // encode delta from the last index + unsigned int d = index - last[current]; + unsigned int v = (d << 1) ^ (int(d) >> 31); + + // note: low bit encodes the index of the last baseline which will be used for reconstruction + encodeVByte(data, (v << 1) | current); + + // update last for the next iteration that uses it + last[current] = index; + } + + // make sure we have enough space to write tail + if (data > data_safe_end) + return 0; + + for (int k = 0; k < 4; ++k) + *data++ = 0; + + return data - buffer; +} + +size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count) +{ + // compute number of bits required for each index + unsigned int vertex_bits = 1; + + while (vertex_bits < 32 && vertex_count > size_t(1) << vertex_bits) + vertex_bits++; + + // worst-case encoding is 1 varint-7 encoded index delta for a K bit value and an extra bit + unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7; + + return 1 + index_count * vertex_groups + 4; +} + +int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + // the minimum valid encoding is header, 1 byte per index and a 4-byte tail + if (buffer_size < 1 + index_count + 4) + return -2; + + if ((buffer[0] & 0xf0) != kSequenceHeader) + return -1; + + int version = buffer[0] & 0x0f; + if (version > 1) + return -1; + + const unsigned char* data = buffer + 1; + const unsigned char* data_safe_end = buffer + buffer_size - 4; + + unsigned int last[2] = {}; + + for (size_t i = 0; i < index_count; ++i) + { + // make sure we have enough data to read + // each index reads at most 5 bytes of data; there's a 4 byte tail after data_safe_end + // after this we can be sure we can read without extra bounds checks + if (data >= data_safe_end) + return -2; + + unsigned int v = decodeVByte(data); + + // decode the index of the last baseline + unsigned int current = v & 1; + v >>= 1; + + // reconstruct index as a delta + unsigned int d = (v >> 1) ^ -int(v & 1); + unsigned int index = last[current] + d; + + // update last for the next iteration that uses it + last[current] = index; + + if (index_size == 2) + { + static_cast<unsigned short*>(destination)[i] = (unsigned short)(index); + } + else + { + static_cast<unsigned int*>(destination)[i] = index; + } + } + + // we should've read all data bytes and stopped at the boundary between data and tail + if (data != data_safe_end) + return -3; + + return 0; +} diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp new file mode 100644 index 0000000000..aa4a30efa4 --- /dev/null +++ b/thirdparty/meshoptimizer/indexgenerator.cpp @@ -0,0 +1,347 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +namespace meshopt +{ + +static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len) +{ + // MurmurHash2 + const unsigned int m = 0x5bd1e995; + const int r = 24; + + while (len >= 4) + { + unsigned int k = *reinterpret_cast<const unsigned int*>(key); + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + key += 4; + len -= 4; + } + + return h; +} + +struct VertexHasher +{ + const unsigned char* vertices; + size_t vertex_size; + size_t vertex_stride; + + size_t hash(unsigned int index) const + { + return hashUpdate4(0, vertices + index * vertex_stride, vertex_size); + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return memcmp(vertices + lhs * vertex_stride, vertices + rhs * vertex_stride, vertex_size) == 0; + } +}; + +struct VertexStreamHasher +{ + const meshopt_Stream* streams; + size_t stream_count; + + size_t hash(unsigned int index) const + { + unsigned int h = 0; + + for (size_t i = 0; i < stream_count; ++i) + { + const meshopt_Stream& s = streams[i]; + const unsigned char* data = static_cast<const unsigned char*>(s.data); + + h = hashUpdate4(h, data + index * s.stride, s.size); + } + + return h; + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + for (size_t i = 0; i < stream_count; ++i) + { + const meshopt_Stream& s = streams[i]; + const unsigned char* data = static_cast<const unsigned char*>(s.data); + + if (memcmp(data + lhs * s.stride, data + rhs * s.stride, s.size) != 0) + return false; + } + + return true; + } +}; + +static size_t hashBuckets(size_t count) +{ + size_t buckets = 1; + while (buckets < count) + buckets *= 2; + + return buckets; +} + +template <typename T, typename Hash> +static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty) +{ + assert(buckets > 0); + assert((buckets & (buckets - 1)) == 0); + + size_t hashmod = buckets - 1; + size_t bucket = hash.hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + T& item = table[bucket]; + + if (item == empty) + return &item; + + if (hash.equal(item, key)) + return &item; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(false && "Hash table is full"); // unreachable + return 0; +} + +} // namespace meshopt + +size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(indices || index_count == vertex_count); + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + memset(destination, -1, vertex_count * sizeof(unsigned int)); + + VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_size}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? indices[i] : unsigned(i); + assert(index < vertex_count); + + if (destination[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + { + *entry = index; + + destination[index] = next_vertex++; + } + else + { + assert(destination[*entry] != ~0u); + + destination[index] = destination[*entry]; + } + } + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} + +size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) +{ + using namespace meshopt; + + assert(indices || index_count == vertex_count); + assert(index_count % 3 == 0); + assert(stream_count > 0 && stream_count <= 16); + + for (size_t i = 0; i < stream_count; ++i) + { + assert(streams[i].size > 0 && streams[i].size <= 256); + assert(streams[i].size <= streams[i].stride); + } + + meshopt_Allocator allocator; + + memset(destination, -1, vertex_count * sizeof(unsigned int)); + + VertexStreamHasher hasher = {streams, stream_count}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? indices[i] : unsigned(i); + assert(index < vertex_count); + + if (destination[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + { + *entry = index; + + destination[index] = next_vertex++; + } + else + { + assert(destination[*entry] != ~0u); + + destination[index] = destination[*entry]; + } + } + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} + +void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) +{ + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + // support in-place remap + if (destination == vertices) + { + unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size); + memcpy(vertices_copy, vertices, vertex_count * vertex_size); + vertices = vertices_copy; + } + + for (size_t i = 0; i < vertex_count; ++i) + { + if (remap[i] != ~0u) + { + assert(remap[i] < vertex_count); + + memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size); + } + } +} + +void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap) +{ + assert(index_count % 3 == 0); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? indices[i] : unsigned(i); + assert(remap[index] != ~0u); + + destination[i] = remap[index]; + } +} + +void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +{ + using namespace meshopt; + + assert(indices); + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size <= vertex_stride); + + meshopt_Allocator allocator; + + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (remap[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + destination[i] = remap[index]; + } +} + +void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) +{ + using namespace meshopt; + + assert(indices); + assert(index_count % 3 == 0); + assert(stream_count > 0 && stream_count <= 16); + + for (size_t i = 0; i < stream_count; ++i) + { + assert(streams[i].size > 0 && streams[i].size <= 256); + assert(streams[i].size <= streams[i].stride); + } + + meshopt_Allocator allocator; + + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + VertexStreamHasher hasher = {streams, stream_count}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (remap[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + destination[i] = remap[index]; + } +} diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h new file mode 100644 index 0000000000..a442d103c8 --- /dev/null +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -0,0 +1,948 @@ +/** + * meshoptimizer - version 0.15 + * + * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at https://github.com/zeux/meshoptimizer + * + * This library is distributed under the MIT License. See notice at the end of this file. + */ +#pragma once + +#include <assert.h> +#include <stddef.h> + +/* Version macro; major * 1000 + minor * 10 + patch */ +#define MESHOPTIMIZER_VERSION 150 /* 0.15 */ + +/* If no API is defined, assume default */ +#ifndef MESHOPTIMIZER_API +#define MESHOPTIMIZER_API +#endif + +/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */ +#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API + +/* C interface */ +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Vertex attribute stream, similar to glVertexPointer + * Each element takes size bytes, with stride controlling the spacing between successive elements. + */ +struct meshopt_Stream +{ + const void* data; + size_t size; + size_t stride; +}; + +/** + * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); + +/** + * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream. + * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); + +/** + * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap + * + * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap) + * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap + */ +MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap); + +/** + * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap); + +/** + * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary + * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer. + * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. + * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); + +/** + * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary + * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer. + * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. + * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); + +/** + * Vertex transform cache optimizer + * Reorders indices to reduce the number of GPU vertex shader invocations + * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Vertex transform cache optimizer for strip-like caches + * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective + * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Vertex transform cache optimizer for FIFO caches + * Reorders indices to reduce the number of GPU vertex shader invocations + * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache + * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * cache_size should be less than the actual GPU cache size to avoid cache thrashing + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); + +/** + * Overdraw optimizer + * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw + * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently + */ +MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); + +/** + * Vertex fetch cache optimizer + * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing + * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused + * This functions works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream. + * + * destination must contain enough space for the resulting vertex buffer (vertex_count elements) + * indices is used both as an input and as an output index buffer + */ +MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); + +/** + * Vertex fetch cache optimizer + * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing + * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused + * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + */ +MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Index buffer encoder + * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original. + * Input index buffer must represent a triangle list. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first. + * + * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); + +/** + * Experimental: Set index encoder format version + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeIndexVersion(int version); + +/** + * Index buffer decoder + * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Experimental: Index sequence encoder + * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original. + * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * + * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count); + +/** + * Index sequence decoder + * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). + * + * destination must contain enough space for the resulting index sequence (index_count elements) + */ +MESHOPTIMIZER_EXPERIMENTAL int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Vertex buffer encoder + * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream. + * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized. + * + * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); + +/** + * Experimental: Set vertex encoder format version + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeVertexVersion(int version); + +/** + * Vertex buffer decoder + * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data. + * + * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) + */ +MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Vertex buffer filters + * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. + * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation. + * + * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. + * + * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct. + * Each component is stored as an 16-bit integer; stride must be equal to 8. + * + * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M. + * Each 32-bit component is decoded in isolation; stride must be divisible by 4. + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size); + +/** + * Experimental: Mesh simplifier + * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible + * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. + * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification. + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); + +/** + * Experimental: Mesh simplifier (sloppy) + * Reduces the number of triangles in the mesh, sacrificing mesh apperance for simplification performance + * The algorithm doesn't preserve mesh topology but is always able to reach target triangle count. + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); + +/** + * Experimental: Point cloud simplifier + * Reduces the number of points in the cloud to reach the given target + * Returns the number of points after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); + +/** + * Mesh stripifier + * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles + * Returns the number of indices in the resulting strip, with destination containing new index data + * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance. + * + * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound + * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles + */ +MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index); +MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count); + +/** + * Mesh unstripifier + * Converts a triangle strip to a triangle list + * Returns the number of indices in the resulting list, with destination containing new index data + * + * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound + */ +MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index); +MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count); + +struct meshopt_VertexCacheStatistics +{ + unsigned int vertices_transformed; + unsigned int warps_executed; + float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */ + float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */ +}; + +/** + * Vertex transform cache analyzer + * Returns cache hit statistics using a simplified FIFO model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); + +struct meshopt_OverdrawStatistics +{ + unsigned int pixels_covered; + unsigned int pixels_shaded; + float overdraw; /* shaded pixels / covered pixels; best case 1.0 */ +}; + +/** + * Overdraw analyzer + * Returns overdraw statistics using a software rasterizer + * Results may not match actual GPU performance + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +struct meshopt_VertexFetchStatistics +{ + unsigned int bytes_fetched; + float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ +}; + +/** + * Vertex fetch cache analyzer + * Returns cache hit statistics using a simplified direct mapped model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); + +struct meshopt_Meshlet +{ + unsigned int vertices[64]; + unsigned char indices[126][3]; + unsigned char triangle_count; + unsigned char vertex_count; +}; + +/** + * Experimental: Meshlet builder + * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer + * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. + * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * + * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound + * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); + +struct meshopt_Bounds +{ + /* bounding sphere, useful for frustum and occlusion culling */ + float center[3]; + float radius; + + /* normal cone, useful for backface culling */ + float cone_apex[3]; + float cone_axis[3]; + float cone_cutoff; /* = cos(angle/2) */ + + /* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */ + signed char cone_axis_s8[3]; + signed char cone_cutoff_s8; +}; + +/** + * Experimental: Cluster bounds generator + * Creates bounding volumes that can be used for frustum, backface and occlusion culling. + * + * For backface culling with orthographic projection, use the following formula to reject backfacing clusters: + * dot(view, cone_axis) >= cone_cutoff + * + * For perspective projection, you can the formula that needs cone apex in addition to axis & cutoff: + * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff + * + * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead: + * dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position) + * or an equivalent formula that doesn't have a singularity at center = camera_position: + * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius + * + * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere + * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size) + */ +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Experimental: Spatial sorter + * Generates a remap table that can be used to reorder points for spatial locality. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Experimental: Spatial sorter + * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Set allocation callbacks + * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library. + * Note that all algorithms only allocate memory for temporary use. + * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first. + */ +MESHOPTIMIZER_API void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*)); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +/* Quantization into commonly supported data formats */ +#ifdef __cplusplus +/** + * Quantize a float in [0..1] range into an N-bit fixed point unorm value + * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion + * Maximum reconstruction error: 1/2^(N+1) + */ +inline int meshopt_quantizeUnorm(float v, int N); + +/** + * Quantize a float in [-1..1] range into an N-bit fixed point snorm value + * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions) + * Maximum reconstruction error: 1/2^N + */ +inline int meshopt_quantizeSnorm(float v, int N); + +/** + * Quantize a float into half-precision floating point value + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Representable magnitude range: [6e-5; 65504] + * Maximum relative reconstruction error: 5e-4 + */ +inline unsigned short meshopt_quantizeHalf(float v); + +/** + * Quantize a float into a floating point value with a limited number of significant mantissa bits + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Assumes N is in a valid mantissa precision range, which is 1..23 + */ +inline float meshopt_quantizeFloat(float v, int N); +#endif + +/** + * C++ template interface + * + * These functions mirror the C interface the library provides, providing template-based overloads so that + * the caller can use an arbitrary type for the index data, both for input and output. + * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not, + * the wrappers end up allocating memory and copying index data to convert from one type to another. + */ +#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +template <typename T> +inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +template <typename T> +inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template <typename T> +inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap); +template <typename T> +inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); +template <typename T> +inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template <typename T> +inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); +template <typename T> +inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); +template <typename T> +inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); +template <typename T> +inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); +template <typename T> +inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count); +template <typename T> +inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +template <typename T> +inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +template <typename T> +inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +template <typename T> +inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +template <typename T> +inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +template <typename T> +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); +template <typename T> +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); +template <typename T> +inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index); +template <typename T> +inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index); +template <typename T> +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size); +template <typename T> +inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +template <typename T> +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +template <typename T> +inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> +inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +#endif + +/* Inline implementation */ +#ifdef __cplusplus +inline int meshopt_quantizeUnorm(float v, int N) +{ + const float scale = float((1 << N) - 1); + + v = (v >= 0) ? v : 0; + v = (v <= 1) ? v : 1; + + return int(v * scale + 0.5f); +} + +inline int meshopt_quantizeSnorm(float v, int N) +{ + const float scale = float((1 << (N - 1)) - 1); + + float round = (v >= 0 ? 0.5f : -0.5f); + + v = (v >= -1) ? v : -1; + v = (v <= +1) ? v : +1; + + return int(v * scale + round); +} + +inline unsigned short meshopt_quantizeHalf(float v) +{ + union { float f; unsigned int ui; } u = {v}; + unsigned int ui = u.ui; + + int s = (ui >> 16) & 0x8000; + int em = ui & 0x7fffffff; + + /* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */ + int h = (em - (112 << 23) + (1 << 12)) >> 13; + + /* underflow: flush to zero; 113 encodes exponent -14 */ + h = (em < (113 << 23)) ? 0 : h; + + /* overflow: infinity; 143 encodes exponent 16 */ + h = (em >= (143 << 23)) ? 0x7c00 : h; + + /* NaN; note that we convert all types of NaN to qNaN */ + h = (em > (255 << 23)) ? 0x7e00 : h; + + return (unsigned short)(s | h); +} + +inline float meshopt_quantizeFloat(float v, int N) +{ + union { float f; unsigned int ui; } u = {v}; + unsigned int ui = u.ui; + + const int mask = (1 << (23 - N)) - 1; + const int round = (1 << (23 - N)) >> 1; + + int e = ui & 0x7f800000; + unsigned int rui = (ui + round) & ~mask; + + /* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */ + ui = e == 0x7f800000 ? ui : rui; + + /* flush denormals to zero */ + ui = e == 0 ? 0 : ui; + + u.ui = ui; + return u.f; +} +#endif + +/* Internal implementation helpers */ +#ifdef __cplusplus +class meshopt_Allocator +{ +public: + template <typename T> + struct StorageT + { + static void* (*allocate)(size_t); + static void (*deallocate)(void*); + }; + + typedef StorageT<void> Storage; + + meshopt_Allocator() + : blocks() + , count(0) + { + } + + ~meshopt_Allocator() + { + for (size_t i = count; i > 0; --i) + Storage::deallocate(blocks[i - 1]); + } + + template <typename T> T* allocate(size_t size) + { + assert(count < sizeof(blocks) / sizeof(blocks[0])); + T* result = static_cast<T*>(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T))); + blocks[count++] = result; + return result; + } + +private: + void* blocks[24]; + size_t count; +}; + +// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker +template <typename T> void* (*meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new; +template <typename T> void (*meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete; +#endif + +/* Inline implementation for C++ templated wrappers */ +#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +template <typename T, bool ZeroCopy = sizeof(T) == sizeof(unsigned int)> +struct meshopt_IndexAdapter; + +template <typename T> +struct meshopt_IndexAdapter<T, false> +{ + T* result; + unsigned int* data; + size_t count; + + meshopt_IndexAdapter(T* result_, const T* input, size_t count_) + : result(result_) + , data(0) + , count(count_) + { + size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int); + + data = static_cast<unsigned int*>(meshopt_Allocator::Storage::allocate(size)); + + if (input) + { + for (size_t i = 0; i < count; ++i) + data[i] = input[i]; + } + } + + ~meshopt_IndexAdapter() + { + if (result) + { + for (size_t i = 0; i < count; ++i) + result[i] = T(data[i]); + } + + meshopt_Allocator::Storage::deallocate(data); + } +}; + +template <typename T> +struct meshopt_IndexAdapter<T, true> +{ + unsigned int* data; + + meshopt_IndexAdapter(T* result, const T* input, size_t) + : data(reinterpret_cast<unsigned int*>(result ? result : const_cast<T*>(input))) + { + } +}; + +template <typename T> +inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemap(destination, indices ? in.data : 0, index_count, vertices, vertex_count, vertex_size); +} + +template <typename T> +inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +{ + meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count); +} + +template <typename T> +inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) +{ + meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap); +} + +template <typename T> +inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride); +} + +template <typename T> +inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count); +} + +template <typename T> +inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); +} + +template <typename T> +inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count); +} + +template <typename T> +inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); +} + +template <typename T> +inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); +} + +template <typename T> +inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count); +} + +template <typename T> +inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter<T> inout(indices, indices, index_count); + + return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size); +} + +template <typename T> +inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); +} + +template <typename T> +inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +{ + char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; + (void)index_size_valid; + + return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size); +} + +template <typename T> +inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count); +} + +template <typename T> +inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +{ + char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; + (void)index_size_valid; + + return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size); +} + +template <typename T> +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error); +} + +template <typename T> +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, target_index_count); + + return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count); +} + +template <typename T> +inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, (index_count / 3) * 5); + + return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index)); +} + +template <typename T> +inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, (index_count - 2) * 3); + + return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index)); +} + +template <typename T> +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); +} + +template <typename T> +inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); +} + +template <typename T> +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles); +} + +template <typename T> +inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> +inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} +#endif + +/** + * Copyright (c) 2016-2020 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ diff --git a/thirdparty/meshoptimizer/overdrawanalyzer.cpp b/thirdparty/meshoptimizer/overdrawanalyzer.cpp new file mode 100644 index 0000000000..8d5859ba39 --- /dev/null +++ b/thirdparty/meshoptimizer/overdrawanalyzer.cpp @@ -0,0 +1,230 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <float.h> +#include <string.h> + +// This work is based on: +// Nicolas Capens. Advanced Rasterization. 2004 +namespace meshopt +{ + +const int kViewport = 256; + +struct OverdrawBuffer +{ + float z[kViewport][kViewport][2]; + unsigned int overdraw[kViewport][kViewport][2]; +}; + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef max +#define max(a, b) ((a) > (b) ? (a) : (b)) +#endif + +static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3) +{ + // z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1) + // z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1) + // (x2-x1 y2-y1)(dzdx) = (z2-z1) + // (x3-x1 y3-y1)(dzdy) (z3-z1) + // we'll solve it with Cramer's rule + float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1); + float invdet = (det == 0) ? 0 : 1 / det; + + dzdx = (z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1) * invdet; + dzdy = (x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1) * invdet; + + return det; +} + +// half-space fixed point triangle rasterizer +static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z) +{ + // compute depth gradients + float DZx, DZy; + float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z); + int sign = det > 0; + + // flip backfacing triangles to simplify rasterization logic + if (sign) + { + // flipping v2 & v3 preserves depth gradients since they're based on v1 + float t; + t = v2x, v2x = v3x, v3x = t; + t = v2y, v2y = v3y, v3y = t; + t = v2z, v2z = v3z, v3z = t; + + // flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below + v1z = kViewport - v1z; + DZx = -DZx; + DZy = -DZy; + } + + // coordinates, 28.4 fixed point + int X1 = int(16.0f * v1x + 0.5f); + int X2 = int(16.0f * v2x + 0.5f); + int X3 = int(16.0f * v3x + 0.5f); + + int Y1 = int(16.0f * v1y + 0.5f); + int Y2 = int(16.0f * v2y + 0.5f); + int Y3 = int(16.0f * v3y + 0.5f); + + // bounding rectangle, clipped against viewport + // since we rasterize pixels with covered centers, min >0.5 should round up + // as for max, due to top-left filling convention we will never rasterize right/bottom edges + // so max >= 0.5 should round down + int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0); + int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport); + int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0); + int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport); + + // deltas, 28.4 fixed point + int DX12 = X1 - X2; + int DX23 = X2 - X3; + int DX31 = X3 - X1; + + int DY12 = Y1 - Y2; + int DY23 = Y2 - Y3; + int DY31 = Y3 - Y1; + + // fill convention correction + int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0); + int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0); + int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0); + + // half edge equations, 24.8 fixed point + // note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers + int FX = (minx << 4) + 8; + int FY = (miny << 4) + 8; + int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1; + int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1; + int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1; + float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f); + + for (int y = miny; y < maxy; y++) + { + int CX1 = CY1; + int CX2 = CY2; + int CX3 = CY3; + float ZX = ZY; + + for (int x = minx; x < maxx; x++) + { + // check if all CXn are non-negative + if ((CX1 | CX2 | CX3) >= 0) + { + if (ZX >= buffer->z[y][x][sign]) + { + buffer->z[y][x][sign] = ZX; + buffer->overdraw[y][x][sign]++; + } + } + + // signed left shift is UB for negative numbers so use unsigned-signed casts + CX1 -= int(unsigned(DY12) << 4); + CX2 -= int(unsigned(DY23) << 4); + CX3 -= int(unsigned(DY31) << 4); + ZX += DZx; + } + + // signed left shift is UB for negative numbers so use unsigned-signed casts + CY1 += int(unsigned(DX12) << 4); + CY2 += int(unsigned(DX23) << 4); + CY3 += int(unsigned(DX31) << 4); + ZY += DZy; + } +} + +} // namespace meshopt + +meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + meshopt_OverdrawStatistics result = {}; + + float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; + float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; + + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions + i * vertex_stride_float; + + for (int j = 0; j < 3; ++j) + { + minv[j] = min(minv[j], v[j]); + maxv[j] = max(maxv[j], v[j]); + } + } + + float extent = max(maxv[0] - minv[0], max(maxv[1] - minv[1], maxv[2] - minv[2])); + float scale = kViewport / extent; + + float* triangles = allocator.allocate<float>(index_count * 3); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + const float* v = vertex_positions + index * vertex_stride_float; + + triangles[i * 3 + 0] = (v[0] - minv[0]) * scale; + triangles[i * 3 + 1] = (v[1] - minv[1]) * scale; + triangles[i * 3 + 2] = (v[2] - minv[2]) * scale; + } + + OverdrawBuffer* buffer = allocator.allocate<OverdrawBuffer>(1); + + for (int axis = 0; axis < 3; ++axis) + { + memset(buffer, 0, sizeof(OverdrawBuffer)); + + for (size_t i = 0; i < index_count; i += 3) + { + const float* vn0 = &triangles[3 * (i + 0)]; + const float* vn1 = &triangles[3 * (i + 1)]; + const float* vn2 = &triangles[3 * (i + 2)]; + + switch (axis) + { + case 0: + rasterize(buffer, vn0[2], vn0[1], vn0[0], vn1[2], vn1[1], vn1[0], vn2[2], vn2[1], vn2[0]); + break; + case 1: + rasterize(buffer, vn0[0], vn0[2], vn0[1], vn1[0], vn1[2], vn1[1], vn2[0], vn2[2], vn2[1]); + break; + case 2: + rasterize(buffer, vn0[1], vn0[0], vn0[2], vn1[1], vn1[0], vn1[2], vn2[1], vn2[0], vn2[2]); + break; + } + } + + for (int y = 0; y < kViewport; ++y) + for (int x = 0; x < kViewport; ++x) + for (int s = 0; s < 2; ++s) + { + unsigned int overdraw = buffer->overdraw[y][x][s]; + + result.pixels_covered += overdraw > 0; + result.pixels_shaded += overdraw; + } + } + + result.overdraw = result.pixels_covered ? float(result.pixels_shaded) / float(result.pixels_covered) : 0.f; + + return result; +} diff --git a/thirdparty/meshoptimizer/overdrawoptimizer.cpp b/thirdparty/meshoptimizer/overdrawoptimizer.cpp new file mode 100644 index 0000000000..143656ed76 --- /dev/null +++ b/thirdparty/meshoptimizer/overdrawoptimizer.cpp @@ -0,0 +1,333 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <math.h> +#include <string.h> + +// This work is based on: +// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007 +namespace meshopt +{ + +static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count) +{ + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + float mesh_centroid[3] = {}; + + for (size_t i = 0; i < index_count; ++i) + { + const float* p = vertex_positions + vertex_stride_float * indices[i]; + + mesh_centroid[0] += p[0]; + mesh_centroid[1] += p[1]; + mesh_centroid[2] += p[2]; + } + + mesh_centroid[0] /= index_count; + mesh_centroid[1] /= index_count; + mesh_centroid[2] /= index_count; + + for (size_t cluster = 0; cluster < cluster_count; ++cluster) + { + size_t cluster_begin = clusters[cluster] * 3; + size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count; + assert(cluster_begin < cluster_end); + + float cluster_area = 0; + float cluster_centroid[3] = {}; + float cluster_normal[3] = {}; + + for (size_t i = cluster_begin; i < cluster_end; i += 3) + { + const float* p0 = vertex_positions + vertex_stride_float * indices[i + 0]; + const float* p1 = vertex_positions + vertex_stride_float * indices[i + 1]; + const float* p2 = vertex_positions + vertex_stride_float * indices[i + 2]; + + float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; + + float normalx = p10[1] * p20[2] - p10[2] * p20[1]; + float normaly = p10[2] * p20[0] - p10[0] * p20[2]; + float normalz = p10[0] * p20[1] - p10[1] * p20[0]; + + float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); + + cluster_centroid[0] += (p0[0] + p1[0] + p2[0]) * (area / 3); + cluster_centroid[1] += (p0[1] + p1[1] + p2[1]) * (area / 3); + cluster_centroid[2] += (p0[2] + p1[2] + p2[2]) * (area / 3); + cluster_normal[0] += normalx; + cluster_normal[1] += normaly; + cluster_normal[2] += normalz; + cluster_area += area; + } + + float inv_cluster_area = cluster_area == 0 ? 0 : 1 / cluster_area; + + cluster_centroid[0] *= inv_cluster_area; + cluster_centroid[1] *= inv_cluster_area; + cluster_centroid[2] *= inv_cluster_area; + + float cluster_normal_length = sqrtf(cluster_normal[0] * cluster_normal[0] + cluster_normal[1] * cluster_normal[1] + cluster_normal[2] * cluster_normal[2]); + float inv_cluster_normal_length = cluster_normal_length == 0 ? 0 : 1 / cluster_normal_length; + + cluster_normal[0] *= inv_cluster_normal_length; + cluster_normal[1] *= inv_cluster_normal_length; + cluster_normal[2] *= inv_cluster_normal_length; + + float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]}; + + sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2]; + } +} + +static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count) +{ + // compute sort data bounds and renormalize, using fixed point snorm + float sort_data_max = 1e-3f; + + for (size_t i = 0; i < cluster_count; ++i) + { + float dpa = fabsf(sort_data[i]); + + sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max; + } + + const int sort_bits = 11; + + for (size_t i = 0; i < cluster_count; ++i) + { + // note that we flip distribution since high dot product should come first + float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max); + + sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1); + } + + // fill histogram for counting sort + unsigned int histogram[1 << sort_bits]; + memset(histogram, 0, sizeof(histogram)); + + for (size_t i = 0; i < cluster_count; ++i) + { + histogram[sort_keys[i]]++; + } + + // compute offsets based on histogram data + size_t histogram_sum = 0; + + for (size_t i = 0; i < 1 << sort_bits; ++i) + { + size_t count = histogram[i]; + histogram[i] = unsigned(histogram_sum); + histogram_sum += count; + } + + assert(histogram_sum == cluster_count); + + // compute sort order based on offsets + for (size_t i = 0; i < cluster_count; ++i) + { + sort_order[histogram[sort_keys[i]]++] = unsigned(i); + } +} + +static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& timestamp) +{ + unsigned int cache_misses = 0; + + // if vertex is not in cache, put it in cache + if (timestamp - cache_timestamps[a] > cache_size) + { + cache_timestamps[a] = timestamp++; + cache_misses++; + } + + if (timestamp - cache_timestamps[b] > cache_size) + { + cache_timestamps[b] = timestamp++; + cache_misses++; + } + + if (timestamp - cache_timestamps[c] > cache_size) + { + cache_timestamps[c] = timestamp++; + cache_misses++; + } + + return cache_misses; +} + +static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps) +{ + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + unsigned int timestamp = cache_size + 1; + + size_t face_count = index_count / 3; + + size_t result = 0; + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); + + // when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh + // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently + // suggests an inefficiency in the vertex cache optimization algorithm + // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0 + if (i == 0 || m == 3) + { + destination[result++] = unsigned(i); + } + } + + assert(result <= index_count / 3); + + return result; +} + +static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps) +{ + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + unsigned int timestamp = 0; + + size_t result = 0; + + for (size_t it = 0; it < cluster_count; ++it) + { + size_t start = clusters[it]; + size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3; + assert(start < end); + + // reset cache + timestamp += cache_size + 1; + + // measure cluster ACMR + unsigned int cluster_misses = 0; + + for (size_t i = start; i < end; ++i) + { + unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); + + cluster_misses += m; + } + + float cluster_threshold = threshold * (float(cluster_misses) / float(end - start)); + + // first cluster always starts from the hard cluster boundary + destination[result++] = unsigned(start); + + // reset cache + timestamp += cache_size + 1; + + unsigned int running_misses = 0; + unsigned int running_faces = 0; + + for (size_t i = start; i < end; ++i) + { + unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); + + running_misses += m; + running_faces += 1; + + if (float(running_misses) / float(running_faces) <= cluster_threshold) + { + // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one + // note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last + // cluster is empty; however, the 'pop_back' after the loop will clean it up + destination[result++] = unsigned(i + 1); + + // reset cache + timestamp += cache_size + 1; + + running_misses = 0; + running_faces = 0; + } + } + + // each time we reach the target ACMR we flush the cluster + // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles + // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results + // thus we remove the last cluster boundary, merging the last complete cluster with the last incomplete one + // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end' + // to the cluster boundary array which we need to remove anyway - this code will do that automatically + if (destination[result - 1] != start) + { + result--; + } + } + + assert(result >= cluster_count); + assert(result <= index_count / 3); + + return result; +} + +} // namespace meshopt + +void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + unsigned int cache_size = 16; + + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + + // generate hard boundaries from full-triangle cache misses + unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3); + size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps); + + // generate soft boundaries + unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1); + size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps); + + const unsigned int* clusters = soft_clusters; + size_t cluster_count = soft_cluster_count; + + // fill sort data + float* sort_data = allocator.allocate<float>(cluster_count); + calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count); + + // sort clusters using sort data + unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count); + unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count); + calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count); + + // fill output buffer + size_t offset = 0; + + for (size_t it = 0; it < cluster_count; ++it) + { + unsigned int cluster = sort_order[it]; + assert(cluster < cluster_count); + + size_t cluster_begin = clusters[cluster] * 3; + size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count; + assert(cluster_begin < cluster_end); + + memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int)); + offset += cluster_end - cluster_begin; + } + + assert(offset == index_count); +} diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp new file mode 100644 index 0000000000..bd523275ce --- /dev/null +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -0,0 +1,1529 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <float.h> +#include <math.h> +#include <string.h> + +#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include <stdio.h> +#endif + +// This work is based on: +// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 +// Michael Garland. Quadric-based polygonal surface simplification. 1999 +// Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000 +// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003 +// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019 +namespace meshopt +{ + +struct EdgeAdjacency +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill edge counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill edge data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = b; + adjacency.data[adjacency.offsets[b]++] = c; + adjacency.data[adjacency.offsets[c]++] = a; + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + +struct PositionHasher +{ + const float* vertex_positions; + size_t vertex_stride_float; + + size_t hash(unsigned int index) const + { + const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float); + + // Optimized Spatial Hashing for Collision Detection of Deformable Objects + return (key[0] * 73856093) ^ (key[1] * 19349663) ^ (key[2] * 83492791); + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return memcmp(vertex_positions + lhs * vertex_stride_float, vertex_positions + rhs * vertex_stride_float, sizeof(float) * 3) == 0; + } +}; + +static size_t hashBuckets2(size_t count) +{ + size_t buckets = 1; + while (buckets < count) + buckets *= 2; + + return buckets; +} + +template <typename T, typename Hash> +static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty) +{ + assert(buckets > 0); + assert((buckets & (buckets - 1)) == 0); + + size_t hashmod = buckets - 1; + size_t bucket = hash.hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + T& item = table[bucket]; + + if (item == empty) + return &item; + + if (hash.equal(item, key)) + return &item; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(false && "Hash table is full"); // unreachable + return 0; +} + +static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +{ + PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float)}; + + size_t table_size = hashBuckets2(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + // build forward remap: for each vertex, which other (canonical) vertex does it map to? + // we use position equivalence for this, and remap vertices to other existing vertices + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int index = unsigned(i); + unsigned int* entry = hashLookup2(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + // build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex? + // entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i + for (size_t i = 0; i < vertex_count; ++i) + wedge[i] = unsigned(i); + + for (size_t i = 0; i < vertex_count; ++i) + if (remap[i] != i) + { + unsigned int r = remap[i]; + + wedge[i] = wedge[r]; + wedge[r] = unsigned(i); + } +} + +enum VertexKind +{ + Kind_Manifold, // not on an attribute seam, not on any boundary + Kind_Border, // not on an attribute seam, has exactly two open edges + Kind_Seam, // on an attribute seam with exactly two attribute seam edges + Kind_Complex, // none of the above; these vertices can move as long as all wedges move to the target vertex + Kind_Locked, // none of the above; these vertices can't move + + Kind_Count +}; + +// manifold vertices can collapse onto anything +// border/seam vertices can only be collapsed onto border/seam respectively +// complex vertices can collapse onto complex/locked +// a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex +// for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore +const unsigned char kCanCollapse[Kind_Count][Kind_Count] = { + {1, 1, 1, 1, 1}, + {0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0}, + {0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0}, +}; + +// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge +// note that for seam edges, the opposite edge isn't present in the attribute-based topology +// but is present if you consider a position-only mesh variant +const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { + {1, 1, 1, 0, 1}, + {1, 0, 1, 0, 0}, + {1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0}, + {1, 0, 1, 0, 0}, +}; + +static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b) +{ + unsigned int count = adjacency.counts[a]; + const unsigned int* data = adjacency.data + adjacency.offsets[a]; + + for (size_t i = 0; i < count; ++i) + if (data[i] == b) + return true; + + return false; +} + +static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge) +{ + memset(loop, -1, vertex_count * sizeof(unsigned int)); + memset(loopback, -1, vertex_count * sizeof(unsigned int)); + + // incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1 + // note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam + // but here it's okay to fill the data out for other types of vertices as well + unsigned int* openinc = loopback; + unsigned int* openout = loop; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int vertex = unsigned(i); + + unsigned int count = adjacency.counts[vertex]; + const unsigned int* data = adjacency.data + adjacency.offsets[vertex]; + + for (size_t j = 0; j < count; ++j) + { + unsigned int target = data[j]; + + if (!hasEdge(adjacency, target, vertex)) + { + openinc[target] = (openinc[target] == ~0u) ? vertex : target; + openout[vertex] = (openout[vertex] == ~0u) ? target : vertex; + } + } + } + +#if TRACE + size_t lockedstats[4] = {}; +#define TRACELOCKED(i) lockedstats[i]++; +#else +#define TRACELOCKED(i) (void)0 +#endif + + for (size_t i = 0; i < vertex_count; ++i) + { + if (remap[i] == i) + { + if (wedge[i] == i) + { + // no attribute seam, need to check if it's manifold + unsigned int openi = openinc[i], openo = openout[i]; + + // note: we classify any vertices with no open edges as manifold + // this is technically incorrect - if 4 triangles share an edge, we'll classify vertices as manifold + // it's unclear if this is a problem in practice + if (openi == ~0u && openo == ~0u) + { + result[i] = Kind_Manifold; + } + else if (openi != i && openo != i) + { + result[i] = Kind_Border; + } + else + { + result[i] = Kind_Locked; + TRACELOCKED(0); + } + } + else if (wedge[wedge[i]] == i) + { + // attribute seam; need to distinguish between Seam and Locked + unsigned int w = wedge[i]; + unsigned int openiv = openinc[i], openov = openout[i]; + unsigned int openiw = openinc[w], openow = openout[w]; + + // seam should have one open half-edge for each vertex, and the edges need to "connect" - point to the same vertex post-remap + if (openiv != ~0u && openiv != i && openov != ~0u && openov != i && + openiw != ~0u && openiw != w && openow != ~0u && openow != w) + { + if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw]) + { + result[i] = Kind_Seam; + } + else + { + result[i] = Kind_Locked; + TRACELOCKED(1); + } + } + else + { + result[i] = Kind_Locked; + TRACELOCKED(2); + } + } + else + { + // more than one vertex maps to this one; we don't have classification available + result[i] = Kind_Locked; + TRACELOCKED(3); + } + } + else + { + assert(remap[i] < i); + + result[i] = result[remap[i]]; + } + } + +#if TRACE + printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n", + int(lockedstats[0]), int(lockedstats[1]), int(lockedstats[2]), int(lockedstats[3])); +#endif +} + +struct Vector3 +{ + float x, y, z; +}; + +static void rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +{ + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; + float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; + + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions_data + i * vertex_stride_float; + + result[i].x = v[0]; + result[i].y = v[1]; + result[i].z = v[2]; + + for (int j = 0; j < 3; ++j) + { + float vj = v[j]; + + minv[j] = minv[j] > vj ? vj : minv[j]; + maxv[j] = maxv[j] < vj ? vj : maxv[j]; + } + } + + float extent = 0.f; + + extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]); + extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); + extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); + + float scale = extent == 0 ? 0.f : 1.f / extent; + + for (size_t i = 0; i < vertex_count; ++i) + { + result[i].x = (result[i].x - minv[0]) * scale; + result[i].y = (result[i].y - minv[1]) * scale; + result[i].z = (result[i].z - minv[2]) * scale; + } +} + +struct Quadric +{ + float a00, a11, a22; + float a10, a20, a21; + float b0, b1, b2, c; + float w; +}; + +struct Collapse +{ + unsigned int v0; + unsigned int v1; + + union + { + unsigned int bidi; + float error; + unsigned int errorui; + }; +}; + +static float normalize(Vector3& v) +{ + float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); + + if (length > 0) + { + v.x /= length; + v.y /= length; + v.z /= length; + } + + return length; +} + +static void quadricAdd(Quadric& Q, const Quadric& R) +{ + Q.a00 += R.a00; + Q.a11 += R.a11; + Q.a22 += R.a22; + Q.a10 += R.a10; + Q.a20 += R.a20; + Q.a21 += R.a21; + Q.b0 += R.b0; + Q.b1 += R.b1; + Q.b2 += R.b2; + Q.c += R.c; + Q.w += R.w; +} + +static float quadricError(const Quadric& Q, const Vector3& v) +{ + float rx = Q.b0; + float ry = Q.b1; + float rz = Q.b2; + + rx += Q.a10 * v.y; + ry += Q.a21 * v.z; + rz += Q.a20 * v.x; + + rx *= 2; + ry *= 2; + rz *= 2; + + rx += Q.a00 * v.x; + ry += Q.a11 * v.y; + rz += Q.a22 * v.z; + + float r = Q.c; + r += rx * v.x; + r += ry * v.y; + r += rz * v.z; + + float s = Q.w == 0.f ? 0.f : 1.f / Q.w; + + return fabsf(r) * s; +} + +static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w) +{ + float aw = a * w; + float bw = b * w; + float cw = c * w; + float dw = d * w; + + Q.a00 = a * aw; + Q.a11 = b * bw; + Q.a22 = c * cw; + Q.a10 = a * bw; + Q.a20 = a * cw; + Q.a21 = b * cw; + Q.b0 = a * dw; + Q.b1 = b * dw; + Q.b2 = c * dw; + Q.c = d * dw; + Q.w = w; +} + +static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) +{ + // we need to encode (x - X) ^ 2 + (y - Y)^2 + (z - Z)^2 into the quadric + Q.a00 = w; + Q.a11 = w; + Q.a22 = w; + Q.a10 = 0.f; + Q.a20 = 0.f; + Q.a21 = 0.f; + Q.b0 = -2.f * x * w; + Q.b1 = -2.f * y * w; + Q.b2 = -2.f * z * w; + Q.c = (x * x + y * y + z * z) * w; + Q.w = w; +} + +static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) +{ + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + + // normal = cross(p1 - p0, p2 - p0) + Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; + float area = normalize(normal); + + float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; + + // we use sqrtf(area) so that the error is scaled linearly; this tends to improve silhouettes + quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, sqrtf(area) * weight); +} + +static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) +{ + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + float length = normalize(p10); + + // p20p = length of projection of p2-p0 onto normalize(p1 - p0) + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z; + + // normal = altitude of triangle from point p2 onto edge p1-p0 + Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - p10.z * p20p}; + normalize(normal); + + float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; + + // note: the weight is scaled linearly with edge length; this has to match the triangle weight + quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); +} + +static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) +{ + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int i0 = indices[i + 0]; + unsigned int i1 = indices[i + 1]; + unsigned int i2 = indices[i + 2]; + + Quadric Q; + quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); + + quadricAdd(vertex_quadrics[remap[i0]], Q); + quadricAdd(vertex_quadrics[remap[i1]], Q); + quadricAdd(vertex_quadrics[remap[i2]], Q); + } +} + +static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) +{ + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[3] = {1, 2, 0}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + // check that either i0 or i1 are border/seam and are on the same edge loop + // note that we need to add the error even for edged that connect e.g. border & locked + // if we don't do that, the adjacent border->border edge won't have correct errors for corners + if (k0 != Kind_Border && k0 != Kind_Seam && k1 != Kind_Border && k1 != Kind_Seam) + continue; + + if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) + continue; + + if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0) + continue; + + // seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges + if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) + continue; + + unsigned int i2 = indices[i + next[next[e]]]; + + // we try hard to maintain border edge geometry; seam edges can move more freely + // due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but aren't critical + const float kEdgeWeightSeam = 1.f; + const float kEdgeWeightBorder = 10.f; + + float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam; + + Quadric Q; + quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight); + + quadricAdd(vertex_quadrics[remap[i0]], Q); + quadricAdd(vertex_quadrics[remap[i1]], Q); + } + } +} + +static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) +{ + size_t collapse_count = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[3] = {1, 2, 0}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + // this can happen either when input has a zero-length edge, or when we perform collapses for complex + // topology w/seams and collapse a manifold vertex that connects to both wedges onto one of them + // we leave edges like this alone since they may be important for preserving mesh integrity + if (remap[i0] == remap[i1]) + continue; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + // the edge has to be collapsible in at least one direction + if (!(kCanCollapse[k0][k1] | kCanCollapse[k1][k0])) + continue; + + // manifold and seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges + if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) + continue; + + // two vertices are on a border or a seam, but there's no direct edge between them + // this indicates that they belong to two different edge loops and we should not collapse this edge + // loop[] tracks half edges so we only need to check i0->i1 + if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) + continue; + + // edge can be collapsed in either direction - we will pick the one with minimum error + // note: we evaluate error later during collapse ranking, here we just tag the edge as bidirectional + if (kCanCollapse[k0][k1] & kCanCollapse[k1][k0]) + { + Collapse c = {i0, i1, {/* bidi= */ 1}}; + collapses[collapse_count++] = c; + } + else + { + // edge can only be collapsed in one direction + unsigned int e0 = kCanCollapse[k0][k1] ? i0 : i1; + unsigned int e1 = kCanCollapse[k0][k1] ? i1 : i0; + + Collapse c = {e0, e1, {/* bidi= */ 0}}; + collapses[collapse_count++] = c; + } + } + } + + return collapse_count; +} + +static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap) +{ + for (size_t i = 0; i < collapse_count; ++i) + { + Collapse& c = collapses[i]; + + unsigned int i0 = c.v0; + unsigned int i1 = c.v1; + + // most edges are bidirectional which means we need to evaluate errors for two collapses + // to keep this code branchless we just use the same edge for unidirectional edges + unsigned int j0 = c.bidi ? i1 : i0; + unsigned int j1 = c.bidi ? i0 : i1; + + const Quadric& qi = vertex_quadrics[remap[i0]]; + const Quadric& qj = vertex_quadrics[remap[j0]]; + + float ei = quadricError(qi, vertex_positions[i1]); + float ej = quadricError(qj, vertex_positions[j1]); + + // pick edge direction with minimal error + c.v0 = ei <= ej ? i0 : j0; + c.v1 = ei <= ej ? i1 : j1; + c.error = ei <= ej ? ei : ej; + } +} + +#if TRACE > 1 +static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind) +{ + size_t ckinds[Kind_Count][Kind_Count] = {}; + float cerrors[Kind_Count][Kind_Count] = {}; + + for (int k0 = 0; k0 < Kind_Count; ++k0) + for (int k1 = 0; k1 < Kind_Count; ++k1) + cerrors[k0][k1] = FLT_MAX; + + for (size_t i = 0; i < collapse_count; ++i) + { + unsigned int i0 = collapses[i].v0; + unsigned int i1 = collapses[i].v1; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + ckinds[k0][k1]++; + cerrors[k0][k1] = (collapses[i].error < cerrors[k0][k1]) ? collapses[i].error : cerrors[k0][k1]; + } + + for (int k0 = 0; k0 < Kind_Count; ++k0) + for (int k1 = 0; k1 < Kind_Count; ++k1) + if (ckinds[k0][k1]) + printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), cerrors[k0][k1]); +} + +static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind) +{ + size_t locked_collapses[Kind_Count][Kind_Count] = {}; + + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[3] = {1, 2, 0}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + locked_collapses[k0][k1] += !kCanCollapse[k0][k1] && !kCanCollapse[k1][k0]; + } + } + + for (int k0 = 0; k0 < Kind_Count; ++k0) + for (int k1 = 0; k1 < Kind_Count; ++k1) + if (locked_collapses[k0][k1]) + printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1])); +} +#endif + +static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count) +{ + const int sort_bits = 11; + + // fill histogram for counting sort + unsigned int histogram[1 << sort_bits]; + memset(histogram, 0, sizeof(histogram)); + + for (size_t i = 0; i < collapse_count; ++i) + { + // skip sign bit since error is non-negative + unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + + histogram[key]++; + } + + // compute offsets based on histogram data + size_t histogram_sum = 0; + + for (size_t i = 0; i < 1 << sort_bits; ++i) + { + size_t count = histogram[i]; + histogram[i] = unsigned(histogram_sum); + histogram_sum += count; + } + + assert(histogram_sum == collapse_count); + + // compute sort order based on offsets + for (size_t i = 0; i < collapse_count; ++i) + { + // skip sign bit since error is non-negative + unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + + sort_order[histogram[key]++] = unsigned(i); + } +} + +static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, size_t triangle_collapse_goal, float error_goal, float error_limit) +{ + size_t edge_collapses = 0; + size_t triangle_collapses = 0; + + for (size_t i = 0; i < collapse_count; ++i) + { + const Collapse& c = collapses[collapse_order[i]]; + + if (c.error > error_limit) + break; + + if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 10) + break; + + if (triangle_collapses >= triangle_collapse_goal) + break; + + unsigned int i0 = c.v0; + unsigned int i1 = c.v1; + + unsigned int r0 = remap[i0]; + unsigned int r1 = remap[i1]; + + // we don't collapse vertices that had source or target vertex involved in a collapse + // it's important to not move the vertices twice since it complicates the tracking/remapping logic + // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass + if (collapse_locked[r0] | collapse_locked[r1]) + continue; + + assert(collapse_remap[r0] == r0); + assert(collapse_remap[r1] == r1); + + quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); + + if (vertex_kind[i0] == Kind_Complex) + { + unsigned int v = i0; + + do + { + collapse_remap[v] = r1; + v = wedge[v]; + } while (v != i0); + } + else if (vertex_kind[i0] == Kind_Seam) + { + // remap v0 to v1 and seam pair of v0 to seam pair of v1 + unsigned int s0 = wedge[i0]; + unsigned int s1 = wedge[i1]; + + assert(s0 != i0 && s1 != i1); + assert(wedge[s0] == i0 && wedge[s1] == i1); + + collapse_remap[i0] = i1; + collapse_remap[s0] = s1; + } + else + { + assert(wedge[i0] == i0); + + collapse_remap[i0] = i1; + } + + collapse_locked[r0] = 1; + collapse_locked[r1] = 1; + + // border edges collapse 1 triangle, other edges collapse 2 or more + triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 1 : 2; + edge_collapses++; + } + + return edge_collapses; +} + +static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap) +{ + size_t write = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int v0 = collapse_remap[indices[i + 0]]; + unsigned int v1 = collapse_remap[indices[i + 1]]; + unsigned int v2 = collapse_remap[indices[i + 2]]; + + // we never move the vertex twice during a single pass + assert(collapse_remap[v0] == v0); + assert(collapse_remap[v1] == v1); + assert(collapse_remap[v2] == v2); + + if (v0 != v1 && v0 != v2 && v1 != v2) + { + indices[write + 0] = v0; + indices[write + 1] = v1; + indices[write + 2] = v2; + write += 3; + } + } + + return write; +} + +static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsigned int* collapse_remap) +{ + for (size_t i = 0; i < vertex_count; ++i) + { + if (loop[i] != ~0u) + { + unsigned int l = loop[i]; + unsigned int r = collapse_remap[l]; + + // i == r is a special case when the seam edge is collapsed in a direction opposite to where loop goes + loop[i] = (i == r) ? loop[l] : r; + } + } +} + +struct CellHasher +{ + const unsigned int* vertex_ids; + + size_t hash(unsigned int i) const + { + unsigned int h = vertex_ids[i]; + + // MurmurHash2 finalizer + h ^= h >> 13; + h *= 0x5bd1e995; + h ^= h >> 15; + return h; + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return vertex_ids[lhs] == vertex_ids[rhs]; + } +}; + +struct IdHasher +{ + size_t hash(unsigned int id) const + { + unsigned int h = id; + + // MurmurHash2 finalizer + h ^= h >> 13; + h *= 0x5bd1e995; + h ^= h >> 15; + return h; + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return lhs == rhs; + } +}; + +struct TriangleHasher +{ + unsigned int* indices; + + size_t hash(unsigned int i) const + { + const unsigned int* tri = indices + i * 3; + + // Optimized Spatial Hashing for Collision Detection of Deformable Objects + return (tri[0] * 73856093) ^ (tri[1] * 19349663) ^ (tri[2] * 83492791); + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + const unsigned int* lt = indices + lhs * 3; + const unsigned int* rt = indices + rhs * 3; + + return lt[0] == rt[0] && lt[1] == rt[1] && lt[2] == rt[2]; + } +}; + +static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size) +{ + assert(grid_size >= 1 && grid_size <= 1024); + float cell_scale = float(grid_size - 1); + + for (size_t i = 0; i < vertex_count; ++i) + { + const Vector3& v = vertex_positions[i]; + + int xi = int(v.x * cell_scale + 0.5f); + int yi = int(v.y * cell_scale + 0.5f); + int zi = int(v.z * cell_scale + 0.5f); + + vertex_ids[i] = (xi << 20) | (yi << 10) | zi; + } +} + +static size_t countTriangles(const unsigned int* vertex_ids, const unsigned int* indices, size_t index_count) +{ + size_t result = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int id0 = vertex_ids[indices[i + 0]]; + unsigned int id1 = vertex_ids[indices[i + 1]]; + unsigned int id2 = vertex_ids[indices[i + 2]]; + + result += (id0 != id1) & (id0 != id2) & (id1 != id2); + } + + return result; +} + +static size_t fillVertexCells(unsigned int* table, size_t table_size, unsigned int* vertex_cells, const unsigned int* vertex_ids, size_t vertex_count) +{ + CellHasher hasher = {vertex_ids}; + + memset(table, -1, table_size * sizeof(unsigned int)); + + size_t result = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int* entry = hashLookup2(table, table_size, hasher, unsigned(i), ~0u); + + if (*entry == ~0u) + { + *entry = unsigned(i); + vertex_cells[i] = unsigned(result++); + } + else + { + vertex_cells[i] = vertex_cells[*entry]; + } + } + + return result; +} + +static size_t countVertexCells(unsigned int* table, size_t table_size, const unsigned int* vertex_ids, size_t vertex_count) +{ + IdHasher hasher; + + memset(table, -1, table_size * sizeof(unsigned int)); + + size_t result = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int id = vertex_ids[i]; + unsigned int* entry = hashLookup2(table, table_size, hasher, id, ~0u); + + result += (*entry == ~0u); + *entry = id; + } + + return result; +} + +static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* vertex_cells) +{ + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int i0 = indices[i + 0]; + unsigned int i1 = indices[i + 1]; + unsigned int i2 = indices[i + 2]; + + unsigned int c0 = vertex_cells[i0]; + unsigned int c1 = vertex_cells[i1]; + unsigned int c2 = vertex_cells[i2]; + + bool single_cell = (c0 == c1) & (c0 == c2); + + Quadric Q; + quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], single_cell ? 3.f : 1.f); + + if (single_cell) + { + quadricAdd(cell_quadrics[c0], Q); + } + else + { + quadricAdd(cell_quadrics[c0], Q); + quadricAdd(cell_quadrics[c1], Q); + quadricAdd(cell_quadrics[c2], Q); + } + } +} + +static void fillCellQuadrics(Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* vertex_cells) +{ + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int c = vertex_cells[i]; + const Vector3& v = vertex_positions[i]; + + Quadric Q; + quadricFromPoint(Q, v.x, v.y, v.z, 1.f); + + quadricAdd(cell_quadrics[c], Q); + } +} + +static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count) +{ + memset(cell_remap, -1, cell_count * sizeof(unsigned int)); + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int cell = vertex_cells[i]; + float error = quadricError(cell_quadrics[cell], vertex_positions[i]); + + if (cell_remap[cell] == ~0u || cell_errors[cell] > error) + { + cell_remap[cell] = unsigned(i); + cell_errors[cell] = error; + } + } +} + +static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap) +{ + TriangleHasher hasher = {destination}; + + memset(tritable, -1, tritable_size * sizeof(unsigned int)); + + size_t result = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int c0 = vertex_cells[indices[i + 0]]; + unsigned int c1 = vertex_cells[indices[i + 1]]; + unsigned int c2 = vertex_cells[indices[i + 2]]; + + if (c0 != c1 && c0 != c2 && c1 != c2) + { + unsigned int a = cell_remap[c0]; + unsigned int b = cell_remap[c1]; + unsigned int c = cell_remap[c2]; + + if (b < a && b < c) + { + unsigned int t = a; + a = b, b = c, c = t; + } + else if (c < a && c < b) + { + unsigned int t = c; + c = b, b = a, a = t; + } + + destination[result * 3 + 0] = a; + destination[result * 3 + 1] = b; + destination[result * 3 + 2] = c; + + unsigned int* entry = hashLookup2(tritable, tritable_size, hasher, unsigned(result), ~0u); + + if (*entry == ~0u) + *entry = unsigned(result++); + } + } + + return result * 3; +} + +static float interpolate(float y, float x0, float y0, float x1, float y1, float x2, float y2) +{ + // three point interpolation from "revenge of interpolation search" paper + float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0); + float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2); + return x1 + num / den; +} + +} // namespace meshopt + +#ifndef NDEBUG +unsigned char* meshopt_simplifyDebugKind = 0; +unsigned int* meshopt_simplifyDebugLoop = 0; +unsigned int* meshopt_simplifyDebugLoopBack = 0; +#endif + +size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_index_count <= index_count); + + meshopt_Allocator allocator; + + unsigned int* result = destination; + + // build adjacency information + EdgeAdjacency adjacency = {}; + buildEdgeAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // build position remap that maps each vertex to the one with identical position + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count); + buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); + + // classify vertices; vertex kind determines collapse rules, see kCanCollapse + unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count); + unsigned int* loop = allocator.allocate<unsigned int>(vertex_count); + unsigned int* loopback = allocator.allocate<unsigned int>(vertex_count); + classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge); + +#if TRACE + size_t unique_positions = 0; + for (size_t i = 0; i < vertex_count; ++i) + unique_positions += remap[i] == i; + + printf("position remap: %d vertices => %d positions\n", int(vertex_count), int(unique_positions)); + + size_t kinds[Kind_Count] = {}; + for (size_t i = 0; i < vertex_count; ++i) + kinds[vertex_kind[i]] += remap[i] == i; + + printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n", + int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked])); +#endif + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + + Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count); + memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); + + fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap); + fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback); + + if (result != indices) + memcpy(result, indices, index_count * sizeof(unsigned int)); + +#if TRACE + size_t pass_count = 0; + float worst_error = 0; +#endif + + Collapse* edge_collapses = allocator.allocate<Collapse>(index_count); + unsigned int* collapse_order = allocator.allocate<unsigned int>(index_count); + unsigned int* collapse_remap = allocator.allocate<unsigned int>(vertex_count); + unsigned char* collapse_locked = allocator.allocate<unsigned char>(vertex_count); + + size_t result_count = index_count; + + // target_error input is linear; we need to adjust it to match quadricError units + float error_limit = target_error * target_error; + + while (result_count > target_index_count) + { + size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); + + // no edges can be collapsed any more due to topology restrictions + if (edge_collapse_count == 0) + break; + + rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap); + +#if TRACE > 1 + dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind); +#endif + + sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count); + + // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit + // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses + size_t triangle_collapse_goal = (result_count - target_index_count) / 3; + size_t edge_collapse_goal = triangle_collapse_goal / 2; + + // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked + // as they will share vertices with other successfull collapses, we need to increase the acceptable error by this factor + const float kPassErrorBound = 1.5f; + + float error_goal = edge_collapse_goal < edge_collapse_count ? edge_collapses[collapse_order[edge_collapse_goal]].error * kPassErrorBound : FLT_MAX; + + for (size_t i = 0; i < vertex_count; ++i) + collapse_remap[i] = unsigned(i); + + memset(collapse_locked, 0, vertex_count); + + size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, triangle_collapse_goal, error_goal, error_limit); + + // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit + if (collapses == 0) + break; + + remapEdgeLoops(loop, vertex_count, collapse_remap); + remapEdgeLoops(loopback, vertex_count, collapse_remap); + + size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); + assert(new_count < result_count); + +#if TRACE + float pass_error = 0.f; + for (size_t i = 0; i < edge_collapse_count; ++i) + { + Collapse& c = edge_collapses[collapse_order[i]]; + + if (collapse_remap[c.v0] == c.v1) + pass_error = c.error; + } + + pass_count++; + worst_error = (worst_error < pass_error) ? pass_error : worst_error; + + printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal); +#endif + + result_count = new_count; + } + +#if TRACE + printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); +#endif + +#if TRACE > 1 + dumpLockedCollapses(result, result_count, vertex_kind); +#endif + +#ifndef NDEBUG + if (meshopt_simplifyDebugKind) + memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count); + + if (meshopt_simplifyDebugLoop) + memcpy(meshopt_simplifyDebugLoop, loop, vertex_count * sizeof(unsigned int)); + + if (meshopt_simplifyDebugLoopBack) + memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int)); +#endif + + return result_count; +} + +size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_index_count <= index_count); + + // we expect to get ~2 triangles/vertex in the output + size_t target_cell_count = target_index_count / 6; + + if (target_cell_count == 0) + return 0; + + meshopt_Allocator allocator; + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + + // find the optimal grid size using guided binary search +#if TRACE + printf("source: %d vertices, %d triangles\n", int(vertex_count), int(index_count / 3)); + printf("target: %d cells, %d triangles\n", int(target_cell_count), int(target_index_count / 3)); +#endif + + unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count); + + const int kInterpolationPasses = 5; + + // invariant: # of triangles in min_grid <= target_count + int min_grid = 0; + int max_grid = 1025; + size_t min_triangles = 0; + size_t max_triangles = index_count / 3; + + // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size... + int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f); + + for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass) + { + assert(min_triangles < target_index_count / 3); + assert(max_grid - min_grid > 1); + + // we clamp the prediction of the grid size to make sure that the search converges + int grid_size = next_grid_size; + grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); + size_t triangles = countTriangles(vertex_ids, indices, index_count); + +#if TRACE + printf("pass %d (%s): grid size %d, triangles %d, %s\n", + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + grid_size, int(triangles), + (triangles <= target_index_count / 3) ? "under" : "over"); +#endif + + float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles)); + + if (triangles <= target_index_count / 3) + { + min_grid = grid_size; + min_triangles = triangles; + } + else + { + max_grid = grid_size; + max_triangles = triangles; + } + + if (triangles == target_index_count / 3 || max_grid - min_grid <= 1) + break; + + // we start by using interpolation search - it usually converges faster + // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN) + next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2; + } + + if (min_triangles == 0) + return 0; + + // build vertex->cell association by mapping all vertices with the same quantized position to the same cell + size_t table_size = hashBuckets2(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + + unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count); + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); + + // build a quadric for each target cell + Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count); + memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); + + fillCellQuadrics(cell_quadrics, indices, index_count, vertex_positions, vertex_cells); + + // for each target cell, find the vertex with the minimal error + unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count); + float* cell_errors = allocator.allocate<float>(cell_count); + + fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); + + // collapse triangles! + // note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :( + size_t tritable_size = hashBuckets2(min_triangles); + unsigned int* tritable = allocator.allocate<unsigned int>(tritable_size); + + size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap); + assert(write <= target_index_count); + +#if TRACE + printf("result: %d cells, %d triangles (%d unfiltered)\n", int(cell_count), int(write / 3), int(min_triangles)); +#endif + + return write; +} + +size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count) +{ + using namespace meshopt; + + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_vertex_count <= vertex_count); + + size_t target_cell_count = target_vertex_count; + + if (target_cell_count == 0) + return 0; + + meshopt_Allocator allocator; + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + + // find the optimal grid size using guided binary search +#if TRACE + printf("source: %d vertices\n", int(vertex_count)); + printf("target: %d cells\n", int(target_cell_count)); +#endif + + unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count); + + size_t table_size = hashBuckets2(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + + const int kInterpolationPasses = 5; + + // invariant: # of vertices in min_grid <= target_count + int min_grid = 0; + int max_grid = 1025; + size_t min_vertices = 0; + size_t max_vertices = vertex_count; + + // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size... + int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f); + + for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass) + { + assert(min_vertices < target_vertex_count); + assert(max_grid - min_grid > 1); + + // we clamp the prediction of the grid size to make sure that the search converges + int grid_size = next_grid_size; + grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); + size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count); + +#if TRACE + printf("pass %d (%s): grid size %d, vertices %d, %s\n", + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + grid_size, int(vertices), + (vertices <= target_vertex_count) ? "under" : "over"); +#endif + + float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices)); + + if (vertices <= target_vertex_count) + { + min_grid = grid_size; + min_vertices = vertices; + } + else + { + max_grid = grid_size; + max_vertices = vertices; + } + + if (vertices == target_vertex_count || max_grid - min_grid <= 1) + break; + + // we start by using interpolation search - it usually converges faster + // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN) + next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2; + } + + if (min_vertices == 0) + return 0; + + // build vertex->cell association by mapping all vertices with the same quantized position to the same cell + unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count); + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); + + // build a quadric for each target cell + Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count); + memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); + + fillCellQuadrics(cell_quadrics, vertex_positions, vertex_count, vertex_cells); + + // for each target cell, find the vertex with the minimal error + unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count); + float* cell_errors = allocator.allocate<float>(cell_count); + + fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); + + // copy results to the output + assert(cell_count <= target_vertex_count); + memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count); + +#if TRACE + printf("result: %d cells\n", int(cell_count)); +#endif + + return cell_count; +} diff --git a/thirdparty/meshoptimizer/spatialorder.cpp b/thirdparty/meshoptimizer/spatialorder.cpp new file mode 100644 index 0000000000..b09f80ac6f --- /dev/null +++ b/thirdparty/meshoptimizer/spatialorder.cpp @@ -0,0 +1,194 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <float.h> +#include <string.h> + +// This work is based on: +// Fabian Giesen. Decoding Morton codes. 2009 +namespace meshopt +{ + +// "Insert" two 0 bits after each of the 10 low bits of x +inline unsigned int part1By2(unsigned int x) +{ + x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210 + x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210 + x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210 + x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10 + x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + return x; +} + +static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +{ + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; + float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; + + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions_data + i * vertex_stride_float; + + for (int j = 0; j < 3; ++j) + { + float vj = v[j]; + + minv[j] = minv[j] > vj ? vj : minv[j]; + maxv[j] = maxv[j] < vj ? vj : maxv[j]; + } + } + + float extent = 0.f; + + extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]); + extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); + extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); + + float scale = extent == 0 ? 0.f : 1.f / extent; + + // generate Morton order based on the position inside a unit cube + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions_data + i * vertex_stride_float; + + int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f); + int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f); + int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f); + + result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2); + } +} + +static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count) +{ + memset(hist, 0, sizeof(hist)); + + // compute 3 10-bit histograms in parallel + for (size_t i = 0; i < count; ++i) + { + unsigned int id = data[i]; + + hist[(id >> 0) & 1023][0]++; + hist[(id >> 10) & 1023][1]++; + hist[(id >> 20) & 1023][2]++; + } + + unsigned int sumx = 0, sumy = 0, sumz = 0; + + // replace histogram data with prefix histogram sums in-place + for (int i = 0; i < 1024; ++i) + { + unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2]; + + hist[i][0] = sumx; + hist[i][1] = sumy; + hist[i][2] = sumz; + + sumx += hx; + sumy += hy; + sumz += hz; + } + + assert(sumx == count && sumy == count && sumz == count); +} + +static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass) +{ + int bitoff = pass * 10; + + for (size_t i = 0; i < count; ++i) + { + unsigned int id = (keys[source[i]] >> bitoff) & 1023; + + destination[hist[id][pass]++] = source[i]; + } +} + +} // namespace meshopt + +void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + unsigned int* keys = allocator.allocate<unsigned int>(vertex_count); + computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride); + + unsigned int hist[1024][3]; + computeHistogram(hist, keys, vertex_count); + + unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count); + + for (size_t i = 0; i < vertex_count; ++i) + destination[i] = unsigned(i); + + // 3-pass radix sort computes the resulting order into scratch + radixPass(scratch, destination, keys, vertex_count, hist, 0); + radixPass(destination, scratch, keys, vertex_count, hist, 1); + radixPass(scratch, destination, keys, vertex_count, hist, 2); + + // since our remap table is mapping old=>new, we need to reverse it + for (size_t i = 0; i < vertex_count; ++i) + destination[scratch[i]] = unsigned(i); +} + +void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + (void)vertex_count; + + size_t face_count = index_count / 3; + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + meshopt_Allocator allocator; + + float* centroids = allocator.allocate<float>(face_count * 3); + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* va = vertex_positions + a * vertex_stride_float; + const float* vb = vertex_positions + b * vertex_stride_float; + const float* vc = vertex_positions + c * vertex_stride_float; + + centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f; + centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f; + centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f; + } + + unsigned int* remap = allocator.allocate<unsigned int>(face_count); + + meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3); + + // support in-order remap + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + unsigned int r = remap[i]; + + destination[r * 3 + 0] = a; + destination[r * 3 + 1] = b; + destination[r * 3 + 2] = c; + } +} diff --git a/thirdparty/meshoptimizer/stripifier.cpp b/thirdparty/meshoptimizer/stripifier.cpp new file mode 100644 index 0000000000..8ce17ef3dc --- /dev/null +++ b/thirdparty/meshoptimizer/stripifier.cpp @@ -0,0 +1,295 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <limits.h> +#include <string.h> + +// This work is based on: +// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996 +namespace meshopt +{ + +static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence) +{ + unsigned int index = 0; + unsigned int iv = ~0u; + + for (size_t i = 0; i < buffer_size; ++i) + { + unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; + unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? vb : vc; + + if (v < iv) + { + index = unsigned(i); + iv = v; + } + } + + return index; +} + +static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1) +{ + for (size_t i = 0; i < buffer_size; ++i) + { + unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; + + if (e0 == a && e1 == b) + return (int(i) << 2) | 2; + else if (e0 == b && e1 == c) + return (int(i) << 2) | 0; + else if (e0 == c && e1 == a) + return (int(i) << 2) | 1; + } + + return -1; +} + +} // namespace meshopt + +size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index) +{ + assert(destination != indices); + assert(index_count % 3 == 0); + + using namespace meshopt; + + meshopt_Allocator allocator; + + const size_t buffer_capacity = 8; + + unsigned int buffer[buffer_capacity][3] = {}; + unsigned int buffer_size = 0; + + size_t index_offset = 0; + + unsigned int strip[2] = {}; + unsigned int parity = 0; + + size_t strip_size = 0; + + // compute vertex valence; this is used to prioritize starting triangle for strips + unsigned int* valence = allocator.allocate<unsigned int>(vertex_count); + memset(valence, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + valence[index]++; + } + + int next = -1; + + while (buffer_size > 0 || index_offset < index_count) + { + assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3)); + + // fill triangle buffer + while (buffer_size < buffer_capacity && index_offset < index_count) + { + buffer[buffer_size][0] = indices[index_offset + 0]; + buffer[buffer_size][1] = indices[index_offset + 1]; + buffer[buffer_size][2] = indices[index_offset + 2]; + + buffer_size++; + index_offset += 3; + } + + assert(buffer_size > 0); + + if (next >= 0) + { + unsigned int i = next >> 2; + unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; + unsigned int v = buffer[i][next & 3]; + + // ordered removal from the buffer + memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); + buffer_size--; + + // update vertex valences for strip start heuristic + valence[a]--; + valence[b]--; + valence[c]--; + + // find next triangle (note that edge order flips on every iteration) + // in some cases we need to perform a swap to pick a different outgoing triangle edge + // for [a b c], the default strip edge is [b c], but we might want to use [a c] + int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]); + int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1; + + if (cont < 0 && swap >= 0) + { + // [a b c] => [a b a c] + destination[strip_size++] = strip[0]; + destination[strip_size++] = v; + + // next strip has same winding + // ? a b => b a v + strip[1] = v; + + next = swap; + } + else + { + // emit the next vertex in the strip + destination[strip_size++] = v; + + // next strip has flipped winding + strip[0] = strip[1]; + strip[1] = v; + parity ^= 1; + + next = cont; + } + } + else + { + // if we didn't find anything, we need to find the next new triangle + // we use a heuristic to maximize the strip length + unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]); + unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; + + // ordered removal from the buffer + memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); + buffer_size--; + + // update vertex valences for strip start heuristic + valence[a]--; + valence[b]--; + valence[c]--; + + // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration + int ea = findStripNext(buffer, buffer_size, c, b); + int eb = findStripNext(buffer, buffer_size, a, c); + int ec = findStripNext(buffer, buffer_size, b, a); + + // in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest + // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear + // reasons - slightly improves the stripification efficiency + int mine = INT_MAX; + mine = (ea >= 0 && mine > ea) ? ea : mine; + mine = (eb >= 0 && mine > eb) ? eb : mine; + mine = (ec >= 0 && mine > ec) ? ec : mine; + + if (ea == mine) + { + // keep abc + next = ea; + } + else if (eb == mine) + { + // abc -> bca + unsigned int t = a; + a = b, b = c, c = t; + + next = eb; + } + else if (ec == mine) + { + // abc -> cab + unsigned int t = c; + c = b, b = a, a = t; + + next = ec; + } + + if (restart_index) + { + if (strip_size) + destination[strip_size++] = restart_index; + + destination[strip_size++] = a; + destination[strip_size++] = b; + destination[strip_size++] = c; + + // new strip always starts with the same edge winding + strip[0] = b; + strip[1] = c; + parity = 1; + } + else + { + if (strip_size) + { + // connect last strip using degenerate triangles + destination[strip_size++] = strip[1]; + destination[strip_size++] = a; + } + + // note that we may need to flip the emitted triangle based on parity + // we always end up with outgoing edge "cb" in the end + unsigned int e0 = parity ? c : b; + unsigned int e1 = parity ? b : c; + + destination[strip_size++] = a; + destination[strip_size++] = e0; + destination[strip_size++] = e1; + + strip[0] = e0; + strip[1] = e1; + parity ^= 1; + } + } + } + + return strip_size; +} + +size_t meshopt_stripifyBound(size_t index_count) +{ + assert(index_count % 3 == 0); + + // worst case without restarts is 2 degenerate indices and 3 indices per triangle + // worst case with restarts is 1 restart index and 3 indices per triangle + return (index_count / 3) * 5; +} + +size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index) +{ + assert(destination != indices); + + size_t offset = 0; + size_t start = 0; + + for (size_t i = 0; i < index_count; ++i) + { + if (restart_index && indices[i] == restart_index) + { + start = i + 1; + } + else if (i - start >= 2) + { + unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i]; + + // flip winding for odd triangles + if ((i - start) & 1) + { + unsigned int t = a; + a = b, b = t; + } + + // although we use restart indices, strip swaps still produce degenerate triangles, so skip them + if (a != b && a != c && b != c) + { + destination[offset + 0] = a; + destination[offset + 1] = b; + destination[offset + 2] = c; + offset += 3; + } + } + } + + return offset; +} + +size_t meshopt_unstripifyBound(size_t index_count) +{ + assert(index_count == 0 || index_count >= 3); + + return (index_count == 0) ? 0 : (index_count - 2) * 3; +} diff --git a/thirdparty/meshoptimizer/vcacheanalyzer.cpp b/thirdparty/meshoptimizer/vcacheanalyzer.cpp new file mode 100644 index 0000000000..3682743820 --- /dev/null +++ b/thirdparty/meshoptimizer/vcacheanalyzer.cpp @@ -0,0 +1,73 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size) +{ + assert(index_count % 3 == 0); + assert(cache_size >= 3); + assert(warp_size == 0 || warp_size >= 3); + + meshopt_Allocator allocator; + + meshopt_VertexCacheStatistics result = {}; + + unsigned int warp_offset = 0; + unsigned int primgroup_offset = 0; + + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + unsigned int timestamp = cache_size + 1; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + bool ac = (timestamp - cache_timestamps[a]) > cache_size; + bool bc = (timestamp - cache_timestamps[b]) > cache_size; + bool cc = (timestamp - cache_timestamps[c]) > cache_size; + + // flush cache if triangle doesn't fit into warp or into the primitive buffer + if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size)) + { + result.warps_executed += warp_offset > 0; + + warp_offset = 0; + primgroup_offset = 0; + + // reset cache + timestamp += cache_size + 1; + } + + // update cache and add vertices to warp + for (int j = 0; j < 3; ++j) + { + unsigned int index = indices[i + j]; + + if (timestamp - cache_timestamps[index] > cache_size) + { + cache_timestamps[index] = timestamp++; + result.vertices_transformed++; + warp_offset++; + } + } + + primgroup_offset++; + } + + size_t unique_vertex_count = 0; + + for (size_t i = 0; i < vertex_count; ++i) + unique_vertex_count += cache_timestamps[i] > 0; + + result.warps_executed += warp_offset > 0; + + result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3); + result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count); + + return result; +} diff --git a/thirdparty/meshoptimizer/vcacheoptimizer.cpp b/thirdparty/meshoptimizer/vcacheoptimizer.cpp new file mode 100644 index 0000000000..fb8ade4b77 --- /dev/null +++ b/thirdparty/meshoptimizer/vcacheoptimizer.cpp @@ -0,0 +1,473 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +// This work is based on: +// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 +// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007 +namespace meshopt +{ + +const size_t kCacheSizeMax = 16; +const size_t kValenceMax = 8; + +struct VertexScoreTable +{ + float cache[1 + kCacheSizeMax]; + float live[1 + kValenceMax]; +}; + +// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD +static const VertexScoreTable kVertexScoreTable = { + {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f}, + {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f}, +}; + +// Tuned to minimize the encoded index buffer size +static const VertexScoreTable kVertexScoreTableStrip = { + {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f}, + {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f}, +}; + +struct TriangleAdjacency +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill triangle counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill triangle data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = unsigned(i); + adjacency.data[adjacency.offsets[b]++] = unsigned(i); + adjacency.data[adjacency.offsets[c]++] = unsigned(i); + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + +static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) +{ + // check dead-end stack + while (dead_end_top) + { + unsigned int vertex = dead_end[--dead_end_top]; + + if (live_triangles[vertex] > 0) + return vertex; + } + + // input order + while (input_cursor < vertex_count) + { + if (live_triangles[input_cursor] > 0) + return input_cursor; + + ++input_cursor; + } + + return ~0u; +} + +static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) +{ + unsigned int best_candidate = ~0u; + int best_priority = -1; + + for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate) + { + unsigned int vertex = *next_candidate; + + // otherwise we don't need to process it + if (live_triangles[vertex] > 0) + { + int priority = 0; + + // will it be in cache after fanning? + if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size) + { + priority = timestamp - cache_timestamps[vertex]; // position in cache + } + + if (priority > best_priority) + { + best_candidate = vertex; + best_priority = priority; + } + } + } + + return best_candidate; +} + +static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles) +{ + assert(cache_position >= -1 && cache_position < int(kCacheSizeMax)); + + unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax; + + return table->cache[1 + cache_position] + table->live[live_triangles_clamped]; +} + +static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count) +{ + // input order + while (input_cursor < face_count) + { + if (!emitted_flags[input_cursor]) + return input_cursor; + + ++input_cursor; + } + + return ~0u; +} + +} // namespace meshopt + +void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + unsigned int cache_size = 16; + assert(cache_size <= kCacheSizeMax); + + size_t face_count = index_count / 3; + + // build adjacency information + TriangleAdjacency adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // live triangle counts + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + // emitted flags + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + // compute initial vertex scores + float* vertex_scores = allocator.allocate<float>(vertex_count); + + for (size_t i = 0; i < vertex_count; ++i) + vertex_scores[i] = vertexScore(table, -1, live_triangles[i]); + + // compute triangle scores + float* triangle_scores = allocator.allocate<float>(face_count); + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0]; + unsigned int b = indices[i * 3 + 1]; + unsigned int c = indices[i * 3 + 2]; + + triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c]; + } + + unsigned int cache_holder[2 * (kCacheSizeMax + 3)]; + unsigned int* cache = cache_holder; + unsigned int* cache_new = cache_holder + kCacheSizeMax + 3; + size_t cache_count = 0; + + unsigned int current_triangle = 0; + unsigned int input_cursor = 1; + + unsigned int output_triangle = 0; + + while (current_triangle != ~0u) + { + assert(output_triangle < face_count); + + unsigned int a = indices[current_triangle * 3 + 0]; + unsigned int b = indices[current_triangle * 3 + 1]; + unsigned int c = indices[current_triangle * 3 + 2]; + + // output indices + destination[output_triangle * 3 + 0] = a; + destination[output_triangle * 3 + 1] = b; + destination[output_triangle * 3 + 2] = c; + output_triangle++; + + // update emitted flags + emitted_flags[current_triangle] = true; + triangle_scores[current_triangle] = 0; + + // new triangle + size_t cache_write = 0; + cache_new[cache_write++] = a; + cache_new[cache_write++] = b; + cache_new[cache_write++] = c; + + // old triangles + for (size_t i = 0; i < cache_count; ++i) + { + unsigned int index = cache[i]; + + if (index != a && index != b && index != c) + { + cache_new[cache_write++] = index; + } + } + + unsigned int* cache_temp = cache; + cache = cache_new, cache_new = cache_temp; + cache_count = cache_write > cache_size ? cache_size : cache_write; + + // update live triangle counts + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // remove emitted triangle from adjacency data + // this makes sure that we spend less time traversing these lists on subsequent iterations + for (size_t k = 0; k < 3; ++k) + { + unsigned int index = indices[current_triangle * 3 + k]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t i = 0; i < neighbours_size; ++i) + { + unsigned int tri = neighbours[i]; + + if (tri == current_triangle) + { + neighbours[i] = neighbours[neighbours_size - 1]; + adjacency.counts[index]--; + break; + } + } + } + + unsigned int best_triangle = ~0u; + float best_score = 0; + + // update cache positions, vertex scores and triangle scores, and find next best triangle + for (size_t i = 0; i < cache_write; ++i) + { + unsigned int index = cache[i]; + + int cache_position = i >= cache_size ? -1 : int(i); + + // update vertex score + float score = vertexScore(table, cache_position, live_triangles[index]); + float score_diff = score - vertex_scores[index]; + + vertex_scores[index] = score; + + // update scores of vertex triangles + const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index]; + const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index]; + + for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + { + unsigned int tri = *it; + assert(!emitted_flags[tri]); + + float tri_score = triangle_scores[tri] + score_diff; + assert(tri_score > 0); + + if (best_score < tri_score) + { + best_triangle = tri; + best_score = tri_score; + } + + triangle_scores[tri] = tri_score; + } + } + + // step through input triangles in order if we hit a dead-end + current_triangle = best_triangle; + + if (current_triangle == ~0u) + { + current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count); + } + } + + assert(input_cursor == face_count); + assert(output_triangle == face_count); +} + +void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable); +} + +void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip); +} + +void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(cache_size >= 3); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + size_t face_count = index_count / 3; + + // build adjacency information + TriangleAdjacency adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // live triangle counts + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + // cache time stamps + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + // dead-end stack + unsigned int* dead_end = allocator.allocate<unsigned int>(index_count); + unsigned int dead_end_top = 0; + + // emitted flags + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + unsigned int current_vertex = 0; + + unsigned int timestamp = cache_size + 1; + unsigned int input_cursor = 1; // vertex to restart from in case of dead-end + + unsigned int output_triangle = 0; + + while (current_vertex != ~0u) + { + const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top; + + // emit all vertex neighbours + const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; + const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex]; + + for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + { + unsigned int triangle = *it; + + if (!emitted_flags[triangle]) + { + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + + // output indices + destination[output_triangle * 3 + 0] = a; + destination[output_triangle * 3 + 1] = b; + destination[output_triangle * 3 + 2] = c; + output_triangle++; + + // update dead-end stack + dead_end[dead_end_top + 0] = a; + dead_end[dead_end_top + 1] = b; + dead_end[dead_end_top + 2] = c; + dead_end_top += 3; + + // update live triangle counts + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // update cache info + // if vertex is not in cache, put it in cache + if (timestamp - cache_timestamps[a] > cache_size) + cache_timestamps[a] = timestamp++; + + if (timestamp - cache_timestamps[b] > cache_size) + cache_timestamps[b] = timestamp++; + + if (timestamp - cache_timestamps[c] > cache_size) + cache_timestamps[c] = timestamp++; + + // update emitted flags + emitted_flags[triangle] = true; + } + } + + // next candidates are the ones we pushed to dead-end stack just now + const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top; + + // get next vertex + current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); + + if (current_vertex == ~0u) + { + current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count); + } + } + + assert(output_triangle == face_count); +} diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp new file mode 100644 index 0000000000..784c9a13db --- /dev/null +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -0,0 +1,1265 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +// The block below auto-detects SIMD ISA that can be used on the target platform +#ifndef MESHOPTIMIZER_NO_SIMD + +// The SIMD implementation requires SSSE3, which can be enabled unconditionally through compiler settings +#if defined(__AVX__) || defined(__SSSE3__) +#define SIMD_SSE +#endif + +// An experimental implementation using AVX512 instructions; it's only enabled when AVX512 is enabled through compiler settings +#if defined(__AVX512VBMI2__) && defined(__AVX512VBMI__) && defined(__AVX512VL__) && defined(__POPCNT__) +#undef SIMD_SSE +#define SIMD_AVX +#endif + +// MSVC supports compiling SSSE3 code regardless of compile options; we use a cpuid-based scalar fallback +#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) +#define SIMD_SSE +#define SIMD_FALLBACK +#endif + +// GCC 4.9+ and clang 3.8+ support targeting SIMD ISA from individual functions; we use a cpuid-based scalar fallback +#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && ((defined(__clang__) && __clang_major__ * 100 + __clang_minor__ >= 308) || (defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 409)) && (defined(__i386__) || defined(__x86_64__)) +#define SIMD_SSE +#define SIMD_FALLBACK +#define SIMD_TARGET __attribute__((target("ssse3"))) +#endif + +// GCC/clang define these when NEON support is available +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define SIMD_NEON +#endif + +// On MSVC, we assume that ARM builds always target NEON-capable devices +#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) +#define SIMD_NEON +#endif + +// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD +#if defined(__wasm_simd128__) +#define SIMD_WASM +#endif + +#ifndef SIMD_TARGET +#define SIMD_TARGET +#endif + +#endif // !MESHOPTIMIZER_NO_SIMD + +#ifdef SIMD_SSE +#include <tmmintrin.h> +#endif + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) +#ifdef _MSC_VER +#include <intrin.h> // __cpuid +#else +#include <cpuid.h> // __cpuid +#endif +#endif + +#ifdef SIMD_AVX +#include <immintrin.h> +#endif + +#ifdef SIMD_NEON +#if defined(_MSC_VER) && defined(_M_ARM64) +#include <arm64_neon.h> +#else +#include <arm_neon.h> +#endif +#endif + +#ifdef SIMD_WASM +#include <wasm_simd128.h> +#endif + +#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include <stdio.h> +#endif + +#ifdef SIMD_WASM +#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i) +#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) +#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) +#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2) +#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3) +#endif + +namespace meshopt +{ + +const unsigned char kVertexHeader = 0xa0; + +static int gEncodeVertexVersion = 0; + +const size_t kVertexBlockSizeBytes = 8192; +const size_t kVertexBlockMaxSize = 256; +const size_t kByteGroupSize = 16; +const size_t kByteGroupDecodeLimit = 24; +const size_t kTailMaxSize = 32; + +static size_t getVertexBlockSize(size_t vertex_size) +{ + // make sure the entire block fits into the scratch buffer + size_t result = kVertexBlockSizeBytes / vertex_size; + + // align to byte group size; we encode each byte as a byte group + // if vertex block is misaligned, it results in wasted bytes, so just truncate the block size + result &= ~(kByteGroupSize - 1); + + return (result < kVertexBlockMaxSize) ? result : kVertexBlockMaxSize; +} + +inline unsigned char zigzag8(unsigned char v) +{ + return ((signed char)(v) >> 7) ^ (v << 1); +} + +inline unsigned char unzigzag8(unsigned char v) +{ + return -(v & 1) ^ (v >> 1); +} + +#if TRACE +struct Stats +{ + size_t size; + size_t header; + size_t bitg[4]; + size_t bitb[4]; +}; + +Stats* bytestats; +Stats vertexstats[256]; +#endif + +static bool encodeBytesGroupZero(const unsigned char* buffer) +{ + for (size_t i = 0; i < kByteGroupSize; ++i) + if (buffer[i]) + return false; + + return true; +} + +static size_t encodeBytesGroupMeasure(const unsigned char* buffer, int bits) +{ + assert(bits >= 1 && bits <= 8); + + if (bits == 1) + return encodeBytesGroupZero(buffer) ? 0 : size_t(-1); + + if (bits == 8) + return kByteGroupSize; + + size_t result = kByteGroupSize * bits / 8; + + unsigned char sentinel = (1 << bits) - 1; + + for (size_t i = 0; i < kByteGroupSize; ++i) + result += buffer[i] >= sentinel; + + return result; +} + +static unsigned char* encodeBytesGroup(unsigned char* data, const unsigned char* buffer, int bits) +{ + assert(bits >= 1 && bits <= 8); + + if (bits == 1) + return data; + + if (bits == 8) + { + memcpy(data, buffer, kByteGroupSize); + return data + kByteGroupSize; + } + + size_t byte_size = 8 / bits; + assert(kByteGroupSize % byte_size == 0); + + // fixed portion: bits bits for each value + // variable portion: full byte for each out-of-range value (using 1...1 as sentinel) + unsigned char sentinel = (1 << bits) - 1; + + for (size_t i = 0; i < kByteGroupSize; i += byte_size) + { + unsigned char byte = 0; + + for (size_t k = 0; k < byte_size; ++k) + { + unsigned char enc = (buffer[i + k] >= sentinel) ? sentinel : buffer[i + k]; + + byte <<= bits; + byte |= enc; + } + + *data++ = byte; + } + + for (size_t i = 0; i < kByteGroupSize; ++i) + { + if (buffer[i] >= sentinel) + { + *data++ = buffer[i]; + } + } + + return data; +} + +static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + + unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + memset(header, 0, header_size); + + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + int best_bits = 8; + size_t best_size = encodeBytesGroupMeasure(buffer + i, 8); + + for (int bits = 1; bits < 8; bits *= 2) + { + size_t size = encodeBytesGroupMeasure(buffer + i, bits); + + if (size < best_size) + { + best_bits = bits; + best_size = size; + } + } + + int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 2 : 3; + assert((1 << bitslog2) == best_bits); + + size_t header_offset = i / kByteGroupSize; + + header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); + + unsigned char* next = encodeBytesGroup(data, buffer + i, best_bits); + + assert(data + best_size == next); + data = next; + +#if TRACE > 1 + bytestats->bitg[bitslog2]++; + bytestats->bitb[bitslog2] += best_size; +#endif + } + +#if TRACE > 1 + bytestats->header += header_size; +#endif + + return data; +} + +static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize]; + assert(sizeof(buffer) % kByteGroupSize == 0); + + // we sometimes encode elements we didn't fill when rounding to kByteGroupSize + memset(buffer, 0, sizeof(buffer)); + + for (size_t k = 0; k < vertex_size; ++k) + { + size_t vertex_offset = k; + + unsigned char p = last_vertex[k]; + + for (size_t i = 0; i < vertex_count; ++i) + { + buffer[i] = zigzag8(vertex_data[vertex_offset] - p); + + p = vertex_data[vertex_offset]; + + vertex_offset += vertex_size; + } + +#if TRACE + const unsigned char* olddata = data; + bytestats = &vertexstats[k]; +#endif + + data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); + if (!data) + return 0; + +#if TRACE + bytestats = 0; + vertexstats[k].size += data - olddata; +#endif + } + + memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} + +#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX)) +static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ +#define READ() byte = *data++ +#define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = (enc == (1 << bits) - 1) ? encv : enc, data_var += (enc == (1 << bits) - 1) + + unsigned char byte, enc, encv; + const unsigned char* data_var; + + switch (bitslog2) + { + case 0: + memset(buffer, 0, kByteGroupSize); + return data; + case 1: + data_var = data + 4; + + // 4 groups with 4 2-bit values in each byte + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + + return data_var; + case 2: + data_var = data + 8; + + // 8 groups with 2 4-bit values in each byte + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + + return data_var; + case 3: + memcpy(buffer, data, kByteGroupSize); + return data + kByteGroupSize; + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } + +#undef READ +#undef NEXT +} + +static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + + const unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + size_t header_offset = i / kByteGroupSize; + + int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; + + data = decodeBytesGroup(data, buffer + i, bitslog2); + } + + return data; +} + +static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + for (size_t k = 0; k < vertex_size; ++k) + { + data = decodeBytes(data, data_end, buffer, vertex_count_aligned); + if (!data) + return 0; + + size_t vertex_offset = k; + + unsigned char p = last_vertex[k]; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned char v = unzigzag8(buffer[i]) + p; + + transposed[vertex_offset] = v; + p = v; + + vertex_offset += vertex_size; + } + } + + memcpy(vertex_data, transposed, vertex_count * vertex_size); + + memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +static unsigned char kDecodeBytesGroupShuffle[256][8]; +static unsigned char kDecodeBytesGroupCount[256]; + +#ifdef __wasm__ +__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop! +#endif +static bool +decodeBytesGroupBuildTables() +{ + for (int mask = 0; mask < 256; ++mask) + { + unsigned char shuffle[8]; + unsigned char count = 0; + + for (int i = 0; i < 8; ++i) + { + int maski = (mask >> i) & 1; + shuffle[i] = maski ? count : 0x80; + count += (unsigned char)(maski); + } + + memcpy(kDecodeBytesGroupShuffle[mask], shuffle, 8); + kDecodeBytesGroupCount[mask] = count; + } + + return true; +} + +static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables(); +#endif + +#ifdef SIMD_SSE +SIMD_TARGET +static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1) +{ + __m128i sm0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask0])); + __m128i sm1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask1])); + __m128i sm1off = _mm_set1_epi8(kDecodeBytesGroupCount[mask0]); + + __m128i sm1r = _mm_add_epi8(sm1, sm1off); + + return _mm_unpacklo_epi64(sm0, sm1r); +} + +SIMD_TARGET +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + __m128i result = _mm_setzero_si128(); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data; + } + + case 1: + { +#ifdef __GNUC__ + typedef int __attribute__((aligned(1))) unaligned_int; +#else + typedef int unaligned_int; +#endif + + __m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast<const unaligned_int*>(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 4)); + + __m128i sel22 = _mm_unpacklo_epi8(_mm_srli_epi16(sel2, 4), sel2); + __m128i sel2222 = _mm_unpacklo_epi8(_mm_srli_epi16(sel22, 2), sel22); + __m128i sel = _mm_and_si128(sel2222, _mm_set1_epi8(3)); + + __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(3)); + int mask16 = _mm_movemask_epi8(mask); + unsigned char mask0 = (unsigned char)(mask16 & 255); + unsigned char mask1 = (unsigned char)(mask16 >> 8); + + __m128i shuf = decodeShuffleMask(mask0, mask1); + + __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 2: + { + __m128i sel4 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 8)); + + __m128i sel44 = _mm_unpacklo_epi8(_mm_srli_epi16(sel4, 4), sel4); + __m128i sel = _mm_and_si128(sel44, _mm_set1_epi8(15)); + + __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(15)); + int mask16 = _mm_movemask_epi8(mask); + unsigned char mask0 = (unsigned char)(mask16 & 255); + unsigned char mask1 = (unsigned char)(mask16 >> 8); + + __m128i shuf = decodeShuffleMask(mask0, mask1); + + __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 3: + { + __m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_AVX +static const __m128i decodeBytesGroupConfig[] = { + _mm_set1_epi8(3), + _mm_set1_epi8(15), + _mm_setr_epi8(6, 4, 2, 0, 14, 12, 10, 8, 22, 20, 18, 16, 30, 28, 26, 24), + _mm_setr_epi8(4, 0, 12, 8, 20, 16, 28, 24, 36, 32, 44, 40, 52, 48, 60, 56), +}; + +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + __m128i result = _mm_setzero_si128(); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data; + } + + case 1: + case 2: + { + const unsigned char* skip = data + (bitslog2 << 2); + + __m128i selb = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(skip)); + + __m128i sent = decodeBytesGroupConfig[bitslog2 - 1]; + __m128i ctrl = decodeBytesGroupConfig[bitslog2 + 1]; + + __m128i selw = _mm_shuffle_epi32(selb, 0x44); + __m128i sel = _mm_and_si128(sent, _mm_multishift_epi64_epi8(ctrl, selw)); + __mmask16 mask16 = _mm_cmp_epi8_mask(sel, sent, _MM_CMPINT_EQ); + + __m128i result = _mm_mask_expand_epi8(sel, mask16, rest); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return skip + _mm_popcnt_u32(mask16); + } + + case 3: + { + __m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_NEON +static uint8x16_t shuffleBytes(unsigned char mask0, unsigned char mask1, uint8x8_t rest0, uint8x8_t rest1) +{ + uint8x8_t sm0 = vld1_u8(kDecodeBytesGroupShuffle[mask0]); + uint8x8_t sm1 = vld1_u8(kDecodeBytesGroupShuffle[mask1]); + + uint8x8_t r0 = vtbl1_u8(rest0, sm0); + uint8x8_t r1 = vtbl1_u8(rest1, sm1); + + return vcombine_u8(r0, r1); +} + +static void neonMoveMask(uint8x16_t mask, unsigned char& mask0, unsigned char& mask1) +{ + static const unsigned char byte_mask_data[16] = {1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128}; + + uint8x16_t byte_mask = vld1q_u8(byte_mask_data); + uint8x16_t masked = vandq_u8(mask, byte_mask); + +#ifdef __aarch64__ + // aarch64 has horizontal sums; MSVC doesn't expose this via arm64_neon.h so this path is exclusive to clang/gcc + mask0 = vaddv_u8(vget_low_u8(masked)); + mask1 = vaddv_u8(vget_high_u8(masked)); +#else + // we need horizontal sums of each half of masked, which can be done in 3 steps (yielding sums of sizes 2, 4, 8) + uint8x8_t sum1 = vpadd_u8(vget_low_u8(masked), vget_high_u8(masked)); + uint8x8_t sum2 = vpadd_u8(sum1, sum1); + uint8x8_t sum3 = vpadd_u8(sum2, sum2); + + mask0 = vget_lane_u8(sum3, 0); + mask1 = vget_lane_u8(sum3, 1); +#endif +} + +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + uint8x16_t result = vdupq_n_u8(0); + + vst1q_u8(buffer, result); + + return data; + } + + case 1: + { + uint8x8_t sel2 = vld1_u8(data); + uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0]; + uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22); + uint8x16_t sel = vandq_u8(vcombine_u8(sel2222.val[0], sel2222.val[1]), vdupq_n_u8(3)); + + uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(3)); + unsigned char mask0, mask1; + neonMoveMask(mask, mask0, mask1); + + uint8x8_t rest0 = vld1_u8(data + 4); + uint8x8_t rest1 = vld1_u8(data + 4 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); + + vst1q_u8(buffer, result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 2: + { + uint8x8_t sel4 = vld1_u8(data); + uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15))); + uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]); + + uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(15)); + unsigned char mask0, mask1; + neonMoveMask(mask, mask0, mask1); + + uint8x8_t rest0 = vld1_u8(data + 8); + uint8x8_t rest1 = vld1_u8(data + 8 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); + + vst1q_u8(buffer, result); + + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 3: + { + uint8x16_t result = vld1q_u8(data); + + vst1q_u8(buffer, result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_WASM +SIMD_TARGET +static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) +{ + v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]); + v128_t sm1 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); + + v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); + sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + v128_t sm1r = wasm_i8x16_add(sm1, sm1off); + + return wasmx_unpacklo_v64x2(sm0, sm1r); +} + +SIMD_TARGET +static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) +{ + v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3); + + uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull; + uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull; + + // TODO: This can use v8x16_bitmask in the future + uint64_t mask_2 = mask_1a | mask_1b; + uint64_t mask_4 = mask_2 | (mask_2 >> 16); + uint64_t mask_8 = mask_4 | (mask_4 >> 8); + + mask0 = uint8_t(mask_8); + mask1 = uint8_t(mask_8 >> 32); +} + +SIMD_TARGET +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + unsigned char byte, enc, encv; + const unsigned char* data_var; + + switch (bitslog2) + { + case 0: + { + v128_t result = wasm_i8x16_splat(0); + + wasm_v128_store(buffer, result); + + return data; + } + + case 1: + { + v128_t sel2 = wasm_v128_load(data); + v128_t rest = wasm_v128_load(data + 4); + + v128_t sel22 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2); + v128_t sel2222 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22); + v128_t sel = wasm_v128_and(sel2222, wasm_i8x16_splat(3)); + + v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(3)); + + unsigned char mask0, mask1; + wasmMoveMask(mask, mask0, mask1); + + v128_t shuf = decodeShuffleMask(mask0, mask1); + + v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 2: + { + v128_t sel4 = wasm_v128_load(data); + v128_t rest = wasm_v128_load(data + 8); + + v128_t sel44 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4); + v128_t sel = wasm_v128_and(sel44, wasm_i8x16_splat(15)); + + v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(15)); + + unsigned char mask0, mask1; + wasmMoveMask(mask, mask0, mask1); + + v128_t shuf = decodeShuffleMask(mask0, mask1); + + v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 3: + { + v128_t result = wasm_v128_load(data); + + wasm_v128_store(buffer, result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_AVX) +SIMD_TARGET +static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) +{ + __m128i t0 = _mm_unpacklo_epi8(x0, x1); + __m128i t1 = _mm_unpackhi_epi8(x0, x1); + __m128i t2 = _mm_unpacklo_epi8(x2, x3); + __m128i t3 = _mm_unpackhi_epi8(x2, x3); + + x0 = _mm_unpacklo_epi16(t0, t2); + x1 = _mm_unpackhi_epi16(t0, t2); + x2 = _mm_unpacklo_epi16(t1, t3); + x3 = _mm_unpackhi_epi16(t1, t3); +} + +SIMD_TARGET +static __m128i unzigzag8(__m128i v) +{ + __m128i xl = _mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi8(1))); + __m128i xr = _mm_and_si128(_mm_srli_epi16(v, 1), _mm_set1_epi8(127)); + + return _mm_xor_si128(xl, xr); +} +#endif + +#ifdef SIMD_NEON +static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3) +{ + uint8x16x2_t t01 = vzipq_u8(x0, x1); + uint8x16x2_t t23 = vzipq_u8(x2, x3); + + uint16x8x2_t x01 = vzipq_u16(vreinterpretq_u16_u8(t01.val[0]), vreinterpretq_u16_u8(t23.val[0])); + uint16x8x2_t x23 = vzipq_u16(vreinterpretq_u16_u8(t01.val[1]), vreinterpretq_u16_u8(t23.val[1])); + + x0 = vreinterpretq_u8_u16(x01.val[0]); + x1 = vreinterpretq_u8_u16(x01.val[1]); + x2 = vreinterpretq_u8_u16(x23.val[0]); + x3 = vreinterpretq_u8_u16(x23.val[1]); +} + +static uint8x16_t unzigzag8(uint8x16_t v) +{ + uint8x16_t xl = vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(vandq_u8(v, vdupq_n_u8(1))))); + uint8x16_t xr = vshrq_n_u8(v, 1); + + return veorq_u8(xl, xr); +} +#endif + +#ifdef SIMD_WASM +SIMD_TARGET +static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3) +{ + v128_t t0 = wasmx_unpacklo_v8x16(x0, x1); + v128_t t1 = wasmx_unpackhi_v8x16(x0, x1); + v128_t t2 = wasmx_unpacklo_v8x16(x2, x3); + v128_t t3 = wasmx_unpackhi_v8x16(x2, x3); + + x0 = wasmx_unpacklo_v16x8(t0, t2); + x1 = wasmx_unpackhi_v16x8(t0, t2); + x2 = wasmx_unpacklo_v16x8(t1, t3); + x3 = wasmx_unpackhi_v16x8(t1, t3); +} + +SIMD_TARGET +static v128_t unzigzag8(v128_t v) +{ + v128_t xl = wasm_i8x16_neg(wasm_v128_and(v, wasm_i8x16_splat(1))); + v128_t xr = wasm_u8x16_shr(v, 1); + + return wasm_v128_xor(xl, xr); +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) +SIMD_TARGET +static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + assert(kByteGroupSize == 16); + + const unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + size_t i = 0; + + // fast-path: process 4 groups at a time, do a shared bounds check - each group reads <=24b + for (; i + kByteGroupSize * 4 <= buffer_size && size_t(data_end - data) >= kByteGroupDecodeLimit * 4; i += kByteGroupSize * 4) + { + size_t header_offset = i / kByteGroupSize; + unsigned char header_byte = header[header_offset / 4]; + + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 0, (header_byte >> 0) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 1, (header_byte >> 2) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 2, (header_byte >> 4) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 3, (header_byte >> 6) & 3); + } + + // slow-path: process remaining groups + for (; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + size_t header_offset = i / kByteGroupSize; + + int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; + + data = decodeBytesGroupSimd(data, buffer + i, bitslog2); + } + + return data; +} + +SIMD_TARGET +static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize * 4]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + for (size_t k = 0; k < vertex_size; k += 4) + { + for (size_t j = 0; j < 4; ++j) + { + data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned); + if (!data) + return 0; + } + +#if defined(SIMD_SSE) || defined(SIMD_AVX) +#define TEMP __m128i +#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast<const int*>(last_vertex + k)) +#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buffer + j + i * vertex_count_aligned)) +#define GRP4(i) t0 = _mm_shuffle_epi32(r##i, 0), t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3) +#define FIXD(i) t##i = pi = _mm_add_epi8(pi, t##i) +#define SAVE(i) *reinterpret_cast<int*>(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size +#endif + +#ifdef SIMD_NEON +#define TEMP uint8x8_t +#define PREP() uint8x8_t pi = vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast<uint32_t*>(last_vertex + k), vdup_n_u32(0), 0)) +#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1)) +#define FIXD(i) t##i = pi = vadd_u8(pi, t##i) +#define SAVE(i) vst1_lane_u32(reinterpret_cast<uint32_t*>(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size +#endif + +#ifdef SIMD_WASM +#define TEMP v128_t +#define PREP() v128_t pi = wasm_v128_load(last_vertex + k) +#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3) +#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i) +#define SAVE(i) *reinterpret_cast<int*>(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size +#endif + + PREP(); + + unsigned char* savep = transposed + k; + + for (size_t j = 0; j < vertex_count_aligned; j += 16) + { + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + + r0 = unzigzag8(r0); + r1 = unzigzag8(r1); + r2 = unzigzag8(r2); + r3 = unzigzag8(r3); + + transpose8(r0, r1, r2, r3); + + TEMP t0, t1, t2, t3; + + GRP4(0); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(1); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(2); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(3); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + +#undef TEMP +#undef PREP +#undef LOAD +#undef GRP4 +#undef FIXD +#undef SAVE + } + } + + memcpy(vertex_data, transposed, vertex_count * vertex_size); + + memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} +#endif + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) +static unsigned int getCpuFeatures() +{ + int cpuinfo[4] = {}; +#ifdef _MSC_VER + __cpuid(cpuinfo, 1); +#else + __cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); +#endif + return cpuinfo[2]; +} + +unsigned int cpuid = getCpuFeatures(); +#endif + +} // namespace meshopt + +size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + +#if TRACE + memset(vertexstats, 0, sizeof(vertexstats)); +#endif + + const unsigned char* vertex_data = static_cast<const unsigned char*>(vertices); + + unsigned char* data = buffer; + unsigned char* data_end = buffer + buffer_size; + + if (size_t(data_end - data) < 1 + vertex_size) + return 0; + + int version = gEncodeVertexVersion; + + *data++ = (unsigned char)(kVertexHeader | version); + + unsigned char first_vertex[256] = {}; + if (vertex_count > 0) + memcpy(first_vertex, vertex_data, vertex_size); + + unsigned char last_vertex[256] = {}; + memcpy(last_vertex, first_vertex, vertex_size); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + + size_t vertex_offset = 0; + + while (vertex_offset < vertex_count) + { + size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; + + data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + if (!data) + return 0; + + vertex_offset += block_size; + } + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + if (size_t(data_end - data) < tail_size) + return 0; + + // write first vertex to the end of the stream and pad it to 32 bytes; this is important to simplify bounds checks in decoder + if (vertex_size < kTailMaxSize) + { + memset(data, 0, kTailMaxSize - vertex_size); + data += kTailMaxSize - vertex_size; + } + + memcpy(data, first_vertex, vertex_size); + data += vertex_size; + + assert(data >= buffer + tail_size); + assert(data <= buffer + buffer_size); + +#if TRACE + size_t total_size = data - buffer; + + for (size_t k = 0; k < vertex_size; ++k) + { + const Stats& vsk = vertexstats[k]; + + printf("%2d: %d bytes\t%.1f%%\t%.1f bpv", int(k), int(vsk.size), double(vsk.size) / double(total_size) * 100, double(vsk.size) / double(vertex_count) * 8); + +#if TRACE > 1 + printf("\t\thdr %d bytes\tbit0 %d (%d bytes)\tbit1 %d (%d bytes)\tbit2 %d (%d bytes)\tbit3 %d (%d bytes)", + int(vsk.header), + int(vsk.bitg[0]), int(vsk.bitb[0]), + int(vsk.bitg[1]), int(vsk.bitb[1]), + int(vsk.bitg[2]), int(vsk.bitb[2]), + int(vsk.bitg[3]), int(vsk.bitb[3])); +#endif + + printf("\n"); + } +#endif + + return data - buffer; +} + +size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + size_t vertex_block_count = (vertex_count + vertex_block_size - 1) / vertex_block_size; + + size_t vertex_block_header_size = (vertex_block_size / kByteGroupSize + 3) / 4; + size_t vertex_block_data_size = vertex_block_size; + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + return 1 + vertex_block_count * vertex_size * (vertex_block_header_size + vertex_block_data_size) + tail_size; +} + +void meshopt_encodeVertexVersion(int version) +{ + assert(unsigned(version) <= 0); + + meshopt::gEncodeVertexVersion = version; +} + +int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0; + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) + decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock; +#elif defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) + decode = decodeVertexBlockSimd; +#else + decode = decodeVertexBlock; +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + assert(gDecodeBytesGroupInitialized); + (void)gDecodeBytesGroupInitialized; +#endif + + unsigned char* vertex_data = static_cast<unsigned char*>(destination); + + const unsigned char* data = buffer; + const unsigned char* data_end = buffer + buffer_size; + + if (size_t(data_end - data) < 1 + vertex_size) + return -2; + + unsigned char data_header = *data++; + + if ((data_header & 0xf0) != kVertexHeader) + return -1; + + int version = data_header & 0x0f; + if (version > 0) + return -1; + + unsigned char last_vertex[256]; + memcpy(last_vertex, data_end - vertex_size, vertex_size); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + + size_t vertex_offset = 0; + + while (vertex_offset < vertex_count) + { + size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; + + data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + if (!data) + return -2; + + vertex_offset += block_size; + } + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + if (size_t(data_end - data) != tail_size) + return -3; + + return 0; +} + +#undef SIMD_NEON +#undef SIMD_SSE +#undef SIMD_AVX +#undef SIMD_WASM +#undef SIMD_FALLBACK +#undef SIMD_TARGET diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp new file mode 100644 index 0000000000..e7ad2c9d39 --- /dev/null +++ b/thirdparty/meshoptimizer/vertexfilter.cpp @@ -0,0 +1,825 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <math.h> + +// The block below auto-detects SIMD ISA that can be used on the target platform +#ifndef MESHOPTIMIZER_NO_SIMD + +// The SIMD implementation requires SSE2, which can be enabled unconditionally through compiler settings +#if defined(__SSE2__) +#define SIMD_SSE +#endif + +// MSVC supports compiling SSE2 code regardless of compile options; we assume all 32-bit CPUs support SSE2 +#if !defined(SIMD_SSE) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) +#define SIMD_SSE +#endif + +// GCC/clang define these when NEON support is available +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define SIMD_NEON +#endif + +// On MSVC, we assume that ARM builds always target NEON-capable devices +#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) +#define SIMD_NEON +#endif + +// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD +#if defined(__wasm_simd128__) +#define SIMD_WASM +#endif + +#endif // !MESHOPTIMIZER_NO_SIMD + +#ifdef SIMD_SSE +#include <emmintrin.h> +#include <stdint.h> +#endif + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#ifdef SIMD_NEON +#if defined(_MSC_VER) && defined(_M_ARM64) +#include <arm64_neon.h> +#else +#include <arm_neon.h> +#endif +#endif + +#ifdef SIMD_WASM +#include <wasm_simd128.h> +#endif + +#ifdef SIMD_WASM +#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6) +#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7) +#endif + +namespace meshopt +{ + +#if !defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_WASM) +template <typename T> +static void decodeFilterOct(T* data, size_t count) +{ + const float max = float((1 << (sizeof(T) * 8 - 1)) - 1); + + for (size_t i = 0; i < count; ++i) + { + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + float x = float(data[i * 4 + 0]); + float y = float(data[i * 4 + 1]); + float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y); + + // fixup octahedral coordinates for z<0 + float t = (z >= 0.f) ? 0.f : z; + + x += (x >= 0.f) ? t : -t; + y += (y >= 0.f) ? t : -t; + + // compute normal length & scale + float l = sqrtf(x * x + y * y + z * z); + float s = max / l; + + // rounded signed float->int + int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f)); + int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f)); + int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f)); + + data[i * 4 + 0] = T(xf); + data[i * 4 + 1] = T(yf); + data[i * 4 + 2] = T(zf); + } +} + +static void decodeFilterQuat(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; ++i) + { + // recover scale from the high byte of the component + int sf = data[i * 4 + 3] | 3; + float ss = scale / float(sf); + + // convert x/y/z to [-1..1] (scaled...) + float x = float(data[i * 4 + 0]) * ss; + float y = float(data[i * 4 + 1]) * ss; + float z = float(data[i * 4 + 2]) * ss; + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + float ww = 1.f - x * x - y * y - z * z; + float w = sqrtf(ww >= 0.f ? ww : 0.f); + + // rounded signed float->int + int xf = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f)); + int yf = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f)); + int zf = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f)); + int wf = int(w * 32767.f + 0.5f); + + int qc = data[i * 4 + 3] & 3; + + // output order is dictated by input index + data[i * 4 + ((qc + 1) & 3)] = short(xf); + data[i * 4 + ((qc + 2) & 3)] = short(yf); + data[i * 4 + ((qc + 3) & 3)] = short(zf); + data[i * 4 + ((qc + 0) & 3)] = short(wf); + } +} + +static void decodeFilterExp(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; ++i) + { + unsigned int v = data[i]; + + // decode mantissa and exponent + int m = int(v << 8) >> 8; + int e = int(v) >> 24; + + union + { + float f; + unsigned int ui; + } u; + + // optimized version of ldexp(float(m), e) + u.ui = unsigned(e + 127) << 23; + u.f = u.f * float(m); + + data[i] = u.ui; + } +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +inline uint64_t rotateleft64(uint64_t v, int x) +{ +#if defined(_MSC_VER) && !defined(__clang__) + return _rotl64(v, x); +// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for +// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions +#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11) + return __builtin_rotateleft64(v, x); +#else + return (v << (x & 63)) | (v >> ((64 - x) & 63)); +#endif +} +#endif + +#ifdef SIMD_SSE +static void decodeFilterOctSimd(signed char* data, size_t count) +{ + const __m128 sign = _mm_set1_ps(-0.f); + + for (size_t i = 0; i < count; i += 4) + { + __m128i n4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4])); + + // sign-extends each of x,y in [x y ? ?] with arithmetic shifts + __m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 24), 24); + __m128i yf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 24); + + // unpack z; note that z is unsigned so we technically don't need to sign extend it + __m128i zf = _mm_srai_epi32(_mm_slli_epi32(n4, 8), 24); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + __m128 x = _mm_cvtepi32_ps(xf); + __m128 y = _mm_cvtepi32_ps(yf); + __m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y))); + + // fixup octahedral coordinates for z<0 + __m128 t = _mm_min_ps(z, _mm_setzero_ps()); + + x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign))); + y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign))); + + // compute normal length & scale + __m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))); + __m128 s = _mm_mul_ps(_mm_set1_ps(127.f), _mm_rsqrt_ps(ll)); + + // rounded signed float->int + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); + + // combine xr/yr/zr into final value + __m128i res = _mm_and_si128(n4, _mm_set1_epi32(0xff000000)); + res = _mm_or_si128(res, _mm_and_si128(xr, _mm_set1_epi32(0xff))); + res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(yr, _mm_set1_epi32(0xff)), 8)); + res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(zr, _mm_set1_epi32(0xff)), 16)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res); + } +} + +static void decodeFilterOctSimd(short* data, size_t count) +{ + const __m128 sign = _mm_set1_ps(-0.f); + + for (size_t i = 0; i < count; i += 4) + { + __m128 n4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4])); + __m128 n4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + __m128i n4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(2, 0, 2, 0))); + + // sign-extends each of x,y in [x y] with arithmetic shifts + __m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 16); + __m128i yf = _mm_srai_epi32(n4, 16); + + // unpack z; note that z is unsigned so we don't need to sign extend it + __m128i z4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(3, 1, 3, 1))); + __m128i zf = _mm_and_si128(z4, _mm_set1_epi32(0x7fff)); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + __m128 x = _mm_cvtepi32_ps(xf); + __m128 y = _mm_cvtepi32_ps(yf); + __m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y))); + + // fixup octahedral coordinates for z<0 + __m128 t = _mm_min_ps(z, _mm_setzero_ps()); + + x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign))); + y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign))); + + // compute normal length & scale + __m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))); + __m128 s = _mm_div_ps(_mm_set1_ps(32767.f), _mm_sqrt_ps(ll)); + + // rounded signed float->int + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); + + // mix x/z and y/0 to make 16-bit unpack easier + __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16)); + __m128i y0r = _mm_and_si128(yr, _mm_set1_epi32(0xffff)); + + // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w + __m128i res_0 = _mm_unpacklo_epi16(xzr, y0r); + __m128i res_1 = _mm_unpackhi_epi16(xzr, y0r); + + // patch in .w + res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000))); + res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000))); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1); + } +} + +static void decodeFilterQuatSimd(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; i += 4) + { + __m128 q4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4])); + __m128 q4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + __m128i q4_xy = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(2, 0, 2, 0))); + __m128i q4_zc = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(3, 1, 3, 1))); + + // sign-extends each of x,y in [x y] with arithmetic shifts + __m128i xf = _mm_srai_epi32(_mm_slli_epi32(q4_xy, 16), 16); + __m128i yf = _mm_srai_epi32(q4_xy, 16); + __m128i zf = _mm_srai_epi32(_mm_slli_epi32(q4_zc, 16), 16); + __m128i cf = _mm_srai_epi32(q4_zc, 16); + + // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) + __m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3)); + __m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf)); + + // convert x/y/z to [-1..1] (scaled...) + __m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss); + __m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss); + __m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss); + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + __m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)))); + __m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps())); + + __m128 s = _mm_set1_ps(32767.f); + + // rounded signed float->int + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); + __m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s)); + + // mix x/z and w/y to make 16-bit unpack easier + __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16)); + __m128i wyr = _mm_or_si128(_mm_and_si128(wr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(yr, 16)); + + // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0) + __m128i res_0 = _mm_unpacklo_epi16(wyr, xzr); + __m128i res_1 = _mm_unpackhi_epi16(wyr, xzr); + + // store results to stack so that we can rotate using scalar instructions + uint64_t res[4]; + _mm_storeu_si128(reinterpret_cast<__m128i*>(&res[0]), res_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&res[2]), res_1); + + // rotate and store + uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]); + + out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4); + out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4); + out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4); + out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4); + } +} + +static void decodeFilterExpSimd(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + __m128i v = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i])); + + // decode exponent into 2^x directly + __m128i ef = _mm_srai_epi32(v, 24); + __m128i es = _mm_slli_epi32(_mm_add_epi32(ef, _mm_set1_epi32(127)), 23); + + // decode 24-bit mantissa into floating-point value + __m128i mf = _mm_srai_epi32(_mm_slli_epi32(v, 8), 8); + __m128 m = _mm_cvtepi32_ps(mf); + + __m128 r = _mm_mul_ps(_mm_castsi128_ps(es), m); + + _mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r); + } +} +#endif + +#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64) +inline float32x4_t vsqrtq_f32(float32x4_t x) +{ + float32x4_t r = vrsqrteq_f32(x); + r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(r, x), r)); // refine rsqrt estimate + return vmulq_f32(r, x); +} + +inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y) +{ + float32x4_t r = vrecpeq_f32(y); + r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate + return vmulq_f32(x, r); +} +#endif + +#ifdef SIMD_NEON +static void decodeFilterOctSimd(signed char* data, size_t count) +{ + const int32x4_t sign = vdupq_n_s32(0x80000000); + + for (size_t i = 0; i < count; i += 4) + { + int32x4_t n4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4])); + + // sign-extends each of x,y in [x y ? ?] with arithmetic shifts + int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 24), 24); + int32x4_t yf = vshrq_n_s32(vshlq_n_s32(n4, 16), 24); + + // unpack z; note that z is unsigned so we technically don't need to sign extend it + int32x4_t zf = vshrq_n_s32(vshlq_n_s32(n4, 8), 24); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + float32x4_t x = vcvtq_f32_s32(xf); + float32x4_t y = vcvtq_f32_s32(yf); + float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y))); + + // fixup octahedral coordinates for z<0 + float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f)); + + x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign)))); + y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign)))); + + // compute normal length & scale + float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))); + float32x4_t rl = vrsqrteq_f32(ll); + float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); + int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); + int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + + // combine xr/yr/zr into final value + int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000)); + res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff))); + res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8)); + res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16)); + + vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res); + } +} + +static void decodeFilterOctSimd(short* data, size_t count) +{ + const int32x4_t sign = vdupq_n_s32(0x80000000); + + for (size_t i = 0; i < count; i += 4) + { + int32x4_t n4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4])); + int32x4_t n4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + int32x4_t n4 = vuzpq_s32(n4_0, n4_1).val[0]; + + // sign-extends each of x,y in [x y] with arithmetic shifts + int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 16), 16); + int32x4_t yf = vshrq_n_s32(n4, 16); + + // unpack z; note that z is unsigned so we don't need to sign extend it + int32x4_t z4 = vuzpq_s32(n4_0, n4_1).val[1]; + int32x4_t zf = vandq_s32(z4, vdupq_n_s32(0x7fff)); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + float32x4_t x = vcvtq_f32_s32(xf); + float32x4_t y = vcvtq_f32_s32(yf); + float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y))); + + // fixup octahedral coordinates for z<0 + float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f)); + + x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign)))); + y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign)))); + + // compute normal length & scale + float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))); + float32x4_t rl = vrsqrteq_f32(ll); + rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate + float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); + int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); + int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + + // mix x/z and y/0 to make 16-bit unpack easier + int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16)); + int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff)); + + // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w + int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[0]); + int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[1]); + + // patch in .w + res_0 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_0, res_0); + res_1 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_1, res_1); + + vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0); + vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1); + } +} + +static void decodeFilterQuatSimd(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; i += 4) + { + int32x4_t q4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4])); + int32x4_t q4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + int32x4_t q4_xy = vuzpq_s32(q4_0, q4_1).val[0]; + int32x4_t q4_zc = vuzpq_s32(q4_0, q4_1).val[1]; + + // sign-extends each of x,y in [x y] with arithmetic shifts + int32x4_t xf = vshrq_n_s32(vshlq_n_s32(q4_xy, 16), 16); + int32x4_t yf = vshrq_n_s32(q4_xy, 16); + int32x4_t zf = vshrq_n_s32(vshlq_n_s32(q4_zc, 16), 16); + int32x4_t cf = vshrq_n_s32(q4_zc, 16); + + // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) + int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3)); + float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf)); + + // convert x/y/z to [-1..1] (scaled...) + float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss); + float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss); + float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss); + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)))); + float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f))); + + float32x4_t s = vdupq_n_f32(32767.f); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); + int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); + int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap)); + + // mix x/z and w/y to make 16-bit unpack easier + int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16)); + int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16)); + + // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0) + int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]); + int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]); + + // rotate and store + uint64_t* out = (uint64_t*)&data[i * 4]; + + out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4); + out[1] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4); + out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4); + out[3] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4); + } +} + +static void decodeFilterExpSimd(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + int32x4_t v = vld1q_s32(reinterpret_cast<int32_t*>(&data[i])); + + // decode exponent into 2^x directly + int32x4_t ef = vshrq_n_s32(v, 24); + int32x4_t es = vshlq_n_s32(vaddq_s32(ef, vdupq_n_s32(127)), 23); + + // decode 24-bit mantissa into floating-point value + int32x4_t mf = vshrq_n_s32(vshlq_n_s32(v, 8), 8); + float32x4_t m = vcvtq_f32_s32(mf); + + float32x4_t r = vmulq_f32(vreinterpretq_f32_s32(es), m); + + vst1q_f32(reinterpret_cast<float*>(&data[i]), r); + } +} +#endif + +#ifdef SIMD_WASM +static void decodeFilterOctSimd(signed char* data, size_t count) +{ + const v128_t sign = wasm_f32x4_splat(-0.f); + + for (size_t i = 0; i < count; i += 4) + { + v128_t n4 = wasm_v128_load(&data[i * 4]); + + // sign-extends each of x,y in [x y ? ?] with arithmetic shifts + v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 24), 24); + v128_t yf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 24); + + // unpack z; note that z is unsigned so we technically don't need to sign extend it + v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 8), 24); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + v128_t x = wasm_f32x4_convert_i32x4(xf); + v128_t y = wasm_f32x4_convert_i32x4(yf); + v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y))); + + // fixup octahedral coordinates for z<0 + // note: i32x4_min with 0 is equvalent to f32x4_min + v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0)); + + x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign))); + y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign))); + + // compute normal length & scale + v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))); + v128_t s = wasm_f32x4_div(wasm_f32x4_splat(127.f), wasm_f32x4_sqrt(ll)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); + + // combine xr/yr/zr into final value + v128_t res = wasm_v128_and(n4, wasm_i32x4_splat(0xff000000)); + res = wasm_v128_or(res, wasm_v128_and(xr, wasm_i32x4_splat(0xff))); + res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(yr, wasm_i32x4_splat(0xff)), 8)); + res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(zr, wasm_i32x4_splat(0xff)), 16)); + + wasm_v128_store(&data[i * 4], res); + } +} + +static void decodeFilterOctSimd(short* data, size_t count) +{ + const v128_t sign = wasm_f32x4_splat(-0.f); + const v128_t zmask = wasm_i32x4_splat(0x7fff); + + for (size_t i = 0; i < count; i += 4) + { + v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]); + v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]); + + // gather both x/y 16-bit pairs in each 32-bit lane + v128_t n4 = wasmx_unziplo_v32x4(n4_0, n4_1); + + // sign-extends each of x,y in [x y] with arithmetic shifts + v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 16); + v128_t yf = wasm_i32x4_shr(n4, 16); + + // unpack z; note that z is unsigned so we don't need to sign extend it + v128_t z4 = wasmx_unziphi_v32x4(n4_0, n4_1); + v128_t zf = wasm_v128_and(z4, zmask); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + v128_t x = wasm_f32x4_convert_i32x4(xf); + v128_t y = wasm_f32x4_convert_i32x4(yf); + v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y))); + + // fixup octahedral coordinates for z<0 + // note: i32x4_min with 0 is equvalent to f32x4_min + v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0)); + + x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign))); + y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign))); + + // compute normal length & scale + v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))); + v128_t s = wasm_f32x4_div(wasm_f32x4_splat(32767.f), wasm_f32x4_sqrt(ll)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); + + // mix x/z and y/0 to make 16-bit unpack easier + v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16)); + v128_t y0r = wasm_v128_and(yr, wasm_i32x4_splat(0xffff)); + + // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w + v128_t res_0 = wasmx_unpacklo_v16x8(xzr, y0r); + v128_t res_1 = wasmx_unpackhi_v16x8(xzr, y0r); + + // patch in .w + res_0 = wasm_v128_or(res_0, wasm_v128_and(n4_0, wasm_i64x2_splat(0xffff000000000000))); + res_1 = wasm_v128_or(res_1, wasm_v128_and(n4_1, wasm_i64x2_splat(0xffff000000000000))); + + wasm_v128_store(&data[(i + 0) * 4], res_0); + wasm_v128_store(&data[(i + 2) * 4], res_1); + } +} + +static void decodeFilterQuatSimd(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; i += 4) + { + v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]); + v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]); + + // gather both x/y 16-bit pairs in each 32-bit lane + v128_t q4_xy = wasmx_unziplo_v32x4(q4_0, q4_1); + v128_t q4_zc = wasmx_unziphi_v32x4(q4_0, q4_1); + + // sign-extends each of x,y in [x y] with arithmetic shifts + v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(q4_xy, 16), 16); + v128_t yf = wasm_i32x4_shr(q4_xy, 16); + v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(q4_zc, 16), 16); + v128_t cf = wasm_i32x4_shr(q4_zc, 16); + + // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) + v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3)); + v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf)); + + // convert x/y/z to [-1..1] (scaled...) + v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss); + v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss); + v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss); + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + // note: i32x4_max with 0 is equivalent to f32x4_max + v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)))); + v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0))); + + v128_t s = wasm_f32x4_splat(32767.f); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); + v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap); + + // mix x/z and w/y to make 16-bit unpack easier + v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16)); + v128_t wyr = wasm_v128_or(wasm_v128_and(wr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(yr, 16)); + + // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0) + v128_t res_0 = wasmx_unpacklo_v16x8(wyr, xzr); + v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr); + + // compute component index shifted left by 4 (and moved into i32x4 slot) + // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449 + volatile v128_t cm = wasm_i32x4_shl(cf, 4); + + // rotate and store + uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]); + + out[0] = rotateleft64(wasm_i64x2_extract_lane(res_0, 0), wasm_i32x4_extract_lane(cm, 0)); + out[1] = rotateleft64(wasm_i64x2_extract_lane(res_0, 1), wasm_i32x4_extract_lane(cm, 1)); + out[2] = rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2)); + out[3] = rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3)); + } +} + +static void decodeFilterExpSimd(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + v128_t v = wasm_v128_load(&data[i]); + + // decode exponent into 2^x directly + v128_t ef = wasm_i32x4_shr(v, 24); + v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23); + + // decode 24-bit mantissa into floating-point value + v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8); + v128_t m = wasm_f32x4_convert_i32x4(mf); + + v128_t r = wasm_f32x4_mul(es, m); + + wasm_v128_store(&data[i], r); + } +} +#endif + +} // namespace meshopt + +void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_count % 4 == 0); + assert(vertex_size == 4 || vertex_size == 8); + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + if (vertex_size == 4) + decodeFilterOctSimd(static_cast<signed char*>(buffer), vertex_count); + else + decodeFilterOctSimd(static_cast<short*>(buffer), vertex_count); +#else + if (vertex_size == 4) + decodeFilterOct(static_cast<signed char*>(buffer), vertex_count); + else + decodeFilterOct(static_cast<short*>(buffer), vertex_count); +#endif +} + +void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_count % 4 == 0); + assert(vertex_size == 8); + (void)vertex_size; + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + decodeFilterQuatSimd(static_cast<short*>(buffer), vertex_count); +#else + decodeFilterQuat(static_cast<short*>(buffer), vertex_count); +#endif +} + +void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_count % 4 == 0); + assert(vertex_size % 4 == 0); + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + decodeFilterExpSimd(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); +#else + decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); +#endif +} + +#undef SIMD_SSE +#undef SIMD_NEON +#undef SIMD_WASM diff --git a/thirdparty/meshoptimizer/vfetchanalyzer.cpp b/thirdparty/meshoptimizer/vfetchanalyzer.cpp new file mode 100644 index 0000000000..51dca873f8 --- /dev/null +++ b/thirdparty/meshoptimizer/vfetchanalyzer.cpp @@ -0,0 +1,58 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + meshopt_VertexFetchStatistics result = {}; + + unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count); + memset(vertex_visited, 0, vertex_count); + + const size_t kCacheLine = 64; + const size_t kCacheSize = 128 * 1024; + + // simple direct mapped cache; on typical mesh data this is close to 4-way cache, and this model is a gross approximation anyway + size_t cache[kCacheSize / kCacheLine] = {}; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + vertex_visited[index] = 1; + + size_t start_address = index * vertex_size; + size_t end_address = start_address + vertex_size; + + size_t start_tag = start_address / kCacheLine; + size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; + + assert(start_tag < end_tag); + + for (size_t tag = start_tag; tag < end_tag; ++tag) + { + size_t line = tag % (sizeof(cache) / sizeof(cache[0])); + + // we store +1 since cache is filled with 0 by default + result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; + cache[line] = tag + 1; + } + } + + size_t unique_vertex_count = 0; + + for (size_t i = 0; i < vertex_count; ++i) + unique_vertex_count += vertex_visited[i]; + + result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size); + + return result; +} diff --git a/thirdparty/meshoptimizer/vfetchoptimizer.cpp b/thirdparty/meshoptimizer/vfetchoptimizer.cpp new file mode 100644 index 0000000000..465d6df5ca --- /dev/null +++ b/thirdparty/meshoptimizer/vfetchoptimizer.cpp @@ -0,0 +1,74 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + assert(index_count % 3 == 0); + + memset(destination, -1, vertex_count * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (destination[index] == ~0u) + { + destination[index] = next_vertex++; + } + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} + +size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + // support in-place optimization + if (destination == vertices) + { + unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size); + memcpy(vertices_copy, vertices, vertex_count * vertex_size); + vertices = vertices_copy; + } + + // build vertex remap table + unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count); + memset(vertex_remap, -1, vertex_count * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + unsigned int& remap = vertex_remap[index]; + + if (remap == ~0u) // vertex was not added to destination VB + { + // add vertex + memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size); + + remap = next_vertex++; + } + + // modify indices in place + indices[i] = remap; + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} diff --git a/thirdparty/minimp3/LICENSE b/thirdparty/minimp3/LICENSE new file mode 100644 index 0000000000..2c4afabdb6 --- /dev/null +++ b/thirdparty/minimp3/LICENSE @@ -0,0 +1,117 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> + diff --git a/thirdparty/minimp3/minimp3.h b/thirdparty/minimp3/minimp3.h new file mode 100644 index 0000000000..796cbc1f8e --- /dev/null +++ b/thirdparty/minimp3/minimp3.h @@ -0,0 +1,1855 @@ +#ifndef MINIMP3_H +#define MINIMP3_H +/* + https://github.com/lieff/minimp3 + To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. + This software is distributed without any warranty. + See <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ +#include <stdint.h> + +#define MINIMP3_MAX_SAMPLES_PER_FRAME (1152*2) + +typedef struct +{ + int frame_bytes, frame_offset, channels, hz, layer, bitrate_kbps; +} mp3dec_frame_info_t; + +typedef struct +{ + float mdct_overlap[2][9*32], qmf_state[15*2*32]; + int reserv, free_format_bytes; + unsigned char header[4], reserv_buf[511]; +} mp3dec_t; + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +void mp3dec_init(mp3dec_t *dec); +#ifndef MINIMP3_FLOAT_OUTPUT +typedef int16_t mp3d_sample_t; +#else /* MINIMP3_FLOAT_OUTPUT */ +typedef float mp3d_sample_t; +void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples); +#endif /* MINIMP3_FLOAT_OUTPUT */ +int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* MINIMP3_H */ +#if defined(MINIMP3_IMPLEMENTATION) && !defined(_MINIMP3_IMPLEMENTATION_GUARD) +#define _MINIMP3_IMPLEMENTATION_GUARD + +#include <stdlib.h> +#include <string.h> + +#define MAX_FREE_FORMAT_FRAME_SIZE 2304 /* more than ISO spec's */ +#ifndef MAX_FRAME_SYNC_MATCHES +#define MAX_FRAME_SYNC_MATCHES 10 +#endif /* MAX_FRAME_SYNC_MATCHES */ + +#define MAX_L3_FRAME_PAYLOAD_BYTES MAX_FREE_FORMAT_FRAME_SIZE /* MUST be >= 320000/8/32000*1152 = 1440 */ + +#define MAX_BITRESERVOIR_BYTES 511 +#define SHORT_BLOCK_TYPE 2 +#define STOP_BLOCK_TYPE 3 +#define MODE_MONO 3 +#define MODE_JOINT_STEREO 1 +#define HDR_SIZE 4 +#define HDR_IS_MONO(h) (((h[3]) & 0xC0) == 0xC0) +#define HDR_IS_MS_STEREO(h) (((h[3]) & 0xE0) == 0x60) +#define HDR_IS_FREE_FORMAT(h) (((h[2]) & 0xF0) == 0) +#define HDR_IS_CRC(h) (!((h[1]) & 1)) +#define HDR_TEST_PADDING(h) ((h[2]) & 0x2) +#define HDR_TEST_MPEG1(h) ((h[1]) & 0x8) +#define HDR_TEST_NOT_MPEG25(h) ((h[1]) & 0x10) +#define HDR_TEST_I_STEREO(h) ((h[3]) & 0x10) +#define HDR_TEST_MS_STEREO(h) ((h[3]) & 0x20) +#define HDR_GET_STEREO_MODE(h) (((h[3]) >> 6) & 3) +#define HDR_GET_STEREO_MODE_EXT(h) (((h[3]) >> 4) & 3) +#define HDR_GET_LAYER(h) (((h[1]) >> 1) & 3) +#define HDR_GET_BITRATE(h) ((h[2]) >> 4) +#define HDR_GET_SAMPLE_RATE(h) (((h[2]) >> 2) & 3) +#define HDR_GET_MY_SAMPLE_RATE(h) (HDR_GET_SAMPLE_RATE(h) + (((h[1] >> 3) & 1) + ((h[1] >> 4) & 1))*3) +#define HDR_IS_FRAME_576(h) ((h[1] & 14) == 2) +#define HDR_IS_LAYER_1(h) ((h[1] & 6) == 6) + +#define BITS_DEQUANTIZER_OUT -1 +#define MAX_SCF (255 + BITS_DEQUANTIZER_OUT*4 - 210) +#define MAX_SCFI ((MAX_SCF + 3) & ~3) + +#define MINIMP3_MIN(a, b) ((a) > (b) ? (b) : (a)) +#define MINIMP3_MAX(a, b) ((a) < (b) ? (b) : (a)) + +#if !defined(MINIMP3_NO_SIMD) + +#if !defined(MINIMP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64)) +/* x64 always have SSE2, arm64 always have neon, no need for generic code */ +#define MINIMP3_ONLY_SIMD +#endif /* SIMD checks... */ + +#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__)) +#if defined(_MSC_VER) +#include <intrin.h> +#endif /* defined(_MSC_VER) */ +#include <immintrin.h> +#define HAVE_SSE 1 +#define HAVE_SIMD 1 +#define VSTORE _mm_storeu_ps +#define VLD _mm_loadu_ps +#define VSET _mm_set1_ps +#define VADD _mm_add_ps +#define VSUB _mm_sub_ps +#define VMUL _mm_mul_ps +#define VMAC(a, x, y) _mm_add_ps(a, _mm_mul_ps(x, y)) +#define VMSB(a, x, y) _mm_sub_ps(a, _mm_mul_ps(x, y)) +#define VMUL_S(x, s) _mm_mul_ps(x, _mm_set1_ps(s)) +#define VREV(x) _mm_shuffle_ps(x, x, _MM_SHUFFLE(0, 1, 2, 3)) +typedef __m128 f4; +#if defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) +#define minimp3_cpuid __cpuid +#else /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */ +static __inline__ __attribute__((always_inline)) void minimp3_cpuid(int CPUInfo[], const int InfoType) +{ +#if defined(__PIC__) + __asm__ __volatile__( +#if defined(__x86_64__) + "push %%rbx\n" + "cpuid\n" + "xchgl %%ebx, %1\n" + "pop %%rbx\n" +#else /* defined(__x86_64__) */ + "xchgl %%ebx, %1\n" + "cpuid\n" + "xchgl %%ebx, %1\n" +#endif /* defined(__x86_64__) */ + : "=a" (CPUInfo[0]), "=r" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) + : "a" (InfoType)); +#else /* defined(__PIC__) */ + __asm__ __volatile__( + "cpuid" + : "=a" (CPUInfo[0]), "=b" (CPUInfo[1]), "=c" (CPUInfo[2]), "=d" (CPUInfo[3]) + : "a" (InfoType)); +#endif /* defined(__PIC__)*/ +} +#endif /* defined(_MSC_VER) || defined(MINIMP3_ONLY_SIMD) */ +static int have_simd(void) +{ +#ifdef MINIMP3_ONLY_SIMD + return 1; +#else /* MINIMP3_ONLY_SIMD */ + static int g_have_simd; + int CPUInfo[4]; +#ifdef MINIMP3_TEST + static int g_counter; + if (g_counter++ > 100) + return 0; +#endif /* MINIMP3_TEST */ + if (g_have_simd) + goto end; + minimp3_cpuid(CPUInfo, 0); + g_have_simd = 1; + if (CPUInfo[0] > 0) + { + minimp3_cpuid(CPUInfo, 1); + g_have_simd = (CPUInfo[3] & (1 << 26)) + 1; /* SSE2 */ + } +end: + return g_have_simd - 1; +#endif /* MINIMP3_ONLY_SIMD */ +} +#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64) +#include <arm_neon.h> +#define HAVE_SSE 0 +#define HAVE_SIMD 1 +#define VSTORE vst1q_f32 +#define VLD vld1q_f32 +#define VSET vmovq_n_f32 +#define VADD vaddq_f32 +#define VSUB vsubq_f32 +#define VMUL vmulq_f32 +#define VMAC(a, x, y) vmlaq_f32(a, x, y) +#define VMSB(a, x, y) vmlsq_f32(a, x, y) +#define VMUL_S(x, s) vmulq_f32(x, vmovq_n_f32(s)) +#define VREV(x) vcombine_f32(vget_high_f32(vrev64q_f32(x)), vget_low_f32(vrev64q_f32(x))) +typedef float32x4_t f4; +static int have_simd() +{ /* TODO: detect neon for !MINIMP3_ONLY_SIMD */ + return 1; +} +#else /* SIMD checks... */ +#define HAVE_SSE 0 +#define HAVE_SIMD 0 +#ifdef MINIMP3_ONLY_SIMD +#error MINIMP3_ONLY_SIMD used, but SSE/NEON not enabled +#endif /* MINIMP3_ONLY_SIMD */ +#endif /* SIMD checks... */ +#else /* !defined(MINIMP3_NO_SIMD) */ +#define HAVE_SIMD 0 +#endif /* !defined(MINIMP3_NO_SIMD) */ + +#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64) +#define HAVE_ARMV6 1 +static __inline__ __attribute__((always_inline)) int32_t minimp3_clip_int16_arm(int32_t a) +{ + int32_t x = 0; + __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a)); + return x; +} +#else +#define HAVE_ARMV6 0 +#endif + +typedef struct +{ + const uint8_t *buf; + int pos, limit; +} bs_t; + +typedef struct +{ + float scf[3*64]; + uint8_t total_bands, stereo_bands, bitalloc[64], scfcod[64]; +} L12_scale_info; + +typedef struct +{ + uint8_t tab_offset, code_tab_width, band_count; +} L12_subband_alloc_t; + +typedef struct +{ + const uint8_t *sfbtab; + uint16_t part_23_length, big_values, scalefac_compress; + uint8_t global_gain, block_type, mixed_block_flag, n_long_sfb, n_short_sfb; + uint8_t table_select[3], region_count[3], subblock_gain[3]; + uint8_t preflag, scalefac_scale, count1_table, scfsi; +} L3_gr_info_t; + +typedef struct +{ + bs_t bs; + uint8_t maindata[MAX_BITRESERVOIR_BYTES + MAX_L3_FRAME_PAYLOAD_BYTES]; + L3_gr_info_t gr_info[4]; + float grbuf[2][576], scf[40], syn[18 + 15][2*32]; + uint8_t ist_pos[2][39]; +} mp3dec_scratch_t; + +static void bs_init(bs_t *bs, const uint8_t *data, int bytes) +{ + bs->buf = data; + bs->pos = 0; + bs->limit = bytes*8; +} + +static uint32_t get_bits(bs_t *bs, int n) +{ + uint32_t next, cache = 0, s = bs->pos & 7; + int shl = n + s; + const uint8_t *p = bs->buf + (bs->pos >> 3); + if ((bs->pos += n) > bs->limit) + return 0; + next = *p++ & (255 >> s); + while ((shl -= 8) > 0) + { + cache |= next << shl; + next = *p++; + } + return cache | (next >> -shl); +} + +static int hdr_valid(const uint8_t *h) +{ + return h[0] == 0xff && + ((h[1] & 0xF0) == 0xf0 || (h[1] & 0xFE) == 0xe2) && + (HDR_GET_LAYER(h) != 0) && + (HDR_GET_BITRATE(h) != 15) && + (HDR_GET_SAMPLE_RATE(h) != 3); +} + +static int hdr_compare(const uint8_t *h1, const uint8_t *h2) +{ + return hdr_valid(h2) && + ((h1[1] ^ h2[1]) & 0xFE) == 0 && + ((h1[2] ^ h2[2]) & 0x0C) == 0 && + !(HDR_IS_FREE_FORMAT(h1) ^ HDR_IS_FREE_FORMAT(h2)); +} + +static unsigned hdr_bitrate_kbps(const uint8_t *h) +{ + static const uint8_t halfrate[2][3][15] = { + { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } }, + { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } }, + }; + return 2*halfrate[!!HDR_TEST_MPEG1(h)][HDR_GET_LAYER(h) - 1][HDR_GET_BITRATE(h)]; +} + +static unsigned hdr_sample_rate_hz(const uint8_t *h) +{ + static const unsigned g_hz[3] = { 44100, 48000, 32000 }; + return g_hz[HDR_GET_SAMPLE_RATE(h)] >> (int)!HDR_TEST_MPEG1(h) >> (int)!HDR_TEST_NOT_MPEG25(h); +} + +static unsigned hdr_frame_samples(const uint8_t *h) +{ + return HDR_IS_LAYER_1(h) ? 384 : (1152 >> (int)HDR_IS_FRAME_576(h)); +} + +static int hdr_frame_bytes(const uint8_t *h, int free_format_size) +{ + int frame_bytes = hdr_frame_samples(h)*hdr_bitrate_kbps(h)*125/hdr_sample_rate_hz(h); + if (HDR_IS_LAYER_1(h)) + { + frame_bytes &= ~3; /* slot align */ + } + return frame_bytes ? frame_bytes : free_format_size; +} + +static int hdr_padding(const uint8_t *h) +{ + return HDR_TEST_PADDING(h) ? (HDR_IS_LAYER_1(h) ? 4 : 1) : 0; +} + +#ifndef MINIMP3_ONLY_MP3 +static const L12_subband_alloc_t *L12_subband_alloc_table(const uint8_t *hdr, L12_scale_info *sci) +{ + const L12_subband_alloc_t *alloc; + int mode = HDR_GET_STEREO_MODE(hdr); + int nbands, stereo_bands = (mode == MODE_MONO) ? 0 : (mode == MODE_JOINT_STEREO) ? (HDR_GET_STEREO_MODE_EXT(hdr) << 2) + 4 : 32; + + if (HDR_IS_LAYER_1(hdr)) + { + static const L12_subband_alloc_t g_alloc_L1[] = { { 76, 4, 32 } }; + alloc = g_alloc_L1; + nbands = 32; + } else if (!HDR_TEST_MPEG1(hdr)) + { + static const L12_subband_alloc_t g_alloc_L2M2[] = { { 60, 4, 4 }, { 44, 3, 7 }, { 44, 2, 19 } }; + alloc = g_alloc_L2M2; + nbands = 30; + } else + { + static const L12_subband_alloc_t g_alloc_L2M1[] = { { 0, 4, 3 }, { 16, 4, 8 }, { 32, 3, 12 }, { 40, 2, 7 } }; + int sample_rate_idx = HDR_GET_SAMPLE_RATE(hdr); + unsigned kbps = hdr_bitrate_kbps(hdr) >> (int)(mode != MODE_MONO); + if (!kbps) /* free-format */ + { + kbps = 192; + } + + alloc = g_alloc_L2M1; + nbands = 27; + if (kbps < 56) + { + static const L12_subband_alloc_t g_alloc_L2M1_lowrate[] = { { 44, 4, 2 }, { 44, 3, 10 } }; + alloc = g_alloc_L2M1_lowrate; + nbands = sample_rate_idx == 2 ? 12 : 8; + } else if (kbps >= 96 && sample_rate_idx != 1) + { + nbands = 30; + } + } + + sci->total_bands = (uint8_t)nbands; + sci->stereo_bands = (uint8_t)MINIMP3_MIN(stereo_bands, nbands); + + return alloc; +} + +static void L12_read_scalefactors(bs_t *bs, uint8_t *pba, uint8_t *scfcod, int bands, float *scf) +{ + static const float g_deq_L12[18*3] = { +#define DQ(x) 9.53674316e-07f/x, 7.56931807e-07f/x, 6.00777173e-07f/x + DQ(3),DQ(7),DQ(15),DQ(31),DQ(63),DQ(127),DQ(255),DQ(511),DQ(1023),DQ(2047),DQ(4095),DQ(8191),DQ(16383),DQ(32767),DQ(65535),DQ(3),DQ(5),DQ(9) + }; + int i, m; + for (i = 0; i < bands; i++) + { + float s = 0; + int ba = *pba++; + int mask = ba ? 4 + ((19 >> scfcod[i]) & 3) : 0; + for (m = 4; m; m >>= 1) + { + if (mask & m) + { + int b = get_bits(bs, 6); + s = g_deq_L12[ba*3 - 6 + b % 3]*(1 << 21 >> b/3); + } + *scf++ = s; + } + } +} + +static void L12_read_scale_info(const uint8_t *hdr, bs_t *bs, L12_scale_info *sci) +{ + static const uint8_t g_bitalloc_code_tab[] = { + 0,17, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16, + 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,16, + 0,17,18, 3,19,4,5,16, + 0,17,18,16, + 0,17,18,19, 4,5,6, 7,8, 9,10,11,12,13,14,15, + 0,17,18, 3,19,4,5, 6,7, 8, 9,10,11,12,13,14, + 0, 2, 3, 4, 5,6,7, 8,9,10,11,12,13,14,15,16 + }; + const L12_subband_alloc_t *subband_alloc = L12_subband_alloc_table(hdr, sci); + + int i, k = 0, ba_bits = 0; + const uint8_t *ba_code_tab = g_bitalloc_code_tab; + + for (i = 0; i < sci->total_bands; i++) + { + uint8_t ba; + if (i == k) + { + k += subband_alloc->band_count; + ba_bits = subband_alloc->code_tab_width; + ba_code_tab = g_bitalloc_code_tab + subband_alloc->tab_offset; + subband_alloc++; + } + ba = ba_code_tab[get_bits(bs, ba_bits)]; + sci->bitalloc[2*i] = ba; + if (i < sci->stereo_bands) + { + ba = ba_code_tab[get_bits(bs, ba_bits)]; + } + sci->bitalloc[2*i + 1] = sci->stereo_bands ? ba : 0; + } + + for (i = 0; i < 2*sci->total_bands; i++) + { + sci->scfcod[i] = sci->bitalloc[i] ? HDR_IS_LAYER_1(hdr) ? 2 : get_bits(bs, 2) : 6; + } + + L12_read_scalefactors(bs, sci->bitalloc, sci->scfcod, sci->total_bands*2, sci->scf); + + for (i = sci->stereo_bands; i < sci->total_bands; i++) + { + sci->bitalloc[2*i + 1] = 0; + } +} + +static int L12_dequantize_granule(float *grbuf, bs_t *bs, L12_scale_info *sci, int group_size) +{ + int i, j, k, choff = 576; + for (j = 0; j < 4; j++) + { + float *dst = grbuf + group_size*j; + for (i = 0; i < 2*sci->total_bands; i++) + { + int ba = sci->bitalloc[i]; + if (ba != 0) + { + if (ba < 17) + { + int half = (1 << (ba - 1)) - 1; + for (k = 0; k < group_size; k++) + { + dst[k] = (float)((int)get_bits(bs, ba) - half); + } + } else + { + unsigned mod = (2 << (ba - 17)) + 1; /* 3, 5, 9 */ + unsigned code = get_bits(bs, mod + 2 - (mod >> 3)); /* 5, 7, 10 */ + for (k = 0; k < group_size; k++, code /= mod) + { + dst[k] = (float)((int)(code % mod - mod/2)); + } + } + } + dst += choff; + choff = 18 - choff; + } + } + return group_size*4; +} + +static void L12_apply_scf_384(L12_scale_info *sci, const float *scf, float *dst) +{ + int i, k; + memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float)); + for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6) + { + for (k = 0; k < 12; k++) + { + dst[k + 0] *= scf[0]; + dst[k + 576] *= scf[3]; + } + } +} +#endif /* MINIMP3_ONLY_MP3 */ + +static int L3_read_side_info(bs_t *bs, L3_gr_info_t *gr, const uint8_t *hdr) +{ + static const uint8_t g_scf_long[8][23] = { + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 12,12,12,12,12,12,16,20,24,28,32,40,48,56,64,76,90,2,2,2,2,2,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,18,22,26,32,38,46,54,62,70,76,36,0 }, + { 6,6,6,6,6,6,8,10,12,14,16,20,24,28,32,38,46,52,60,68,58,54,0 }, + { 4,4,4,4,4,4,6,6,8,8,10,12,16,20,24,28,34,42,50,54,76,158,0 }, + { 4,4,4,4,4,4,6,6,6,8,10,12,16,18,22,28,34,40,46,54,54,192,0 }, + { 4,4,4,4,4,4,6,6,8,10,12,16,20,24,30,38,46,56,68,84,102,26,0 } + }; + static const uint8_t g_scf_short[8][40] = { + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 8,8,8,8,8,8,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 }, + { 4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, + { 4,4,4,4,4,4,4,4,4,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } + }; + static const uint8_t g_scf_mixed[8][40] = { + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 12,12,12,4,4,4,8,8,8,12,12,12,16,16,16,20,20,20,24,24,24,28,28,28,36,36,36,2,2,2,2,2,2,2,2,2,26,26,26,0 }, + { 6,6,6,6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,14,14,14,18,18,18,26,26,26,32,32,32,42,42,42,18,18,18,0 }, + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,32,32,32,44,44,44,12,12,12,0 }, + { 6,6,6,6,6,6,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,24,24,24,30,30,30,40,40,40,18,18,18,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,10,10,10,12,12,12,14,14,14,18,18,18,22,22,22,30,30,30,56,56,56,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,6,6,6,10,10,10,12,12,12,14,14,14,16,16,16,20,20,20,26,26,26,66,66,66,0 }, + { 4,4,4,4,4,4,6,6,4,4,4,6,6,6,8,8,8,12,12,12,16,16,16,20,20,20,26,26,26,34,34,34,42,42,42,12,12,12,0 } + }; + + unsigned tables, scfsi = 0; + int main_data_begin, part_23_sum = 0; + int sr_idx = HDR_GET_MY_SAMPLE_RATE(hdr); sr_idx -= (sr_idx != 0); + int gr_count = HDR_IS_MONO(hdr) ? 1 : 2; + + if (HDR_TEST_MPEG1(hdr)) + { + gr_count *= 2; + main_data_begin = get_bits(bs, 9); + scfsi = get_bits(bs, 7 + gr_count); + } else + { + main_data_begin = get_bits(bs, 8 + gr_count) >> gr_count; + } + + do + { + if (HDR_IS_MONO(hdr)) + { + scfsi <<= 4; + } + gr->part_23_length = (uint16_t)get_bits(bs, 12); + part_23_sum += gr->part_23_length; + gr->big_values = (uint16_t)get_bits(bs, 9); + if (gr->big_values > 288) + { + return -1; + } + gr->global_gain = (uint8_t)get_bits(bs, 8); + gr->scalefac_compress = (uint16_t)get_bits(bs, HDR_TEST_MPEG1(hdr) ? 4 : 9); + gr->sfbtab = g_scf_long[sr_idx]; + gr->n_long_sfb = 22; + gr->n_short_sfb = 0; + if (get_bits(bs, 1)) + { + gr->block_type = (uint8_t)get_bits(bs, 2); + if (!gr->block_type) + { + return -1; + } + gr->mixed_block_flag = (uint8_t)get_bits(bs, 1); + gr->region_count[0] = 7; + gr->region_count[1] = 255; + if (gr->block_type == SHORT_BLOCK_TYPE) + { + scfsi &= 0x0F0F; + if (!gr->mixed_block_flag) + { + gr->region_count[0] = 8; + gr->sfbtab = g_scf_short[sr_idx]; + gr->n_long_sfb = 0; + gr->n_short_sfb = 39; + } else + { + gr->sfbtab = g_scf_mixed[sr_idx]; + gr->n_long_sfb = HDR_TEST_MPEG1(hdr) ? 8 : 6; + gr->n_short_sfb = 30; + } + } + tables = get_bits(bs, 10); + tables <<= 5; + gr->subblock_gain[0] = (uint8_t)get_bits(bs, 3); + gr->subblock_gain[1] = (uint8_t)get_bits(bs, 3); + gr->subblock_gain[2] = (uint8_t)get_bits(bs, 3); + } else + { + gr->block_type = 0; + gr->mixed_block_flag = 0; + tables = get_bits(bs, 15); + gr->region_count[0] = (uint8_t)get_bits(bs, 4); + gr->region_count[1] = (uint8_t)get_bits(bs, 3); + gr->region_count[2] = 255; + } + gr->table_select[0] = (uint8_t)(tables >> 10); + gr->table_select[1] = (uint8_t)((tables >> 5) & 31); + gr->table_select[2] = (uint8_t)((tables) & 31); + gr->preflag = HDR_TEST_MPEG1(hdr) ? get_bits(bs, 1) : (gr->scalefac_compress >= 500); + gr->scalefac_scale = (uint8_t)get_bits(bs, 1); + gr->count1_table = (uint8_t)get_bits(bs, 1); + gr->scfsi = (uint8_t)((scfsi >> 12) & 15); + scfsi <<= 4; + gr++; + } while(--gr_count); + + if (part_23_sum + bs->pos > bs->limit + main_data_begin*8) + { + return -1; + } + + return main_data_begin; +} + +static void L3_read_scalefactors(uint8_t *scf, uint8_t *ist_pos, const uint8_t *scf_size, const uint8_t *scf_count, bs_t *bitbuf, int scfsi) +{ + int i, k; + for (i = 0; i < 4 && scf_count[i]; i++, scfsi *= 2) + { + int cnt = scf_count[i]; + if (scfsi & 8) + { + memcpy(scf, ist_pos, cnt); + } else + { + int bits = scf_size[i]; + if (!bits) + { + memset(scf, 0, cnt); + memset(ist_pos, 0, cnt); + } else + { + int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1; + for (k = 0; k < cnt; k++) + { + int s = get_bits(bitbuf, bits); + ist_pos[k] = (s == max_scf ? -1 : s); + scf[k] = s; + } + } + } + ist_pos += cnt; + scf += cnt; + } + scf[0] = scf[1] = scf[2] = 0; +} + +static float L3_ldexp_q2(float y, int exp_q2) +{ + static const float g_expfrac[4] = { 9.31322575e-10f,7.83145814e-10f,6.58544508e-10f,5.53767716e-10f }; + int e; + do + { + e = MINIMP3_MIN(30*4, exp_q2); + y *= g_expfrac[e & 3]*(1 << 30 >> (e >> 2)); + } while ((exp_q2 -= e) > 0); + return y; +} + +static void L3_decode_scalefactors(const uint8_t *hdr, uint8_t *ist_pos, bs_t *bs, const L3_gr_info_t *gr, float *scf, int ch) +{ + static const uint8_t g_scf_partitions[3][28] = { + { 6,5,5, 5,6,5,5,5,6,5, 7,3,11,10,0,0, 7, 7, 7,0, 6, 6,6,3, 8, 8,5,0 }, + { 8,9,6,12,6,9,9,9,6,9,12,6,15,18,0,0, 6,15,12,0, 6,12,9,6, 6,18,9,0 }, + { 9,9,6,12,9,9,9,9,9,9,12,6,18,18,0,0,12,12,12,0,12, 9,9,6,15,12,9,0 } + }; + const uint8_t *scf_partition = g_scf_partitions[!!gr->n_short_sfb + !gr->n_long_sfb]; + uint8_t scf_size[4], iscf[40]; + int i, scf_shift = gr->scalefac_scale + 1, gain_exp, scfsi = gr->scfsi; + float gain; + + if (HDR_TEST_MPEG1(hdr)) + { + static const uint8_t g_scfc_decode[16] = { 0,1,2,3, 12,5,6,7, 9,10,11,13, 14,15,18,19 }; + int part = g_scfc_decode[gr->scalefac_compress]; + scf_size[1] = scf_size[0] = (uint8_t)(part >> 2); + scf_size[3] = scf_size[2] = (uint8_t)(part & 3); + } else + { + static const uint8_t g_mod[6*4] = { 5,5,4,4,5,5,4,1,4,3,1,1,5,6,6,1,4,4,4,1,4,3,1,1 }; + int k, modprod, sfc, ist = HDR_TEST_I_STEREO(hdr) && ch; + sfc = gr->scalefac_compress >> ist; + for (k = ist*3*4; sfc >= 0; sfc -= modprod, k += 4) + { + for (modprod = 1, i = 3; i >= 0; i--) + { + scf_size[i] = (uint8_t)(sfc / modprod % g_mod[k + i]); + modprod *= g_mod[k + i]; + } + } + scf_partition += k; + scfsi = -16; + } + L3_read_scalefactors(iscf, ist_pos, scf_size, scf_partition, bs, scfsi); + + if (gr->n_short_sfb) + { + int sh = 3 - scf_shift; + for (i = 0; i < gr->n_short_sfb; i += 3) + { + iscf[gr->n_long_sfb + i + 0] += gr->subblock_gain[0] << sh; + iscf[gr->n_long_sfb + i + 1] += gr->subblock_gain[1] << sh; + iscf[gr->n_long_sfb + i + 2] += gr->subblock_gain[2] << sh; + } + } else if (gr->preflag) + { + static const uint8_t g_preamp[10] = { 1,1,1,1,2,2,3,3,3,2 }; + for (i = 0; i < 10; i++) + { + iscf[11 + i] += g_preamp[i]; + } + } + + gain_exp = gr->global_gain + BITS_DEQUANTIZER_OUT*4 - 210 - (HDR_IS_MS_STEREO(hdr) ? 2 : 0); + gain = L3_ldexp_q2(1 << (MAX_SCFI/4), MAX_SCFI - gain_exp); + for (i = 0; i < (int)(gr->n_long_sfb + gr->n_short_sfb); i++) + { + scf[i] = L3_ldexp_q2(gain, iscf[i] << scf_shift); + } +} + +static const float g_pow43[129 + 16] = { + 0,-1,-2.519842f,-4.326749f,-6.349604f,-8.549880f,-10.902724f,-13.390518f,-16.000000f,-18.720754f,-21.544347f,-24.463781f,-27.473142f,-30.567351f,-33.741992f,-36.993181f, + 0,1,2.519842f,4.326749f,6.349604f,8.549880f,10.902724f,13.390518f,16.000000f,18.720754f,21.544347f,24.463781f,27.473142f,30.567351f,33.741992f,36.993181f,40.317474f,43.711787f,47.173345f,50.699631f,54.288352f,57.937408f,61.644865f,65.408941f,69.227979f,73.100443f,77.024898f,81.000000f,85.024491f,89.097188f,93.216975f,97.382800f,101.593667f,105.848633f,110.146801f,114.487321f,118.869381f,123.292209f,127.755065f,132.257246f,136.798076f,141.376907f,145.993119f,150.646117f,155.335327f,160.060199f,164.820202f,169.614826f,174.443577f,179.305980f,184.201575f,189.129918f,194.090580f,199.083145f,204.107210f,209.162385f,214.248292f,219.364564f,224.510845f,229.686789f,234.892058f,240.126328f,245.389280f,250.680604f,256.000000f,261.347174f,266.721841f,272.123723f,277.552547f,283.008049f,288.489971f,293.998060f,299.532071f,305.091761f,310.676898f,316.287249f,321.922592f,327.582707f,333.267377f,338.976394f,344.709550f,350.466646f,356.247482f,362.051866f,367.879608f,373.730522f,379.604427f,385.501143f,391.420496f,397.362314f,403.326427f,409.312672f,415.320884f,421.350905f,427.402579f,433.475750f,439.570269f,445.685987f,451.822757f,457.980436f,464.158883f,470.357960f,476.577530f,482.817459f,489.077615f,495.357868f,501.658090f,507.978156f,514.317941f,520.677324f,527.056184f,533.454404f,539.871867f,546.308458f,552.764065f,559.238575f,565.731879f,572.243870f,578.774440f,585.323483f,591.890898f,598.476581f,605.080431f,611.702349f,618.342238f,625.000000f,631.675540f,638.368763f,645.079578f +}; + +static float L3_pow_43(int x) +{ + float frac; + int sign, mult = 256; + + if (x < 129) + { + return g_pow43[16 + x]; + } + + if (x < 1024) + { + mult = 16; + x <<= 3; + } + + sign = 2*x & 64; + frac = (float)((x & 63) - sign) / ((x & ~63) + sign); + return g_pow43[16 + ((x + sign) >> 6)]*(1.f + frac*((4.f/3) + frac*(2.f/9)))*mult; +} + +static void L3_huffman(float *dst, bs_t *bs, const L3_gr_info_t *gr_info, const float *scf, int layer3gr_limit) +{ + static const int16_t tabs[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 785,785,785,785,784,784,784,784,513,513,513,513,513,513,513,513,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256, + -255,1313,1298,1282,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,290,288, + -255,1313,1298,1282,769,769,769,769,529,529,529,529,529,529,529,529,528,528,528,528,528,528,528,528,512,512,512,512,512,512,512,512,290,288, + -253,-318,-351,-367,785,785,785,785,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,819,818,547,547,275,275,275,275,561,560,515,546,289,274,288,258, + -254,-287,1329,1299,1314,1312,1057,1057,1042,1042,1026,1026,784,784,784,784,529,529,529,529,529,529,529,529,769,769,769,769,768,768,768,768,563,560,306,306,291,259, + -252,-413,-477,-542,1298,-575,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-383,-399,1107,1092,1106,1061,849,849,789,789,1104,1091,773,773,1076,1075,341,340,325,309,834,804,577,577,532,532,516,516,832,818,803,816,561,561,531,531,515,546,289,289,288,258, + -252,-429,-493,-559,1057,1057,1042,1042,529,529,529,529,529,529,529,529,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,-382,1077,-415,1106,1061,1104,849,849,789,789,1091,1076,1029,1075,834,834,597,581,340,340,339,324,804,833,532,532,832,772,818,803,817,787,816,771,290,290,290,290,288,258, + -253,-349,-414,-447,-463,1329,1299,-479,1314,1312,1057,1057,1042,1042,1026,1026,785,785,785,785,784,784,784,784,769,769,769,769,768,768,768,768,-319,851,821,-335,836,850,805,849,341,340,325,336,533,533,579,579,564,564,773,832,578,548,563,516,321,276,306,291,304,259, + -251,-572,-733,-830,-863,-879,1041,1041,784,784,784,784,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,1396,1351,1381,1366,1395,1335,1380,-559,1334,1138,1138,1063,1063,1350,1392,1031,1031,1062,1062,1364,1363,1120,1120,1333,1348,881,881,881,881,375,374,359,373,343,358,341,325,791,791,1123,1122,-703,1105,1045,-719,865,865,790,790,774,774,1104,1029,338,293,323,308,-799,-815,833,788,772,818,803,816,322,292,307,320,561,531,515,546,289,274,288,258, + -251,-525,-605,-685,-765,-831,-846,1298,1057,1057,1312,1282,785,785,785,785,784,784,784,784,769,769,769,769,512,512,512,512,512,512,512,512,1399,1398,1383,1367,1382,1396,1351,-511,1381,1366,1139,1139,1079,1079,1124,1124,1364,1349,1363,1333,882,882,882,882,807,807,807,807,1094,1094,1136,1136,373,341,535,535,881,775,867,822,774,-591,324,338,-671,849,550,550,866,864,609,609,293,336,534,534,789,835,773,-751,834,804,308,307,833,788,832,772,562,562,547,547,305,275,560,515,290,290, + -252,-397,-477,-557,-622,-653,-719,-735,-750,1329,1299,1314,1057,1057,1042,1042,1312,1282,1024,1024,785,785,785,785,784,784,784,784,769,769,769,769,-383,1127,1141,1111,1126,1140,1095,1110,869,869,883,883,1079,1109,882,882,375,374,807,868,838,881,791,-463,867,822,368,263,852,837,836,-543,610,610,550,550,352,336,534,534,865,774,851,821,850,805,593,533,579,564,773,832,578,578,548,548,577,577,307,276,306,291,516,560,259,259, + -250,-2107,-2507,-2764,-2909,-2974,-3007,-3023,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-767,-1052,-1213,-1277,-1358,-1405,-1469,-1535,-1550,-1582,-1614,-1647,-1662,-1694,-1726,-1759,-1774,-1807,-1822,-1854,-1886,1565,-1919,-1935,-1951,-1967,1731,1730,1580,1717,-1983,1729,1564,-1999,1548,-2015,-2031,1715,1595,-2047,1714,-2063,1610,-2079,1609,-2095,1323,1323,1457,1457,1307,1307,1712,1547,1641,1700,1699,1594,1685,1625,1442,1442,1322,1322,-780,-973,-910,1279,1278,1277,1262,1276,1261,1275,1215,1260,1229,-959,974,974,989,989,-943,735,478,478,495,463,506,414,-1039,1003,958,1017,927,942,987,957,431,476,1272,1167,1228,-1183,1256,-1199,895,895,941,941,1242,1227,1212,1135,1014,1014,490,489,503,487,910,1013,985,925,863,894,970,955,1012,847,-1343,831,755,755,984,909,428,366,754,559,-1391,752,486,457,924,997,698,698,983,893,740,740,908,877,739,739,667,667,953,938,497,287,271,271,683,606,590,712,726,574,302,302,738,736,481,286,526,725,605,711,636,724,696,651,589,681,666,710,364,467,573,695,466,466,301,465,379,379,709,604,665,679,316,316,634,633,436,436,464,269,424,394,452,332,438,363,347,408,393,448,331,422,362,407,392,421,346,406,391,376,375,359,1441,1306,-2367,1290,-2383,1337,-2399,-2415,1426,1321,-2431,1411,1336,-2447,-2463,-2479,1169,1169,1049,1049,1424,1289,1412,1352,1319,-2495,1154,1154,1064,1064,1153,1153,416,390,360,404,403,389,344,374,373,343,358,372,327,357,342,311,356,326,1395,1394,1137,1137,1047,1047,1365,1392,1287,1379,1334,1364,1349,1378,1318,1363,792,792,792,792,1152,1152,1032,1032,1121,1121,1046,1046,1120,1120,1030,1030,-2895,1106,1061,1104,849,849,789,789,1091,1076,1029,1090,1060,1075,833,833,309,324,532,532,832,772,818,803,561,561,531,560,515,546,289,274,288,258, + -250,-1179,-1579,-1836,-1996,-2124,-2253,-2333,-2413,-2477,-2542,-2574,-2607,-2622,-2655,1314,1313,1298,1312,1282,785,785,785,785,1040,1040,1025,1025,768,768,768,768,-766,-798,-830,-862,-895,-911,-927,-943,-959,-975,-991,-1007,-1023,-1039,-1055,-1070,1724,1647,-1103,-1119,1631,1767,1662,1738,1708,1723,-1135,1780,1615,1779,1599,1677,1646,1778,1583,-1151,1777,1567,1737,1692,1765,1722,1707,1630,1751,1661,1764,1614,1736,1676,1763,1750,1645,1598,1721,1691,1762,1706,1582,1761,1566,-1167,1749,1629,767,766,751,765,494,494,735,764,719,749,734,763,447,447,748,718,477,506,431,491,446,476,461,505,415,430,475,445,504,399,460,489,414,503,383,474,429,459,502,502,746,752,488,398,501,473,413,472,486,271,480,270,-1439,-1455,1357,-1471,-1487,-1503,1341,1325,-1519,1489,1463,1403,1309,-1535,1372,1448,1418,1476,1356,1462,1387,-1551,1475,1340,1447,1402,1386,-1567,1068,1068,1474,1461,455,380,468,440,395,425,410,454,364,467,466,464,453,269,409,448,268,432,1371,1473,1432,1417,1308,1460,1355,1446,1459,1431,1083,1083,1401,1416,1458,1445,1067,1067,1370,1457,1051,1051,1291,1430,1385,1444,1354,1415,1400,1443,1082,1082,1173,1113,1186,1066,1185,1050,-1967,1158,1128,1172,1097,1171,1081,-1983,1157,1112,416,266,375,400,1170,1142,1127,1065,793,793,1169,1033,1156,1096,1141,1111,1155,1080,1126,1140,898,898,808,808,897,897,792,792,1095,1152,1032,1125,1110,1139,1079,1124,882,807,838,881,853,791,-2319,867,368,263,822,852,837,866,806,865,-2399,851,352,262,534,534,821,836,594,594,549,549,593,593,533,533,848,773,579,579,564,578,548,563,276,276,577,576,306,291,516,560,305,305,275,259, + -251,-892,-2058,-2620,-2828,-2957,-3023,-3039,1041,1041,1040,1040,769,769,769,769,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,-511,-527,-543,-559,1530,-575,-591,1528,1527,1407,1526,1391,1023,1023,1023,1023,1525,1375,1268,1268,1103,1103,1087,1087,1039,1039,1523,-604,815,815,815,815,510,495,509,479,508,463,507,447,431,505,415,399,-734,-782,1262,-815,1259,1244,-831,1258,1228,-847,-863,1196,-879,1253,987,987,748,-767,493,493,462,477,414,414,686,669,478,446,461,445,474,429,487,458,412,471,1266,1264,1009,1009,799,799,-1019,-1276,-1452,-1581,-1677,-1757,-1821,-1886,-1933,-1997,1257,1257,1483,1468,1512,1422,1497,1406,1467,1496,1421,1510,1134,1134,1225,1225,1466,1451,1374,1405,1252,1252,1358,1480,1164,1164,1251,1251,1238,1238,1389,1465,-1407,1054,1101,-1423,1207,-1439,830,830,1248,1038,1237,1117,1223,1148,1236,1208,411,426,395,410,379,269,1193,1222,1132,1235,1221,1116,976,976,1192,1162,1177,1220,1131,1191,963,963,-1647,961,780,-1663,558,558,994,993,437,408,393,407,829,978,813,797,947,-1743,721,721,377,392,844,950,828,890,706,706,812,859,796,960,948,843,934,874,571,571,-1919,690,555,689,421,346,539,539,944,779,918,873,932,842,903,888,570,570,931,917,674,674,-2575,1562,-2591,1609,-2607,1654,1322,1322,1441,1441,1696,1546,1683,1593,1669,1624,1426,1426,1321,1321,1639,1680,1425,1425,1305,1305,1545,1668,1608,1623,1667,1592,1638,1666,1320,1320,1652,1607,1409,1409,1304,1304,1288,1288,1664,1637,1395,1395,1335,1335,1622,1636,1394,1394,1319,1319,1606,1621,1392,1392,1137,1137,1137,1137,345,390,360,375,404,373,1047,-2751,-2767,-2783,1062,1121,1046,-2799,1077,-2815,1106,1061,789,789,1105,1104,263,355,310,340,325,354,352,262,339,324,1091,1076,1029,1090,1060,1075,833,833,788,788,1088,1028,818,818,803,803,561,561,531,531,816,771,546,546,289,274,288,258, + -253,-317,-381,-446,-478,-509,1279,1279,-811,-1179,-1451,-1756,-1900,-2028,-2189,-2253,-2333,-2414,-2445,-2511,-2526,1313,1298,-2559,1041,1041,1040,1040,1025,1025,1024,1024,1022,1007,1021,991,1020,975,1019,959,687,687,1018,1017,671,671,655,655,1016,1015,639,639,758,758,623,623,757,607,756,591,755,575,754,559,543,543,1009,783,-575,-621,-685,-749,496,-590,750,749,734,748,974,989,1003,958,988,973,1002,942,987,957,972,1001,926,986,941,971,956,1000,910,985,925,999,894,970,-1071,-1087,-1102,1390,-1135,1436,1509,1451,1374,-1151,1405,1358,1480,1420,-1167,1507,1494,1389,1342,1465,1435,1450,1326,1505,1310,1493,1373,1479,1404,1492,1464,1419,428,443,472,397,736,526,464,464,486,457,442,471,484,482,1357,1449,1434,1478,1388,1491,1341,1490,1325,1489,1463,1403,1309,1477,1372,1448,1418,1433,1476,1356,1462,1387,-1439,1475,1340,1447,1402,1474,1324,1461,1371,1473,269,448,1432,1417,1308,1460,-1711,1459,-1727,1441,1099,1099,1446,1386,1431,1401,-1743,1289,1083,1083,1160,1160,1458,1445,1067,1067,1370,1457,1307,1430,1129,1129,1098,1098,268,432,267,416,266,400,-1887,1144,1187,1082,1173,1113,1186,1066,1050,1158,1128,1143,1172,1097,1171,1081,420,391,1157,1112,1170,1142,1127,1065,1169,1049,1156,1096,1141,1111,1155,1080,1126,1154,1064,1153,1140,1095,1048,-2159,1125,1110,1137,-2175,823,823,1139,1138,807,807,384,264,368,263,868,838,853,791,867,822,852,837,866,806,865,790,-2319,851,821,836,352,262,850,805,849,-2399,533,533,835,820,336,261,578,548,563,577,532,532,832,772,562,562,547,547,305,275,560,515,290,290,288,258 }; + static const uint8_t tab32[] = { 130,162,193,209,44,28,76,140,9,9,9,9,9,9,9,9,190,254,222,238,126,94,157,157,109,61,173,205 }; + static const uint8_t tab33[] = { 252,236,220,204,188,172,156,140,124,108,92,76,60,44,28,12 }; + static const int16_t tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 }; + static const uint8_t g_linbits[] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 }; + +#define PEEK_BITS(n) (bs_cache >> (32 - n)) +#define FLUSH_BITS(n) { bs_cache <<= (n); bs_sh += (n); } +#define CHECK_BITS while (bs_sh >= 0) { bs_cache |= (uint32_t)*bs_next_ptr++ << bs_sh; bs_sh -= 8; } +#define BSPOS ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh) + + float one = 0.0f; + int ireg = 0, big_val_cnt = gr_info->big_values; + const uint8_t *sfb = gr_info->sfbtab; + const uint8_t *bs_next_ptr = bs->buf + bs->pos/8; + uint32_t bs_cache = (((bs_next_ptr[0]*256u + bs_next_ptr[1])*256u + bs_next_ptr[2])*256u + bs_next_ptr[3]) << (bs->pos & 7); + int pairs_to_decode, np, bs_sh = (bs->pos & 7) - 8; + bs_next_ptr += 4; + + while (big_val_cnt > 0) + { + int tab_num = gr_info->table_select[ireg]; + int sfb_cnt = gr_info->region_count[ireg++]; + const int16_t *codebook = tabs + tabindex[tab_num]; + int linbits = g_linbits[tab_num]; + if (linbits) + { + do + { + np = *sfb++ / 2; + pairs_to_decode = MINIMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[PEEK_BITS(w)]; + while (leaf < 0) + { + FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[PEEK_BITS(w) - (leaf >> 3)]; + } + FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + if (lsb == 15) + { + lsb += PEEK_BITS(linbits); + FLUSH_BITS(linbits); + CHECK_BITS; + *dst = one*L3_pow_43(lsb)*((int32_t)bs_cache < 0 ? -1: 1); + } else + { + *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + } + FLUSH_BITS(lsb ? 1 : 0); + } + CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } else + { + do + { + np = *sfb++ / 2; + pairs_to_decode = MINIMP3_MIN(big_val_cnt, np); + one = *scf++; + do + { + int j, w = 5; + int leaf = codebook[PEEK_BITS(w)]; + while (leaf < 0) + { + FLUSH_BITS(w); + w = leaf & 7; + leaf = codebook[PEEK_BITS(w) - (leaf >> 3)]; + } + FLUSH_BITS(leaf >> 8); + + for (j = 0; j < 2; j++, dst++, leaf >>= 4) + { + int lsb = leaf & 0x0F; + *dst = g_pow43[16 + lsb - 16*(bs_cache >> 31)]*one; + FLUSH_BITS(lsb ? 1 : 0); + } + CHECK_BITS; + } while (--pairs_to_decode); + } while ((big_val_cnt -= np) > 0 && --sfb_cnt >= 0); + } + } + + for (np = 1 - big_val_cnt;; dst += 4) + { + const uint8_t *codebook_count1 = (gr_info->count1_table) ? tab33 : tab32; + int leaf = codebook_count1[PEEK_BITS(4)]; + if (!(leaf & 8)) + { + leaf = codebook_count1[(leaf >> 3) + (bs_cache << 4 >> (32 - (leaf & 3)))]; + } + FLUSH_BITS(leaf & 7); + if (BSPOS > layer3gr_limit) + { + break; + } +#define RELOAD_SCALEFACTOR if (!--np) { np = *sfb++/2; if (!np) break; one = *scf++; } +#define DEQ_COUNT1(s) if (leaf & (128 >> s)) { dst[s] = ((int32_t)bs_cache < 0) ? -one : one; FLUSH_BITS(1) } + RELOAD_SCALEFACTOR; + DEQ_COUNT1(0); + DEQ_COUNT1(1); + RELOAD_SCALEFACTOR; + DEQ_COUNT1(2); + DEQ_COUNT1(3); + CHECK_BITS; + } + + bs->pos = layer3gr_limit; +} + +static void L3_midside_stereo(float *left, int n) +{ + int i = 0; + float *right = left + 576; +#if HAVE_SIMD + if (have_simd()) for (; i < n - 3; i += 4) + { + f4 vl = VLD(left + i); + f4 vr = VLD(right + i); + VSTORE(left + i, VADD(vl, vr)); + VSTORE(right + i, VSUB(vl, vr)); + } +#endif /* HAVE_SIMD */ + for (; i < n; i++) + { + float a = left[i]; + float b = right[i]; + left[i] = a + b; + right[i] = a - b; + } +} + +static void L3_intensity_stereo_band(float *left, int n, float kl, float kr) +{ + int i; + for (i = 0; i < n; i++) + { + left[i + 576] = left[i]*kr; + left[i] = left[i]*kl; + } +} + +static void L3_stereo_top_band(const float *right, const uint8_t *sfb, int nbands, int max_band[3]) +{ + int i, k; + + max_band[0] = max_band[1] = max_band[2] = -1; + + for (i = 0; i < nbands; i++) + { + for (k = 0; k < sfb[i]; k += 2) + { + if (right[k] != 0 || right[k + 1] != 0) + { + max_band[i % 3] = i; + break; + } + } + right += sfb[i]; + } +} + +static void L3_stereo_process(float *left, const uint8_t *ist_pos, const uint8_t *sfb, const uint8_t *hdr, int max_band[3], int mpeg2_sh) +{ + static const float g_pan[7*2] = { 0,1,0.21132487f,0.78867513f,0.36602540f,0.63397460f,0.5f,0.5f,0.63397460f,0.36602540f,0.78867513f,0.21132487f,1,0 }; + unsigned i, max_pos = HDR_TEST_MPEG1(hdr) ? 7 : 64; + + for (i = 0; sfb[i]; i++) + { + unsigned ipos = ist_pos[i]; + if ((int)i > max_band[i % 3] && ipos < max_pos) + { + float kl, kr, s = HDR_TEST_MS_STEREO(hdr) ? 1.41421356f : 1; + if (HDR_TEST_MPEG1(hdr)) + { + kl = g_pan[2*ipos]; + kr = g_pan[2*ipos + 1]; + } else + { + kl = 1; + kr = L3_ldexp_q2(1, (ipos + 1) >> 1 << mpeg2_sh); + if (ipos & 1) + { + kl = kr; + kr = 1; + } + } + L3_intensity_stereo_band(left, sfb[i], kl*s, kr*s); + } else if (HDR_TEST_MS_STEREO(hdr)) + { + L3_midside_stereo(left, sfb[i]); + } + left += sfb[i]; + } +} + +static void L3_intensity_stereo(float *left, uint8_t *ist_pos, const L3_gr_info_t *gr, const uint8_t *hdr) +{ + int max_band[3], n_sfb = gr->n_long_sfb + gr->n_short_sfb; + int i, max_blocks = gr->n_short_sfb ? 3 : 1; + + L3_stereo_top_band(left + 576, gr->sfbtab, n_sfb, max_band); + if (gr->n_long_sfb) + { + max_band[0] = max_band[1] = max_band[2] = MINIMP3_MAX(MINIMP3_MAX(max_band[0], max_band[1]), max_band[2]); + } + for (i = 0; i < max_blocks; i++) + { + int default_pos = HDR_TEST_MPEG1(hdr) ? 3 : 0; + int itop = n_sfb - max_blocks + i; + int prev = itop - max_blocks; + ist_pos[itop] = max_band[i] >= prev ? default_pos : ist_pos[prev]; + } + L3_stereo_process(left, ist_pos, gr->sfbtab, hdr, max_band, gr[1].scalefac_compress & 1); +} + +static void L3_reorder(float *grbuf, float *scratch, const uint8_t *sfb) +{ + int i, len; + float *src = grbuf, *dst = scratch; + + for (;0 != (len = *sfb); sfb += 3, src += 2*len) + { + for (i = 0; i < len; i++, src++) + { + *dst++ = src[0*len]; + *dst++ = src[1*len]; + *dst++ = src[2*len]; + } + } + memcpy(grbuf, scratch, (dst - scratch)*sizeof(float)); +} + +static void L3_antialias(float *grbuf, int nbands) +{ + static const float g_aa[2][8] = { + {0.85749293f,0.88174200f,0.94962865f,0.98331459f,0.99551782f,0.99916056f,0.99989920f,0.99999316f}, + {0.51449576f,0.47173197f,0.31337745f,0.18191320f,0.09457419f,0.04096558f,0.01419856f,0.00369997f} + }; + + for (; nbands > 0; nbands--, grbuf += 18) + { + int i = 0; +#if HAVE_SIMD + if (have_simd()) for (; i < 8; i += 4) + { + f4 vu = VLD(grbuf + 18 + i); + f4 vd = VLD(grbuf + 14 - i); + f4 vc0 = VLD(g_aa[0] + i); + f4 vc1 = VLD(g_aa[1] + i); + vd = VREV(vd); + VSTORE(grbuf + 18 + i, VSUB(VMUL(vu, vc0), VMUL(vd, vc1))); + vd = VADD(VMUL(vu, vc1), VMUL(vd, vc0)); + VSTORE(grbuf + 14 - i, VREV(vd)); + } +#endif /* HAVE_SIMD */ +#ifndef MINIMP3_ONLY_SIMD + for(; i < 8; i++) + { + float u = grbuf[18 + i]; + float d = grbuf[17 - i]; + grbuf[18 + i] = u*g_aa[0][i] - d*g_aa[1][i]; + grbuf[17 - i] = u*g_aa[1][i] + d*g_aa[0][i]; + } +#endif /* MINIMP3_ONLY_SIMD */ + } +} + +static void L3_dct3_9(float *y) +{ + float s0, s1, s2, s3, s4, s5, s6, s7, s8, t0, t2, t4; + + s0 = y[0]; s2 = y[2]; s4 = y[4]; s6 = y[6]; s8 = y[8]; + t0 = s0 + s6*0.5f; + s0 -= s6; + t4 = (s4 + s2)*0.93969262f; + t2 = (s8 + s2)*0.76604444f; + s6 = (s4 - s8)*0.17364818f; + s4 += s8 - s2; + + s2 = s0 - s4*0.5f; + y[4] = s4 + s0; + s8 = t0 - t2 + s6; + s0 = t0 - t4 + t2; + s4 = t0 + t4 - s6; + + s1 = y[1]; s3 = y[3]; s5 = y[5]; s7 = y[7]; + + s3 *= 0.86602540f; + t0 = (s5 + s1)*0.98480775f; + t4 = (s5 - s7)*0.34202014f; + t2 = (s1 + s7)*0.64278761f; + s1 = (s1 - s5 - s7)*0.86602540f; + + s5 = t0 - s3 - t2; + s7 = t4 - s3 - t0; + s3 = t4 + s3 - t2; + + y[0] = s4 - s7; + y[1] = s2 + s1; + y[2] = s0 - s3; + y[3] = s8 + s5; + y[5] = s8 - s5; + y[6] = s0 + s3; + y[7] = s2 - s1; + y[8] = s4 + s7; +} + +static void L3_imdct36(float *grbuf, float *overlap, const float *window, int nbands) +{ + int i, j; + static const float g_twid9[18] = { + 0.73727734f,0.79335334f,0.84339145f,0.88701083f,0.92387953f,0.95371695f,0.97629601f,0.99144486f,0.99904822f,0.67559021f,0.60876143f,0.53729961f,0.46174861f,0.38268343f,0.30070580f,0.21643961f,0.13052619f,0.04361938f + }; + + for (j = 0; j < nbands; j++, grbuf += 18, overlap += 9) + { + float co[9], si[9]; + co[0] = -grbuf[0]; + si[0] = grbuf[17]; + for (i = 0; i < 4; i++) + { + si[8 - 2*i] = grbuf[4*i + 1] - grbuf[4*i + 2]; + co[1 + 2*i] = grbuf[4*i + 1] + grbuf[4*i + 2]; + si[7 - 2*i] = grbuf[4*i + 4] - grbuf[4*i + 3]; + co[2 + 2*i] = -(grbuf[4*i + 3] + grbuf[4*i + 4]); + } + L3_dct3_9(co); + L3_dct3_9(si); + + si[1] = -si[1]; + si[3] = -si[3]; + si[5] = -si[5]; + si[7] = -si[7]; + + i = 0; + +#if HAVE_SIMD + if (have_simd()) for (; i < 8; i += 4) + { + f4 vovl = VLD(overlap + i); + f4 vc = VLD(co + i); + f4 vs = VLD(si + i); + f4 vr0 = VLD(g_twid9 + i); + f4 vr1 = VLD(g_twid9 + 9 + i); + f4 vw0 = VLD(window + i); + f4 vw1 = VLD(window + 9 + i); + f4 vsum = VADD(VMUL(vc, vr1), VMUL(vs, vr0)); + VSTORE(overlap + i, VSUB(VMUL(vc, vr0), VMUL(vs, vr1))); + VSTORE(grbuf + i, VSUB(VMUL(vovl, vw0), VMUL(vsum, vw1))); + vsum = VADD(VMUL(vovl, vw1), VMUL(vsum, vw0)); + VSTORE(grbuf + 14 - i, VREV(vsum)); + } +#endif /* HAVE_SIMD */ + for (; i < 9; i++) + { + float ovl = overlap[i]; + float sum = co[i]*g_twid9[9 + i] + si[i]*g_twid9[0 + i]; + overlap[i] = co[i]*g_twid9[0 + i] - si[i]*g_twid9[9 + i]; + grbuf[i] = ovl*window[0 + i] - sum*window[9 + i]; + grbuf[17 - i] = ovl*window[9 + i] + sum*window[0 + i]; + } + } +} + +static void L3_idct3(float x0, float x1, float x2, float *dst) +{ + float m1 = x1*0.86602540f; + float a1 = x0 - x2*0.5f; + dst[1] = x0 + x2; + dst[0] = a1 + m1; + dst[2] = a1 - m1; +} + +static void L3_imdct12(float *x, float *dst, float *overlap) +{ + static const float g_twid3[6] = { 0.79335334f,0.92387953f,0.99144486f, 0.60876143f,0.38268343f,0.13052619f }; + float co[3], si[3]; + int i; + + L3_idct3(-x[0], x[6] + x[3], x[12] + x[9], co); + L3_idct3(x[15], x[12] - x[9], x[6] - x[3], si); + si[1] = -si[1]; + + for (i = 0; i < 3; i++) + { + float ovl = overlap[i]; + float sum = co[i]*g_twid3[3 + i] + si[i]*g_twid3[0 + i]; + overlap[i] = co[i]*g_twid3[0 + i] - si[i]*g_twid3[3 + i]; + dst[i] = ovl*g_twid3[2 - i] - sum*g_twid3[5 - i]; + dst[5 - i] = ovl*g_twid3[5 - i] + sum*g_twid3[2 - i]; + } +} + +static void L3_imdct_short(float *grbuf, float *overlap, int nbands) +{ + for (;nbands > 0; nbands--, overlap += 9, grbuf += 18) + { + float tmp[18]; + memcpy(tmp, grbuf, sizeof(tmp)); + memcpy(grbuf, overlap, 6*sizeof(float)); + L3_imdct12(tmp, grbuf + 6, overlap + 6); + L3_imdct12(tmp + 1, grbuf + 12, overlap + 6); + L3_imdct12(tmp + 2, overlap, overlap + 6); + } +} + +static void L3_change_sign(float *grbuf) +{ + int b, i; + for (b = 0, grbuf += 18; b < 32; b += 2, grbuf += 36) + for (i = 1; i < 18; i += 2) + grbuf[i] = -grbuf[i]; +} + +static void L3_imdct_gr(float *grbuf, float *overlap, unsigned block_type, unsigned n_long_bands) +{ + static const float g_mdct_window[2][18] = { + { 0.99904822f,0.99144486f,0.97629601f,0.95371695f,0.92387953f,0.88701083f,0.84339145f,0.79335334f,0.73727734f,0.04361938f,0.13052619f,0.21643961f,0.30070580f,0.38268343f,0.46174861f,0.53729961f,0.60876143f,0.67559021f }, + { 1,1,1,1,1,1,0.99144486f,0.92387953f,0.79335334f,0,0,0,0,0,0,0.13052619f,0.38268343f,0.60876143f } + }; + if (n_long_bands) + { + L3_imdct36(grbuf, overlap, g_mdct_window[0], n_long_bands); + grbuf += 18*n_long_bands; + overlap += 9*n_long_bands; + } + if (block_type == SHORT_BLOCK_TYPE) + L3_imdct_short(grbuf, overlap, 32 - n_long_bands); + else + L3_imdct36(grbuf, overlap, g_mdct_window[block_type == STOP_BLOCK_TYPE], 32 - n_long_bands); +} + +static void L3_save_reservoir(mp3dec_t *h, mp3dec_scratch_t *s) +{ + int pos = (s->bs.pos + 7)/8u; + int remains = s->bs.limit/8u - pos; + if (remains > MAX_BITRESERVOIR_BYTES) + { + pos += remains - MAX_BITRESERVOIR_BYTES; + remains = MAX_BITRESERVOIR_BYTES; + } + if (remains > 0) + { + memmove(h->reserv_buf, s->maindata + pos, remains); + } + h->reserv = remains; +} + +static int L3_restore_reservoir(mp3dec_t *h, bs_t *bs, mp3dec_scratch_t *s, int main_data_begin) +{ + int frame_bytes = (bs->limit - bs->pos)/8; + int bytes_have = MINIMP3_MIN(h->reserv, main_data_begin); + memcpy(s->maindata, h->reserv_buf + MINIMP3_MAX(0, h->reserv - main_data_begin), MINIMP3_MIN(h->reserv, main_data_begin)); + memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes); + bs_init(&s->bs, s->maindata, bytes_have + frame_bytes); + return h->reserv >= main_data_begin; +} + +static void L3_decode(mp3dec_t *h, mp3dec_scratch_t *s, L3_gr_info_t *gr_info, int nch) +{ + int ch; + + for (ch = 0; ch < nch; ch++) + { + int layer3gr_limit = s->bs.pos + gr_info[ch].part_23_length; + L3_decode_scalefactors(h->header, s->ist_pos[ch], &s->bs, gr_info + ch, s->scf, ch); + L3_huffman(s->grbuf[ch], &s->bs, gr_info + ch, s->scf, layer3gr_limit); + } + + if (HDR_TEST_I_STEREO(h->header)) + { + L3_intensity_stereo(s->grbuf[0], s->ist_pos[1], gr_info, h->header); + } else if (HDR_IS_MS_STEREO(h->header)) + { + L3_midside_stereo(s->grbuf[0], 576); + } + + for (ch = 0; ch < nch; ch++, gr_info++) + { + int aa_bands = 31; + int n_long_bands = (gr_info->mixed_block_flag ? 2 : 0) << (int)(HDR_GET_MY_SAMPLE_RATE(h->header) == 2); + + if (gr_info->n_short_sfb) + { + aa_bands = n_long_bands - 1; + L3_reorder(s->grbuf[ch] + n_long_bands*18, s->syn[0], gr_info->sfbtab + gr_info->n_long_sfb); + } + + L3_antialias(s->grbuf[ch], aa_bands); + L3_imdct_gr(s->grbuf[ch], h->mdct_overlap[ch], gr_info->block_type, n_long_bands); + L3_change_sign(s->grbuf[ch]); + } +} + +static void mp3d_DCT_II(float *grbuf, int n) +{ + static const float g_sec[24] = { + 10.19000816f,0.50060302f,0.50241929f,3.40760851f,0.50547093f,0.52249861f,2.05778098f,0.51544732f,0.56694406f,1.48416460f,0.53104258f,0.64682180f,1.16943991f,0.55310392f,0.78815460f,0.97256821f,0.58293498f,1.06067765f,0.83934963f,0.62250412f,1.72244716f,0.74453628f,0.67480832f,5.10114861f + }; + int i, k = 0; +#if HAVE_SIMD + if (have_simd()) for (; k < n; k += 4) + { + f4 t[4][8], *x; + float *y = grbuf + k; + + for (x = t[0], i = 0; i < 8; i++, x++) + { + f4 x0 = VLD(&y[i*18]); + f4 x1 = VLD(&y[(15 - i)*18]); + f4 x2 = VLD(&y[(16 + i)*18]); + f4 x3 = VLD(&y[(31 - i)*18]); + f4 t0 = VADD(x0, x3); + f4 t1 = VADD(x1, x2); + f4 t2 = VMUL_S(VSUB(x1, x2), g_sec[3*i + 0]); + f4 t3 = VMUL_S(VSUB(x0, x3), g_sec[3*i + 1]); + x[0] = VADD(t0, t1); + x[8] = VMUL_S(VSUB(t0, t1), g_sec[3*i + 2]); + x[16] = VADD(t3, t2); + x[24] = VMUL_S(VSUB(t3, t2), g_sec[3*i + 2]); + } + for (x = t[0], i = 0; i < 4; i++, x += 8) + { + f4 x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt; + xt = VSUB(x0, x7); x0 = VADD(x0, x7); + x7 = VSUB(x1, x6); x1 = VADD(x1, x6); + x6 = VSUB(x2, x5); x2 = VADD(x2, x5); + x5 = VSUB(x3, x4); x3 = VADD(x3, x4); + x4 = VSUB(x0, x3); x0 = VADD(x0, x3); + x3 = VSUB(x1, x2); x1 = VADD(x1, x2); + x[0] = VADD(x0, x1); + x[4] = VMUL_S(VSUB(x0, x1), 0.70710677f); + x5 = VADD(x5, x6); + x6 = VMUL_S(VADD(x6, x7), 0.70710677f); + x7 = VADD(x7, xt); + x3 = VMUL_S(VADD(x3, x4), 0.70710677f); + x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); /* rotate by PI/8 */ + x7 = VADD(x7, VMUL_S(x5, 0.382683432f)); + x5 = VSUB(x5, VMUL_S(x7, 0.198912367f)); + x0 = VSUB(xt, x6); xt = VADD(xt, x6); + x[1] = VMUL_S(VADD(xt, x7), 0.50979561f); + x[2] = VMUL_S(VADD(x4, x3), 0.54119611f); + x[3] = VMUL_S(VSUB(x0, x5), 0.60134488f); + x[5] = VMUL_S(VADD(x0, x5), 0.89997619f); + x[6] = VMUL_S(VSUB(x4, x3), 1.30656302f); + x[7] = VMUL_S(VSUB(xt, x7), 2.56291556f); + } + + if (k > n - 3) + { +#if HAVE_SSE +#define VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v) +#else /* HAVE_SSE */ +#define VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18], vget_low_f32(v)) +#endif /* HAVE_SSE */ + for (i = 0; i < 7; i++, y += 4*18) + { + f4 s = VADD(t[3][i], t[3][i + 1]); + VSAVE2(0, t[0][i]); + VSAVE2(1, VADD(t[2][i], s)); + VSAVE2(2, VADD(t[1][i], t[1][i + 1])); + VSAVE2(3, VADD(t[2][1 + i], s)); + } + VSAVE2(0, t[0][7]); + VSAVE2(1, VADD(t[2][7], t[3][7])); + VSAVE2(2, t[1][7]); + VSAVE2(3, t[3][7]); + } else + { +#define VSAVE4(i, v) VSTORE(&y[i*18], v) + for (i = 0; i < 7; i++, y += 4*18) + { + f4 s = VADD(t[3][i], t[3][i + 1]); + VSAVE4(0, t[0][i]); + VSAVE4(1, VADD(t[2][i], s)); + VSAVE4(2, VADD(t[1][i], t[1][i + 1])); + VSAVE4(3, VADD(t[2][1 + i], s)); + } + VSAVE4(0, t[0][7]); + VSAVE4(1, VADD(t[2][7], t[3][7])); + VSAVE4(2, t[1][7]); + VSAVE4(3, t[3][7]); + } + } else +#endif /* HAVE_SIMD */ +#ifdef MINIMP3_ONLY_SIMD + {} +#else /* MINIMP3_ONLY_SIMD */ + for (; k < n; k++) + { + float t[4][8], *x, *y = grbuf + k; + + for (x = t[0], i = 0; i < 8; i++, x++) + { + float x0 = y[i*18]; + float x1 = y[(15 - i)*18]; + float x2 = y[(16 + i)*18]; + float x3 = y[(31 - i)*18]; + float t0 = x0 + x3; + float t1 = x1 + x2; + float t2 = (x1 - x2)*g_sec[3*i + 0]; + float t3 = (x0 - x3)*g_sec[3*i + 1]; + x[0] = t0 + t1; + x[8] = (t0 - t1)*g_sec[3*i + 2]; + x[16] = t3 + t2; + x[24] = (t3 - t2)*g_sec[3*i + 2]; + } + for (x = t[0], i = 0; i < 4; i++, x += 8) + { + float x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7], xt; + xt = x0 - x7; x0 += x7; + x7 = x1 - x6; x1 += x6; + x6 = x2 - x5; x2 += x5; + x5 = x3 - x4; x3 += x4; + x4 = x0 - x3; x0 += x3; + x3 = x1 - x2; x1 += x2; + x[0] = x0 + x1; + x[4] = (x0 - x1)*0.70710677f; + x5 = x5 + x6; + x6 = (x6 + x7)*0.70710677f; + x7 = x7 + xt; + x3 = (x3 + x4)*0.70710677f; + x5 -= x7*0.198912367f; /* rotate by PI/8 */ + x7 += x5*0.382683432f; + x5 -= x7*0.198912367f; + x0 = xt - x6; xt += x6; + x[1] = (xt + x7)*0.50979561f; + x[2] = (x4 + x3)*0.54119611f; + x[3] = (x0 - x5)*0.60134488f; + x[5] = (x0 + x5)*0.89997619f; + x[6] = (x4 - x3)*1.30656302f; + x[7] = (xt - x7)*2.56291556f; + + } + for (i = 0; i < 7; i++, y += 4*18) + { + y[0*18] = t[0][i]; + y[1*18] = t[2][i] + t[3][i] + t[3][i + 1]; + y[2*18] = t[1][i] + t[1][i + 1]; + y[3*18] = t[2][i + 1] + t[3][i] + t[3][i + 1]; + } + y[0*18] = t[0][7]; + y[1*18] = t[2][7] + t[3][7]; + y[2*18] = t[1][7]; + y[3*18] = t[3][7]; + } +#endif /* MINIMP3_ONLY_SIMD */ +} + +#ifndef MINIMP3_FLOAT_OUTPUT +static int16_t mp3d_scale_pcm(float sample) +{ +#if HAVE_ARMV6 + int32_t s32 = (int32_t)(sample + .5f); + s32 -= (s32 < 0); + int16_t s = (int16_t)minimp3_clip_int16_arm(s32); +#else + if (sample >= 32766.5) return (int16_t) 32767; + if (sample <= -32767.5) return (int16_t)-32768; + int16_t s = (int16_t)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ +#endif + return s; +} +#else /* MINIMP3_FLOAT_OUTPUT */ +static float mp3d_scale_pcm(float sample) +{ + return sample*(1.f/32768.f); +} +#endif /* MINIMP3_FLOAT_OUTPUT */ + +static void mp3d_synth_pair(mp3d_sample_t *pcm, int nch, const float *z) +{ + float a; + a = (z[14*64] - z[ 0]) * 29; + a += (z[ 1*64] + z[13*64]) * 213; + a += (z[12*64] - z[ 2*64]) * 459; + a += (z[ 3*64] + z[11*64]) * 2037; + a += (z[10*64] - z[ 4*64]) * 5153; + a += (z[ 5*64] + z[ 9*64]) * 6574; + a += (z[ 8*64] - z[ 6*64]) * 37489; + a += z[ 7*64] * 75038; + pcm[0] = mp3d_scale_pcm(a); + + z += 2; + a = z[14*64] * 104; + a += z[12*64] * 1567; + a += z[10*64] * 9727; + a += z[ 8*64] * 64019; + a += z[ 6*64] * -9975; + a += z[ 4*64] * -45; + a += z[ 2*64] * 146; + a += z[ 0*64] * -5; + pcm[16*nch] = mp3d_scale_pcm(a); +} + +static void mp3d_synth(float *xl, mp3d_sample_t *dstl, int nch, float *lins) +{ + int i; + float *xr = xl + 576*(nch - 1); + mp3d_sample_t *dstr = dstl + (nch - 1); + + static const float g_win[] = { + -1,26,-31,208,218,401,-519,2063,2000,4788,-5517,7134,5959,35640,-39336,74992, + -1,24,-35,202,222,347,-581,2080,1952,4425,-5879,7640,5288,33791,-41176,74856, + -1,21,-38,196,225,294,-645,2087,1893,4063,-6237,8092,4561,31947,-43006,74630, + -1,19,-41,190,227,244,-711,2085,1822,3705,-6589,8492,3776,30112,-44821,74313, + -1,17,-45,183,228,197,-779,2075,1739,3351,-6935,8840,2935,28289,-46617,73908, + -1,16,-49,176,228,153,-848,2057,1644,3004,-7271,9139,2037,26482,-48390,73415, + -2,14,-53,169,227,111,-919,2032,1535,2663,-7597,9389,1082,24694,-50137,72835, + -2,13,-58,161,224,72,-991,2001,1414,2330,-7910,9592,70,22929,-51853,72169, + -2,11,-63,154,221,36,-1064,1962,1280,2006,-8209,9750,-998,21189,-53534,71420, + -2,10,-68,147,215,2,-1137,1919,1131,1692,-8491,9863,-2122,19478,-55178,70590, + -3,9,-73,139,208,-29,-1210,1870,970,1388,-8755,9935,-3300,17799,-56778,69679, + -3,8,-79,132,200,-57,-1283,1817,794,1095,-8998,9966,-4533,16155,-58333,68692, + -4,7,-85,125,189,-83,-1356,1759,605,814,-9219,9959,-5818,14548,-59838,67629, + -4,7,-91,117,177,-106,-1428,1698,402,545,-9416,9916,-7154,12980,-61289,66494, + -5,6,-97,111,163,-127,-1498,1634,185,288,-9585,9838,-8540,11455,-62684,65290 + }; + float *zlin = lins + 15*64; + const float *w = g_win; + + zlin[4*15] = xl[18*16]; + zlin[4*15 + 1] = xr[18*16]; + zlin[4*15 + 2] = xl[0]; + zlin[4*15 + 3] = xr[0]; + + zlin[4*31] = xl[1 + 18*16]; + zlin[4*31 + 1] = xr[1 + 18*16]; + zlin[4*31 + 2] = xl[1]; + zlin[4*31 + 3] = xr[1]; + + mp3d_synth_pair(dstr, nch, lins + 4*15 + 1); + mp3d_synth_pair(dstr + 32*nch, nch, lins + 4*15 + 64 + 1); + mp3d_synth_pair(dstl, nch, lins + 4*15); + mp3d_synth_pair(dstl + 32*nch, nch, lins + 4*15 + 64); + +#if HAVE_SIMD + if (have_simd()) for (i = 14; i >= 0; i--) + { +#define VLOAD(k) f4 w0 = VSET(*w++); f4 w1 = VSET(*w++); f4 vz = VLD(&zlin[4*i - 64*k]); f4 vy = VLD(&zlin[4*i - 64*(15 - k)]); +#define V0(k) { VLOAD(k) b = VADD(VMUL(vz, w1), VMUL(vy, w0)) ; a = VSUB(VMUL(vz, w0), VMUL(vy, w1)); } +#define V1(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vz, w0), VMUL(vy, w1))); } +#define V2(k) { VLOAD(k) b = VADD(b, VADD(VMUL(vz, w1), VMUL(vy, w0))); a = VADD(a, VSUB(VMUL(vy, w1), VMUL(vz, w0))); } + f4 a, b; + zlin[4*i] = xl[18*(31 - i)]; + zlin[4*i + 1] = xr[18*(31 - i)]; + zlin[4*i + 2] = xl[1 + 18*(31 - i)]; + zlin[4*i + 3] = xr[1 + 18*(31 - i)]; + zlin[4*i + 64] = xl[1 + 18*(1 + i)]; + zlin[4*i + 64 + 1] = xr[1 + 18*(1 + i)]; + zlin[4*i - 64 + 2] = xl[18*(1 + i)]; + zlin[4*i - 64 + 3] = xr[18*(1 + i)]; + + V0(0) V2(1) V1(2) V2(3) V1(4) V2(5) V1(6) V2(7) + + { +#ifndef MINIMP3_FLOAT_OUTPUT +#if HAVE_SSE + static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; + static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f }; + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min))); + dstr[(15 - i)*nch] = _mm_extract_epi16(pcm8, 1); + dstr[(17 + i)*nch] = _mm_extract_epi16(pcm8, 5); + dstl[(15 - i)*nch] = _mm_extract_epi16(pcm8, 0); + dstl[(17 + i)*nch] = _mm_extract_epi16(pcm8, 4); + dstr[(47 - i)*nch] = _mm_extract_epi16(pcm8, 3); + dstr[(49 + i)*nch] = _mm_extract_epi16(pcm8, 7); + dstl[(47 - i)*nch] = _mm_extract_epi16(pcm8, 2); + dstl[(49 + i)*nch] = _mm_extract_epi16(pcm8, 6); +#else /* HAVE_SSE */ + int16x4_t pcma, pcmb; + a = VADD(a, VSET(0.5f)); + b = VADD(b, VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0))))); + vst1_lane_s16(dstr + (15 - i)*nch, pcma, 1); + vst1_lane_s16(dstr + (17 + i)*nch, pcmb, 1); + vst1_lane_s16(dstl + (15 - i)*nch, pcma, 0); + vst1_lane_s16(dstl + (17 + i)*nch, pcmb, 0); + vst1_lane_s16(dstr + (47 - i)*nch, pcma, 3); + vst1_lane_s16(dstr + (49 + i)*nch, pcmb, 3); + vst1_lane_s16(dstl + (47 - i)*nch, pcma, 2); + vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2); +#endif /* HAVE_SSE */ + +#else /* MINIMP3_FLOAT_OUTPUT */ + + static const f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f }; + a = VMUL(a, g_scale); + b = VMUL(b, g_scale); +#if HAVE_SSE + _mm_store_ss(dstr + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstr + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dstl + (15 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstl + (17 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(0, 0, 0, 0))); + _mm_store_ss(dstr + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstr + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 3, 3))); + _mm_store_ss(dstl + (47 - i)*nch, _mm_shuffle_ps(a, a, _MM_SHUFFLE(2, 2, 2, 2))); + _mm_store_ss(dstl + (49 + i)*nch, _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 2, 2))); +#else /* HAVE_SSE */ + vst1q_lane_f32(dstr + (15 - i)*nch, a, 1); + vst1q_lane_f32(dstr + (17 + i)*nch, b, 1); + vst1q_lane_f32(dstl + (15 - i)*nch, a, 0); + vst1q_lane_f32(dstl + (17 + i)*nch, b, 0); + vst1q_lane_f32(dstr + (47 - i)*nch, a, 3); + vst1q_lane_f32(dstr + (49 + i)*nch, b, 3); + vst1q_lane_f32(dstl + (47 - i)*nch, a, 2); + vst1q_lane_f32(dstl + (49 + i)*nch, b, 2); +#endif /* HAVE_SSE */ +#endif /* MINIMP3_FLOAT_OUTPUT */ + } + } else +#endif /* HAVE_SIMD */ +#ifdef MINIMP3_ONLY_SIMD + {} +#else /* MINIMP3_ONLY_SIMD */ + for (i = 14; i >= 0; i--) + { +#define LOAD(k) float w0 = *w++; float w1 = *w++; float *vz = &zlin[4*i - k*64]; float *vy = &zlin[4*i - (15 - k)*64]; +#define S0(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] = vz[j]*w1 + vy[j]*w0, a[j] = vz[j]*w0 - vy[j]*w1; } +#define S1(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vz[j]*w0 - vy[j]*w1; } +#define S2(k) { int j; LOAD(k); for (j = 0; j < 4; j++) b[j] += vz[j]*w1 + vy[j]*w0, a[j] += vy[j]*w1 - vz[j]*w0; } + float a[4], b[4]; + + zlin[4*i] = xl[18*(31 - i)]; + zlin[4*i + 1] = xr[18*(31 - i)]; + zlin[4*i + 2] = xl[1 + 18*(31 - i)]; + zlin[4*i + 3] = xr[1 + 18*(31 - i)]; + zlin[4*(i + 16)] = xl[1 + 18*(1 + i)]; + zlin[4*(i + 16) + 1] = xr[1 + 18*(1 + i)]; + zlin[4*(i - 16) + 2] = xl[18*(1 + i)]; + zlin[4*(i - 16) + 3] = xr[18*(1 + i)]; + + S0(0) S2(1) S1(2) S2(3) S1(4) S2(5) S1(6) S2(7) + + dstr[(15 - i)*nch] = mp3d_scale_pcm(a[1]); + dstr[(17 + i)*nch] = mp3d_scale_pcm(b[1]); + dstl[(15 - i)*nch] = mp3d_scale_pcm(a[0]); + dstl[(17 + i)*nch] = mp3d_scale_pcm(b[0]); + dstr[(47 - i)*nch] = mp3d_scale_pcm(a[3]); + dstr[(49 + i)*nch] = mp3d_scale_pcm(b[3]); + dstl[(47 - i)*nch] = mp3d_scale_pcm(a[2]); + dstl[(49 + i)*nch] = mp3d_scale_pcm(b[2]); + } +#endif /* MINIMP3_ONLY_SIMD */ +} + +static void mp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int nch, mp3d_sample_t *pcm, float *lins) +{ + int i; + for (i = 0; i < nch; i++) + { + mp3d_DCT_II(grbuf + 576*i, nbands); + } + + memcpy(lins, qmf_state, sizeof(float)*15*64); + + for (i = 0; i < nbands; i += 2) + { + mp3d_synth(grbuf + i, pcm + 32*nch*i, nch, lins + i*64); + } +#ifndef MINIMP3_NONSTANDARD_BUT_LOGICAL + if (nch == 1) + { + for (i = 0; i < 15*64; i += 2) + { + qmf_state[i] = lins[nbands*64 + i]; + } + } else +#endif /* MINIMP3_NONSTANDARD_BUT_LOGICAL */ + { + memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64); + } +} + +static int mp3d_match_frame(const uint8_t *hdr, int mp3_bytes, int frame_bytes) +{ + int i, nmatch; + for (i = 0, nmatch = 0; nmatch < MAX_FRAME_SYNC_MATCHES; nmatch++) + { + i += hdr_frame_bytes(hdr + i, frame_bytes) + hdr_padding(hdr + i); + if (i + HDR_SIZE > mp3_bytes) + return nmatch > 0; + if (!hdr_compare(hdr, hdr + i)) + return 0; + } + return 1; +} + +static int mp3d_find_frame(const uint8_t *mp3, int mp3_bytes, int *free_format_bytes, int *ptr_frame_bytes) +{ + int i, k; + for (i = 0; i < mp3_bytes - HDR_SIZE; i++, mp3++) + { + if (hdr_valid(mp3)) + { + int frame_bytes = hdr_frame_bytes(mp3, *free_format_bytes); + int frame_and_padding = frame_bytes + hdr_padding(mp3); + + for (k = HDR_SIZE; !frame_bytes && k < MAX_FREE_FORMAT_FRAME_SIZE && i + 2*k < mp3_bytes - HDR_SIZE; k++) + { + if (hdr_compare(mp3, mp3 + k)) + { + int fb = k - hdr_padding(mp3); + int nextfb = fb + hdr_padding(mp3 + k); + if (i + k + nextfb + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + k + nextfb)) + continue; + frame_and_padding = k; + frame_bytes = fb; + *free_format_bytes = fb; + } + } + if ((frame_bytes && i + frame_and_padding <= mp3_bytes && + mp3d_match_frame(mp3, mp3_bytes - i, frame_bytes)) || + (!i && frame_and_padding == mp3_bytes)) + { + *ptr_frame_bytes = frame_and_padding; + return i; + } + *free_format_bytes = 0; + } + } + *ptr_frame_bytes = 0; + return mp3_bytes; +} + +void mp3dec_init(mp3dec_t *dec) +{ + dec->header[0] = 0; +} + +int mp3dec_decode_frame(mp3dec_t *dec, const uint8_t *mp3, int mp3_bytes, mp3d_sample_t *pcm, mp3dec_frame_info_t *info) +{ + int i = 0, igr, frame_size = 0, success = 1; + const uint8_t *hdr; + bs_t bs_frame[1]; + mp3dec_scratch_t scratch; + + if (mp3_bytes > 4 && dec->header[0] == 0xff && hdr_compare(dec->header, mp3)) + { + frame_size = hdr_frame_bytes(mp3, dec->free_format_bytes) + hdr_padding(mp3); + if (frame_size != mp3_bytes && (frame_size + HDR_SIZE > mp3_bytes || !hdr_compare(mp3, mp3 + frame_size))) + { + frame_size = 0; + } + } + if (!frame_size) + { + memset(dec, 0, sizeof(mp3dec_t)); + i = mp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size); + if (!frame_size || i + frame_size > mp3_bytes) + { + info->frame_bytes = i; + return 0; + } + } + + hdr = mp3 + i; + memcpy(dec->header, hdr, HDR_SIZE); + info->frame_bytes = i + frame_size; + info->frame_offset = i; + info->channels = HDR_IS_MONO(hdr) ? 1 : 2; + info->hz = hdr_sample_rate_hz(hdr); + info->layer = 4 - HDR_GET_LAYER(hdr); + info->bitrate_kbps = hdr_bitrate_kbps(hdr); + + if (!pcm) + { + return hdr_frame_samples(hdr); + } + + bs_init(bs_frame, hdr + HDR_SIZE, frame_size - HDR_SIZE); + if (HDR_IS_CRC(hdr)) + { + get_bits(bs_frame, 16); + } + + if (info->layer == 3) + { + int main_data_begin = L3_read_side_info(bs_frame, scratch.gr_info, hdr); + if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit) + { + mp3dec_init(dec); + return 0; + } + success = L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin); + if (success) + { + for (igr = 0; igr < (HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm += 576*info->channels) + { + memset(scratch.grbuf[0], 0, 576*2*sizeof(float)); + L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels); + mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, pcm, scratch.syn[0]); + } + } + L3_save_reservoir(dec, &scratch); + } else + { +#ifdef MINIMP3_ONLY_MP3 + return 0; +#else /* MINIMP3_ONLY_MP3 */ + L12_scale_info sci[1]; + L12_read_scale_info(hdr, bs_frame, sci); + + memset(scratch.grbuf[0], 0, 576*2*sizeof(float)); + for (i = 0, igr = 0; igr < 3; igr++) + { + if (12 == (i += L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1))) + { + i = 0; + L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]); + mp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, pcm, scratch.syn[0]); + memset(scratch.grbuf[0], 0, 576*2*sizeof(float)); + pcm += 384*info->channels; + } + if (bs_frame->pos > bs_frame->limit) + { + mp3dec_init(dec); + return 0; + } + } +#endif /* MINIMP3_ONLY_MP3 */ + } + return success*hdr_frame_samples(dec->header); +} + +#ifdef MINIMP3_FLOAT_OUTPUT +void mp3dec_f32_to_s16(const float *in, int16_t *out, int num_samples) +{ + int i = 0; +#if HAVE_SIMD + int aligned_count = num_samples & ~7; + for(; i < aligned_count; i += 8) + { + static const f4 g_scale = { 32768.0f, 32768.0f, 32768.0f, 32768.0f }; + f4 a = VMUL(VLD(&in[i ]), g_scale); + f4 b = VMUL(VLD(&in[i+4]), g_scale); +#if HAVE_SSE + static const f4 g_max = { 32767.0f, 32767.0f, 32767.0f, 32767.0f }; + static const f4 g_min = { -32768.0f, -32768.0f, -32768.0f, -32768.0f }; + __m128i pcm8 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(a, g_max), g_min)), + _mm_cvtps_epi32(_mm_max_ps(_mm_min_ps(b, g_max), g_min))); + out[i ] = _mm_extract_epi16(pcm8, 0); + out[i+1] = _mm_extract_epi16(pcm8, 1); + out[i+2] = _mm_extract_epi16(pcm8, 2); + out[i+3] = _mm_extract_epi16(pcm8, 3); + out[i+4] = _mm_extract_epi16(pcm8, 4); + out[i+5] = _mm_extract_epi16(pcm8, 5); + out[i+6] = _mm_extract_epi16(pcm8, 6); + out[i+7] = _mm_extract_epi16(pcm8, 7); +#else /* HAVE_SSE */ + int16x4_t pcma, pcmb; + a = VADD(a, VSET(0.5f)); + b = VADD(b, VSET(0.5f)); + pcma = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(a), vreinterpretq_s32_u32(vcltq_f32(a, VSET(0))))); + pcmb = vqmovn_s32(vqaddq_s32(vcvtq_s32_f32(b), vreinterpretq_s32_u32(vcltq_f32(b, VSET(0))))); + vst1_lane_s16(out+i , pcma, 0); + vst1_lane_s16(out+i+1, pcma, 1); + vst1_lane_s16(out+i+2, pcma, 2); + vst1_lane_s16(out+i+3, pcma, 3); + vst1_lane_s16(out+i+4, pcmb, 0); + vst1_lane_s16(out+i+5, pcmb, 1); + vst1_lane_s16(out+i+6, pcmb, 2); + vst1_lane_s16(out+i+7, pcmb, 3); +#endif /* HAVE_SSE */ + } +#endif /* HAVE_SIMD */ + for(; i < num_samples; i++) + { + float sample = in[i] * 32768.0f; + if (sample >= 32766.5) + out[i] = (int16_t) 32767; + else if (sample <= -32767.5) + out[i] = (int16_t)-32768; + else + { + int16_t s = (int16_t)(sample + .5f); + s -= (s < 0); /* away from zero, to be compliant */ + out[i] = s; + } + } +} +#endif /* MINIMP3_FLOAT_OUTPUT */ +#endif /* MINIMP3_IMPLEMENTATION && !_MINIMP3_IMPLEMENTATION_GUARD */ diff --git a/thirdparty/minimp3/minimp3_ex.h b/thirdparty/minimp3/minimp3_ex.h new file mode 100644 index 0000000000..e29dd15b2e --- /dev/null +++ b/thirdparty/minimp3/minimp3_ex.h @@ -0,0 +1,1394 @@ +#ifndef MINIMP3_EXT_H +#define MINIMP3_EXT_H +/* + https://github.com/lieff/minimp3 + To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. + This software is distributed without any warranty. + See <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ +#include "minimp3.h" + +/* flags for mp3dec_ex_open_* functions */ +#define MP3D_SEEK_TO_BYTE 0 /* mp3dec_ex_seek seeks to byte in stream */ +#define MP3D_SEEK_TO_SAMPLE 1 /* mp3dec_ex_seek precisely seeks to sample using index (created during duration calculation scan or when mp3dec_ex_seek called) */ +#define MP3D_DO_NOT_SCAN 2 /* do not scan whole stream for duration if vbrtag not found, mp3dec_ex_t::samples will be filled only if mp3dec_ex_t::vbr_tag_found == 1 */ +#ifdef MINIMP3_ALLOW_MONO_STEREO_TRANSITION +#define MP3D_ALLOW_MONO_STEREO_TRANSITION 4 +#define MP3D_FLAGS_MASK 7 +#else +#define MP3D_FLAGS_MASK 3 +#endif + +/* compile-time config */ +#define MINIMP3_PREDECODE_FRAMES 2 /* frames to pre-decode and skip after seek (to fill internal structures) */ +/*#define MINIMP3_SEEK_IDX_LINEAR_SEARCH*/ /* define to use linear index search instead of binary search on seek */ +#define MINIMP3_IO_SIZE (128*1024) /* io buffer size for streaming functions, must be greater than MINIMP3_BUF_SIZE */ +#define MINIMP3_BUF_SIZE (16*1024) /* buffer which can hold minimum 10 consecutive mp3 frames (~16KB) worst case */ +/*#define MINIMP3_SCAN_LIMIT (256*1024)*/ /* how many bytes will be scanned to search first valid mp3 frame, to prevent stall on large non-mp3 files */ +#define MINIMP3_ENABLE_RING 0 /* WIP enable hardware magic ring buffer if available, to make less input buffer memmove(s) in callback IO mode */ + +/* return error codes */ +#define MP3D_E_PARAM -1 +#define MP3D_E_MEMORY -2 +#define MP3D_E_IOERROR -3 +#define MP3D_E_USER -4 /* can be used to stop processing from callbacks without indicating specific error */ +#define MP3D_E_DECODE -5 /* decode error which can't be safely skipped, such as sample rate, layer and channels change */ + +typedef struct +{ + mp3d_sample_t *buffer; + size_t samples; /* channels included, byte size = samples*sizeof(mp3d_sample_t) */ + int channels, hz, layer, avg_bitrate_kbps; +} mp3dec_file_info_t; + +typedef struct +{ + const uint8_t *buffer; + size_t size; +} mp3dec_map_info_t; + +typedef struct +{ + uint64_t sample; + uint64_t offset; +} mp3dec_frame_t; + +typedef struct +{ + mp3dec_frame_t *frames; + size_t num_frames, capacity; +} mp3dec_index_t; + +typedef size_t (*MP3D_READ_CB)(void *buf, size_t size, void *user_data); +typedef int (*MP3D_SEEK_CB)(uint64_t position, void *user_data); + +typedef struct +{ + MP3D_READ_CB read; + void *read_data; + MP3D_SEEK_CB seek; + void *seek_data; +} mp3dec_io_t; + +typedef struct +{ + mp3dec_t mp3d; + mp3dec_map_info_t file; + mp3dec_io_t *io; + mp3dec_index_t index; + uint64_t offset, samples, detected_samples, cur_sample, start_offset, end_offset; + mp3dec_frame_info_t info; + mp3d_sample_t buffer[MINIMP3_MAX_SAMPLES_PER_FRAME]; + size_t input_consumed, input_filled; + int is_file, flags, vbr_tag_found, indexes_built; + int free_format_bytes; + int buffer_samples, buffer_consumed, to_skip, start_delay; + int last_error; +} mp3dec_ex_t; + +typedef int (*MP3D_ITERATE_CB)(void *user_data, const uint8_t *frame, int frame_size, int free_format_bytes, size_t buf_size, uint64_t offset, mp3dec_frame_info_t *info); +typedef int (*MP3D_PROGRESS_CB)(void *user_data, size_t file_size, uint64_t offset, mp3dec_frame_info_t *info); + +#ifdef __cplusplus +extern "C" { +#endif + +/* detect mp3/mpa format */ +int mp3dec_detect_buf(const uint8_t *buf, size_t buf_size); +int mp3dec_detect_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size); +/* decode whole buffer block */ +int mp3dec_load_buf(mp3dec_t *dec, const uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data); +int mp3dec_load_cb(mp3dec_t *dec, mp3dec_io_t *io, uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data); +/* iterate through frames */ +int mp3dec_iterate_buf(const uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data); +int mp3dec_iterate_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data); +/* streaming decoder with seeking capability */ +int mp3dec_ex_open_buf(mp3dec_ex_t *dec, const uint8_t *buf, size_t buf_size, int flags); +int mp3dec_ex_open_cb(mp3dec_ex_t *dec, mp3dec_io_t *io, int flags); +void mp3dec_ex_close(mp3dec_ex_t *dec); +int mp3dec_ex_seek(mp3dec_ex_t *dec, uint64_t position); +size_t mp3dec_ex_read_frame(mp3dec_ex_t *dec, mp3d_sample_t **buf, mp3dec_frame_info_t *frame_info, size_t max_samples); +size_t mp3dec_ex_read(mp3dec_ex_t *dec, mp3d_sample_t *buf, size_t samples); +#ifndef MINIMP3_NO_STDIO +/* stdio versions of file detect, load, iterate and stream */ +int mp3dec_detect(const char *file_name); +int mp3dec_load(mp3dec_t *dec, const char *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data); +int mp3dec_iterate(const char *file_name, MP3D_ITERATE_CB callback, void *user_data); +int mp3dec_ex_open(mp3dec_ex_t *dec, const char *file_name, int flags); +#ifdef _WIN32 +int mp3dec_detect_w(const wchar_t *file_name); +int mp3dec_load_w(mp3dec_t *dec, const wchar_t *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data); +int mp3dec_iterate_w(const wchar_t *file_name, MP3D_ITERATE_CB callback, void *user_data); +int mp3dec_ex_open_w(mp3dec_ex_t *dec, const wchar_t *file_name, int flags); +#endif +#endif + +#ifdef __cplusplus +} +#endif +#endif /*MINIMP3_EXT_H*/ + +#ifdef MINIMP3_IMPLEMENTATION +#include <limits.h> + +static void mp3dec_skip_id3v1(const uint8_t *buf, size_t *pbuf_size) +{ + size_t buf_size = *pbuf_size; +#ifndef MINIMP3_NOSKIP_ID3V1 + if (buf_size >= 128 && !memcmp(buf + buf_size - 128, "TAG", 3)) + { + buf_size -= 128; + if (buf_size >= 227 && !memcmp(buf + buf_size - 227, "TAG+", 4)) + buf_size -= 227; + } +#endif +#ifndef MINIMP3_NOSKIP_APEV2 + if (buf_size > 32 && !memcmp(buf + buf_size - 32, "APETAGEX", 8)) + { + buf_size -= 32; + const uint8_t *tag = buf + buf_size + 8 + 4; + uint32_t tag_size = (uint32_t)(tag[3] << 24) | (tag[2] << 16) | (tag[1] << 8) | tag[0]; + if (buf_size >= tag_size) + buf_size -= tag_size; + } +#endif + *pbuf_size = buf_size; +} + +static size_t mp3dec_skip_id3v2(const uint8_t *buf, size_t buf_size) +{ +#define MINIMP3_ID3_DETECT_SIZE 10 +#ifndef MINIMP3_NOSKIP_ID3V2 + if (buf_size >= MINIMP3_ID3_DETECT_SIZE && !memcmp(buf, "ID3", 3) && !((buf[5] & 15) || (buf[6] & 0x80) || (buf[7] & 0x80) || (buf[8] & 0x80) || (buf[9] & 0x80))) + { + size_t id3v2size = (((buf[6] & 0x7f) << 21) | ((buf[7] & 0x7f) << 14) | ((buf[8] & 0x7f) << 7) | (buf[9] & 0x7f)) + 10; + if ((buf[5] & 16)) + id3v2size += 10; /* footer */ + return id3v2size; + } +#endif + return 0; +} + +static void mp3dec_skip_id3(const uint8_t **pbuf, size_t *pbuf_size) +{ + uint8_t *buf = (uint8_t *)(*pbuf); + size_t buf_size = *pbuf_size; + size_t id3v2size = mp3dec_skip_id3v2(buf, buf_size); + if (id3v2size) + { + if (id3v2size >= buf_size) + id3v2size = buf_size; + buf += id3v2size; + buf_size -= id3v2size; + } + mp3dec_skip_id3v1(buf, &buf_size); + *pbuf = (const uint8_t *)buf; + *pbuf_size = buf_size; +} + +static int mp3dec_check_vbrtag(const uint8_t *frame, int frame_size, uint32_t *frames, int *delay, int *padding) +{ + static const char g_xing_tag[4] = { 'X', 'i', 'n', 'g' }; + static const char g_info_tag[4] = { 'I', 'n', 'f', 'o' }; +#define FRAMES_FLAG 1 +#define BYTES_FLAG 2 +#define TOC_FLAG 4 +#define VBR_SCALE_FLAG 8 + /* Side info offsets after header: + / Mono Stereo + / MPEG1 17 32 + / MPEG2 & 2.5 9 17*/ + bs_t bs[1]; + L3_gr_info_t gr_info[4]; + bs_init(bs, frame + HDR_SIZE, frame_size - HDR_SIZE); + if (HDR_IS_CRC(frame)) + get_bits(bs, 16); + if (L3_read_side_info(bs, gr_info, frame) < 0) + return 0; /* side info corrupted */ + + const uint8_t *tag = frame + HDR_SIZE + bs->pos/8; + if (memcmp(g_xing_tag, tag, 4) && memcmp(g_info_tag, tag, 4)) + return 0; + int flags = tag[7]; + if (!((flags & FRAMES_FLAG))) + return -1; + tag += 8; + *frames = (uint32_t)(tag[0] << 24) | (tag[1] << 16) | (tag[2] << 8) | tag[3]; + tag += 4; + if (flags & BYTES_FLAG) + tag += 4; + if (flags & TOC_FLAG) + tag += 100; + if (flags & VBR_SCALE_FLAG) + tag += 4; + *delay = *padding = 0; + if (*tag) + { /* extension, LAME, Lavc, etc. Should be the same structure. */ + tag += 21; + if (tag - frame + 14 >= frame_size) + return 0; + *delay = ((tag[0] << 4) | (tag[1] >> 4)) + (528 + 1); + *padding = (((tag[1] & 0xF) << 8) | tag[2]) - (528 + 1); + } + return 1; +} + +int mp3dec_detect_buf(const uint8_t *buf, size_t buf_size) +{ + return mp3dec_detect_cb(0, (uint8_t *)buf, buf_size); +} + +int mp3dec_detect_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size) +{ + if (!buf || (size_t)-1 == buf_size || (io && buf_size < MINIMP3_BUF_SIZE)) + return MP3D_E_PARAM; + size_t filled = buf_size; + if (io) + { + if (io->seek(0, io->seek_data)) + return MP3D_E_IOERROR; + filled = io->read(buf, MINIMP3_ID3_DETECT_SIZE, io->read_data); + if (filled > MINIMP3_ID3_DETECT_SIZE) + return MP3D_E_IOERROR; + } + if (filled < MINIMP3_ID3_DETECT_SIZE) + return MP3D_E_USER; /* too small, can't be mp3/mpa */ + if (mp3dec_skip_id3v2(buf, filled)) + return 0; /* id3v2 tag is enough evidence */ + if (io) + { + size_t readed = io->read(buf + MINIMP3_ID3_DETECT_SIZE, buf_size - MINIMP3_ID3_DETECT_SIZE, io->read_data); + if (readed > (buf_size - MINIMP3_ID3_DETECT_SIZE)) + return MP3D_E_IOERROR; + filled += readed; + if (filled < MINIMP3_BUF_SIZE) + mp3dec_skip_id3v1(buf, &filled); + } else + { + mp3dec_skip_id3v1(buf, &filled); + if (filled > MINIMP3_BUF_SIZE) + filled = MINIMP3_BUF_SIZE; + } + int free_format_bytes, frame_size; + mp3d_find_frame(buf, filled, &free_format_bytes, &frame_size); + if (frame_size) + return 0; /* MAX_FRAME_SYNC_MATCHES consecutive frames found */ + return MP3D_E_USER; +} + +int mp3dec_load_buf(mp3dec_t *dec, const uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data) +{ + return mp3dec_load_cb(dec, 0, (uint8_t *)buf, buf_size, info, progress_cb, user_data); +} + +int mp3dec_load_cb(mp3dec_t *dec, mp3dec_io_t *io, uint8_t *buf, size_t buf_size, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data) +{ + if (!dec || !buf || !info || (size_t)-1 == buf_size || (io && buf_size < MINIMP3_BUF_SIZE)) + return MP3D_E_PARAM; + uint64_t detected_samples = 0; + size_t orig_buf_size = buf_size; + int to_skip = 0; + mp3dec_frame_info_t frame_info; + memset(info, 0, sizeof(*info)); + memset(&frame_info, 0, sizeof(frame_info)); + + /* skip id3 */ + size_t filled = 0, consumed = 0; + int eof = 0, ret = 0; + if (io) + { + if (io->seek(0, io->seek_data)) + return MP3D_E_IOERROR; + filled = io->read(buf, MINIMP3_ID3_DETECT_SIZE, io->read_data); + if (filled > MINIMP3_ID3_DETECT_SIZE) + return MP3D_E_IOERROR; + if (MINIMP3_ID3_DETECT_SIZE != filled) + return 0; + size_t id3v2size = mp3dec_skip_id3v2(buf, filled); + if (id3v2size) + { + if (io->seek(id3v2size, io->seek_data)) + return MP3D_E_IOERROR; + filled = io->read(buf, buf_size, io->read_data); + if (filled > buf_size) + return MP3D_E_IOERROR; + } else + { + size_t readed = io->read(buf + MINIMP3_ID3_DETECT_SIZE, buf_size - MINIMP3_ID3_DETECT_SIZE, io->read_data); + if (readed > (buf_size - MINIMP3_ID3_DETECT_SIZE)) + return MP3D_E_IOERROR; + filled += readed; + } + if (filled < MINIMP3_BUF_SIZE) + mp3dec_skip_id3v1(buf, &filled); + } else + { + mp3dec_skip_id3((const uint8_t **)&buf, &buf_size); + if (!buf_size) + return 0; + } + /* try to make allocation size assumption by first frame or vbr tag */ + mp3dec_init(dec); + int samples; + do + { + uint32_t frames; + int i, delay, padding, free_format_bytes = 0, frame_size = 0; + const uint8_t *hdr; + if (io) + { + if (!eof && filled - consumed < MINIMP3_BUF_SIZE) + { /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */ + memmove(buf, buf + consumed, filled - consumed); + filled -= consumed; + consumed = 0; + size_t readed = io->read(buf + filled, buf_size - filled, io->read_data); + if (readed > (buf_size - filled)) + return MP3D_E_IOERROR; + if (readed != (buf_size - filled)) + eof = 1; + filled += readed; + if (eof) + mp3dec_skip_id3v1(buf, &filled); + } + i = mp3d_find_frame(buf + consumed, filled - consumed, &free_format_bytes, &frame_size); + consumed += i; + hdr = buf + consumed; + } else + { + i = mp3d_find_frame(buf, buf_size, &free_format_bytes, &frame_size); + buf += i; + buf_size -= i; + hdr = buf; + } + if (i && !frame_size) + continue; + if (!frame_size) + return 0; + frame_info.channels = HDR_IS_MONO(hdr) ? 1 : 2; + frame_info.hz = hdr_sample_rate_hz(hdr); + frame_info.layer = 4 - HDR_GET_LAYER(hdr); + frame_info.bitrate_kbps = hdr_bitrate_kbps(hdr); + frame_info.frame_bytes = frame_size; + samples = hdr_frame_samples(hdr)*frame_info.channels; + if (3 != frame_info.layer) + break; + int ret = mp3dec_check_vbrtag(hdr, frame_size, &frames, &delay, &padding); + if (ret > 0) + { + padding *= frame_info.channels; + to_skip = delay*frame_info.channels; + detected_samples = samples*(uint64_t)frames; + if (detected_samples >= (uint64_t)to_skip) + detected_samples -= to_skip; + if (padding > 0 && detected_samples >= (uint64_t)padding) + detected_samples -= padding; + if (!detected_samples) + return 0; + } + if (ret) + { + if (io) + { + consumed += frame_size; + } else + { + buf += frame_size; + buf_size -= frame_size; + } + } + break; + } while(1); + size_t allocated = MINIMP3_MAX_SAMPLES_PER_FRAME*sizeof(mp3d_sample_t); + if (detected_samples) + allocated += detected_samples*sizeof(mp3d_sample_t); + else + allocated += (buf_size/frame_info.frame_bytes)*samples*sizeof(mp3d_sample_t); + info->buffer = (mp3d_sample_t*)malloc(allocated); + if (!info->buffer) + return MP3D_E_MEMORY; + /* save info */ + info->channels = frame_info.channels; + info->hz = frame_info.hz; + info->layer = frame_info.layer; + /* decode all frames */ + size_t avg_bitrate_kbps = 0, frames = 0; + do + { + if ((allocated - info->samples*sizeof(mp3d_sample_t)) < MINIMP3_MAX_SAMPLES_PER_FRAME*sizeof(mp3d_sample_t)) + { + allocated *= 2; + mp3d_sample_t *alloc_buf = (mp3d_sample_t*)realloc(info->buffer, allocated); + if (!alloc_buf) + return MP3D_E_MEMORY; + info->buffer = alloc_buf; + } + if (io) + { + if (!eof && filled - consumed < MINIMP3_BUF_SIZE) + { /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */ + memmove(buf, buf + consumed, filled - consumed); + filled -= consumed; + consumed = 0; + size_t readed = io->read(buf + filled, buf_size - filled, io->read_data); + if (readed != (buf_size - filled)) + eof = 1; + filled += readed; + if (eof) + mp3dec_skip_id3v1(buf, &filled); + } + samples = mp3dec_decode_frame(dec, buf + consumed, filled - consumed, info->buffer + info->samples, &frame_info); + consumed += frame_info.frame_bytes; + } else + { + samples = mp3dec_decode_frame(dec, buf, MINIMP3_MIN(buf_size, (size_t)INT_MAX), info->buffer + info->samples, &frame_info); + buf += frame_info.frame_bytes; + buf_size -= frame_info.frame_bytes; + } + if (samples) + { + if (info->hz != frame_info.hz || info->layer != frame_info.layer) + { + ret = MP3D_E_DECODE; + break; + } + if (info->channels && info->channels != frame_info.channels) + { +#ifdef MINIMP3_ALLOW_MONO_STEREO_TRANSITION + info->channels = 0; /* mark file with mono-stereo transition */ +#else + ret = MP3D_E_DECODE; + break; +#endif + } + samples *= frame_info.channels; + if (to_skip) + { + size_t skip = MINIMP3_MIN(samples, to_skip); + to_skip -= skip; + samples -= skip; + memmove(info->buffer, info->buffer + skip, samples*sizeof(mp3d_sample_t)); + } + info->samples += samples; + avg_bitrate_kbps += frame_info.bitrate_kbps; + frames++; + if (progress_cb) + { + ret = progress_cb(user_data, orig_buf_size, orig_buf_size - buf_size, &frame_info); + if (ret) + break; + } + } + } while (frame_info.frame_bytes); + if (detected_samples && info->samples > detected_samples) + info->samples = detected_samples; /* cut padding */ + /* reallocate to normal buffer size */ + if (allocated != info->samples*sizeof(mp3d_sample_t)) + { + mp3d_sample_t *alloc_buf = (mp3d_sample_t*)realloc(info->buffer, info->samples*sizeof(mp3d_sample_t)); + if (!alloc_buf && info->samples) + return MP3D_E_MEMORY; + info->buffer = alloc_buf; + } + if (frames) + info->avg_bitrate_kbps = avg_bitrate_kbps/frames; + return ret; +} + +int mp3dec_iterate_buf(const uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data) +{ + const uint8_t *orig_buf = buf; + if (!buf || (size_t)-1 == buf_size || !callback) + return MP3D_E_PARAM; + /* skip id3 */ + mp3dec_skip_id3(&buf, &buf_size); + if (!buf_size) + return 0; + mp3dec_frame_info_t frame_info; + memset(&frame_info, 0, sizeof(frame_info)); + do + { + int free_format_bytes = 0, frame_size = 0, ret; + int i = mp3d_find_frame(buf, buf_size, &free_format_bytes, &frame_size); + buf += i; + buf_size -= i; + if (i && !frame_size) + continue; + if (!frame_size) + break; + const uint8_t *hdr = buf; + frame_info.channels = HDR_IS_MONO(hdr) ? 1 : 2; + frame_info.hz = hdr_sample_rate_hz(hdr); + frame_info.layer = 4 - HDR_GET_LAYER(hdr); + frame_info.bitrate_kbps = hdr_bitrate_kbps(hdr); + frame_info.frame_bytes = frame_size; + + if (callback) + { + if ((ret = callback(user_data, hdr, frame_size, free_format_bytes, buf_size, hdr - orig_buf, &frame_info))) + return ret; + } + buf += frame_size; + buf_size -= frame_size; + } while (1); + return 0; +} + +int mp3dec_iterate_cb(mp3dec_io_t *io, uint8_t *buf, size_t buf_size, MP3D_ITERATE_CB callback, void *user_data) +{ + if (!io || !buf || (size_t)-1 == buf_size || buf_size < MINIMP3_BUF_SIZE || !callback) + return MP3D_E_PARAM; + size_t filled = io->read(buf, MINIMP3_ID3_DETECT_SIZE, io->read_data), consumed = 0; + uint64_t readed = 0; + mp3dec_frame_info_t frame_info; + int eof = 0; + memset(&frame_info, 0, sizeof(frame_info)); + if (filled > MINIMP3_ID3_DETECT_SIZE) + return MP3D_E_IOERROR; + if (MINIMP3_ID3_DETECT_SIZE != filled) + return 0; + size_t id3v2size = mp3dec_skip_id3v2(buf, filled); + if (id3v2size) + { + if (io->seek(id3v2size, io->seek_data)) + return MP3D_E_IOERROR; + filled = io->read(buf, buf_size, io->read_data); + if (filled > buf_size) + return MP3D_E_IOERROR; + readed += id3v2size; + } else + { + size_t readed = io->read(buf + MINIMP3_ID3_DETECT_SIZE, buf_size - MINIMP3_ID3_DETECT_SIZE, io->read_data); + if (readed > (buf_size - MINIMP3_ID3_DETECT_SIZE)) + return MP3D_E_IOERROR; + filled += readed; + } + if (filled < MINIMP3_BUF_SIZE) + mp3dec_skip_id3v1(buf, &filled); + do + { + int free_format_bytes = 0, frame_size = 0, ret; + int i = mp3d_find_frame(buf + consumed, filled - consumed, &free_format_bytes, &frame_size); + if (i && !frame_size) + { + consumed += i; + continue; + } + if (!frame_size) + break; + const uint8_t *hdr = buf + consumed + i; + frame_info.channels = HDR_IS_MONO(hdr) ? 1 : 2; + frame_info.hz = hdr_sample_rate_hz(hdr); + frame_info.layer = 4 - HDR_GET_LAYER(hdr); + frame_info.bitrate_kbps = hdr_bitrate_kbps(hdr); + frame_info.frame_bytes = frame_size; + + readed += i; + if (callback) + { + if ((ret = callback(user_data, hdr, frame_size, free_format_bytes, filled - consumed, readed, &frame_info))) + return ret; + } + readed += frame_size; + consumed += i + frame_size; + if (!eof && filled - consumed < MINIMP3_BUF_SIZE) + { /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */ + memmove(buf, buf + consumed, filled - consumed); + filled -= consumed; + consumed = 0; + size_t readed = io->read(buf + filled, buf_size - filled, io->read_data); + if (readed > (buf_size - filled)) + return MP3D_E_IOERROR; + if (readed != (buf_size - filled)) + eof = 1; + filled += readed; + if (eof) + mp3dec_skip_id3v1(buf, &filled); + } + } while (1); + return 0; +} + +static int mp3dec_load_index(void *user_data, const uint8_t *frame, int frame_size, int free_format_bytes, size_t buf_size, uint64_t offset, mp3dec_frame_info_t *info) +{ + mp3dec_frame_t *idx_frame; + mp3dec_ex_t *dec = (mp3dec_ex_t *)user_data; + if (!dec->index.frames && !dec->start_offset) + { /* detect VBR tag and try to avoid full scan */ + uint32_t frames; + int delay, padding; + dec->info = *info; + dec->start_offset = dec->offset = offset; + dec->end_offset = offset + buf_size; + dec->free_format_bytes = free_format_bytes; /* should not change */ + if (3 == dec->info.layer) + { + int ret = mp3dec_check_vbrtag(frame, frame_size, &frames, &delay, &padding); + if (ret) + dec->start_offset = dec->offset = offset + frame_size; + if (ret > 0) + { + padding *= info->channels; + dec->start_delay = dec->to_skip = delay*info->channels; + dec->samples = hdr_frame_samples(frame)*info->channels*(uint64_t)frames; + if (dec->samples >= (uint64_t)dec->start_delay) + dec->samples -= dec->start_delay; + if (padding > 0 && dec->samples >= (uint64_t)padding) + dec->samples -= padding; + dec->detected_samples = dec->samples; + dec->vbr_tag_found = 1; + return MP3D_E_USER; + } else if (ret < 0) + return 0; + } + } + if (dec->flags & MP3D_DO_NOT_SCAN) + return MP3D_E_USER; + if (dec->index.num_frames + 1 > dec->index.capacity) + { + if (!dec->index.capacity) + dec->index.capacity = 4096; + else + dec->index.capacity *= 2; + mp3dec_frame_t *alloc_buf = (mp3dec_frame_t *)realloc((void*)dec->index.frames, sizeof(mp3dec_frame_t)*dec->index.capacity); + if (!alloc_buf) + return MP3D_E_MEMORY; + dec->index.frames = alloc_buf; + } + idx_frame = &dec->index.frames[dec->index.num_frames++]; + idx_frame->offset = offset; + idx_frame->sample = dec->samples; + if (!dec->buffer_samples && dec->index.num_frames < 256) + { /* for some cutted mp3 frames, bit-reservoir not filled and decoding can't be started from first frames */ + /* try to decode up to 255 first frames till samples starts to decode */ + dec->buffer_samples = mp3dec_decode_frame(&dec->mp3d, frame, MINIMP3_MIN(buf_size, (size_t)INT_MAX), dec->buffer, info); + dec->samples += dec->buffer_samples*info->channels; + } else + dec->samples += hdr_frame_samples(frame)*info->channels; + return 0; +} + +int mp3dec_ex_open_buf(mp3dec_ex_t *dec, const uint8_t *buf, size_t buf_size, int flags) +{ + if (!dec || !buf || (size_t)-1 == buf_size || (flags & (~MP3D_FLAGS_MASK))) + return MP3D_E_PARAM; + memset(dec, 0, sizeof(*dec)); + dec->file.buffer = buf; + dec->file.size = buf_size; + dec->flags = flags; + mp3dec_init(&dec->mp3d); + int ret = mp3dec_iterate_buf(dec->file.buffer, dec->file.size, mp3dec_load_index, dec); + if (ret && MP3D_E_USER != ret) + return ret; + mp3dec_init(&dec->mp3d); + dec->buffer_samples = 0; + dec->indexes_built = !(dec->vbr_tag_found || (flags & MP3D_DO_NOT_SCAN)); + dec->flags &= (~MP3D_DO_NOT_SCAN); + return 0; +} + +#ifndef MINIMP3_SEEK_IDX_LINEAR_SEARCH +static size_t mp3dec_idx_binary_search(mp3dec_index_t *idx, uint64_t position) +{ + size_t end = idx->num_frames, start = 0, index = 0; + while (start <= end) + { + size_t mid = (start + end) / 2; + if (idx->frames[mid].sample >= position) + { /* move left side. */ + if (idx->frames[mid].sample == position) + return mid; + end = mid - 1; + } else + { /* move to right side */ + index = mid; + start = mid + 1; + if (start == idx->num_frames) + break; + } + } + return index; +} +#endif + +int mp3dec_ex_seek(mp3dec_ex_t *dec, uint64_t position) +{ + size_t i; + if (!dec) + return MP3D_E_PARAM; + if (!(dec->flags & MP3D_SEEK_TO_SAMPLE)) + { + if (dec->io) + { + dec->offset = position; + } else + { + dec->offset = MINIMP3_MIN(position, dec->file.size); + } + dec->cur_sample = 0; + goto do_exit; + } + dec->cur_sample = position; + position += dec->start_delay; + if (0 == position) + { /* optimize seek to zero, no index needed */ +seek_zero: + dec->offset = dec->start_offset; + dec->to_skip = 0; + goto do_exit; + } + if (!dec->indexes_built) + { /* no index created yet (vbr tag used to calculate track length or MP3D_DO_NOT_SCAN open flag used) */ + dec->indexes_built = 1; + dec->samples = 0; + dec->buffer_samples = 0; + if (dec->io) + { + if (dec->io->seek(dec->start_offset, dec->io->seek_data)) + return MP3D_E_IOERROR; + int ret = mp3dec_iterate_cb(dec->io, (uint8_t *)dec->file.buffer, dec->file.size, mp3dec_load_index, dec); + if (ret && MP3D_E_USER != ret) + return ret; + } else + { + int ret = mp3dec_iterate_buf(dec->file.buffer + dec->start_offset, dec->file.size - dec->start_offset, mp3dec_load_index, dec); + if (ret && MP3D_E_USER != ret) + return ret; + } + for (i = 0; i < dec->index.num_frames; i++) + dec->index.frames[i].offset += dec->start_offset; + dec->samples = dec->detected_samples; + } + if (!dec->index.frames) + goto seek_zero; /* no frames in file - seek to zero */ +#ifdef MINIMP3_SEEK_IDX_LINEAR_SEARCH + for (i = 0; i < dec->index.num_frames; i++) + { + if (dec->index.frames[i].sample >= position) + break; + } +#else + i = mp3dec_idx_binary_search(&dec->index, position); +#endif + if (i) + { + int to_fill_bytes = 511; + int skip_frames = MINIMP3_PREDECODE_FRAMES +#ifdef MINIMP3_SEEK_IDX_LINEAR_SEARCH + + ((dec->index.frames[i].sample == position) ? 0 : 1) +#endif + ; + i -= MINIMP3_MIN(i, (size_t)skip_frames); + if (3 == dec->info.layer) + { + while (i && to_fill_bytes) + { /* make sure bit-reservoir is filled when we start decoding */ + bs_t bs[1]; + L3_gr_info_t gr_info[4]; + int frame_bytes, frame_size; + const uint8_t *hdr; + if (dec->io) + { + hdr = dec->file.buffer; + if (dec->io->seek(dec->index.frames[i - 1].offset, dec->io->seek_data)) + return MP3D_E_IOERROR; + size_t readed = dec->io->read((uint8_t *)hdr, HDR_SIZE, dec->io->read_data); + if (readed != HDR_SIZE) + return MP3D_E_IOERROR; + frame_size = hdr_frame_bytes(hdr, dec->free_format_bytes) + hdr_padding(hdr); + readed = dec->io->read((uint8_t *)hdr + HDR_SIZE, frame_size - HDR_SIZE, dec->io->read_data); + if (readed != (size_t)(frame_size - HDR_SIZE)) + return MP3D_E_IOERROR; + bs_init(bs, hdr + HDR_SIZE, frame_size - HDR_SIZE); + } else + { + hdr = dec->file.buffer + dec->index.frames[i - 1].offset; + frame_size = hdr_frame_bytes(hdr, dec->free_format_bytes) + hdr_padding(hdr); + bs_init(bs, hdr + HDR_SIZE, frame_size - HDR_SIZE); + } + if (HDR_IS_CRC(hdr)) + get_bits(bs, 16); + i--; + if (L3_read_side_info(bs, gr_info, hdr) < 0) + break; /* frame not decodable, we can start from here */ + frame_bytes = (bs->limit - bs->pos)/8; + to_fill_bytes -= MINIMP3_MIN(to_fill_bytes, frame_bytes); + } + } + } + dec->offset = dec->index.frames[i].offset; + dec->to_skip = position - dec->index.frames[i].sample; + while ((i + 1) < dec->index.num_frames && !dec->index.frames[i].sample && !dec->index.frames[i + 1].sample) + { /* skip not decodable first frames */ + const uint8_t *hdr; + if (dec->io) + { + hdr = dec->file.buffer; + if (dec->io->seek(dec->index.frames[i].offset, dec->io->seek_data)) + return MP3D_E_IOERROR; + size_t readed = dec->io->read((uint8_t *)hdr, HDR_SIZE, dec->io->read_data); + if (readed != HDR_SIZE) + return MP3D_E_IOERROR; + } else + hdr = dec->file.buffer + dec->index.frames[i].offset; + dec->to_skip += hdr_frame_samples(hdr)*dec->info.channels; + i++; + } +do_exit: + if (dec->io) + { + if (dec->io->seek(dec->offset, dec->io->seek_data)) + return MP3D_E_IOERROR; + } + dec->buffer_samples = 0; + dec->buffer_consumed = 0; + dec->input_consumed = 0; + dec->input_filled = 0; + dec->last_error = 0; + mp3dec_init(&dec->mp3d); + return 0; +} + +size_t mp3dec_ex_read_frame(mp3dec_ex_t *dec, mp3d_sample_t **buf, mp3dec_frame_info_t *frame_info, size_t max_samples) +{ + if (!dec || !buf || !frame_info) + { + if (dec) + dec->last_error = MP3D_E_PARAM; + return 0; + } + if (dec->detected_samples && dec->cur_sample >= dec->detected_samples) + return 0; /* at end of stream */ + if (dec->last_error) + return 0; /* error eof state, seek can reset it */ + *buf = NULL; + uint64_t end_offset = dec->end_offset ? dec->end_offset : dec->file.size; + int eof = 0; + while (dec->buffer_consumed == dec->buffer_samples) + { + const uint8_t *dec_buf; + if (dec->io) + { + if (!eof && (dec->input_filled - dec->input_consumed) < MINIMP3_BUF_SIZE) + { /* keep minimum 10 consecutive mp3 frames (~16KB) worst case */ + memmove((uint8_t*)dec->file.buffer, (uint8_t*)dec->file.buffer + dec->input_consumed, dec->input_filled - dec->input_consumed); + dec->input_filled -= dec->input_consumed; + dec->input_consumed = 0; + size_t readed = dec->io->read((uint8_t*)dec->file.buffer + dec->input_filled, dec->file.size - dec->input_filled, dec->io->read_data); + if (readed > (dec->file.size - dec->input_filled)) + { + dec->last_error = MP3D_E_IOERROR; + readed = 0; + } + if (readed != (dec->file.size - dec->input_filled)) + eof = 1; + dec->input_filled += readed; + if (eof) + mp3dec_skip_id3v1((uint8_t*)dec->file.buffer, &dec->input_filled); + } + dec_buf = dec->file.buffer + dec->input_consumed; + if (!(dec->input_filled - dec->input_consumed)) + return 0; + dec->buffer_samples = mp3dec_decode_frame(&dec->mp3d, dec_buf, dec->input_filled - dec->input_consumed, dec->buffer, frame_info); + dec->input_consumed += frame_info->frame_bytes; + } else + { + dec_buf = dec->file.buffer + dec->offset; + uint64_t buf_size = end_offset - dec->offset; + if (!buf_size) + return 0; + dec->buffer_samples = mp3dec_decode_frame(&dec->mp3d, dec_buf, MINIMP3_MIN(buf_size, (uint64_t)INT_MAX), dec->buffer, frame_info); + } + dec->buffer_consumed = 0; + if (dec->info.hz != frame_info->hz || dec->info.layer != frame_info->layer) + { +return_e_decode: + dec->last_error = MP3D_E_DECODE; + return 0; + } + if (dec->buffer_samples) + { + dec->buffer_samples *= frame_info->channels; + if (dec->to_skip) + { + size_t skip = MINIMP3_MIN(dec->buffer_samples, dec->to_skip); + dec->buffer_consumed += skip; + dec->to_skip -= skip; + } + if ( +#ifdef MINIMP3_ALLOW_MONO_STEREO_TRANSITION + !(dec->flags & MP3D_ALLOW_MONO_STEREO_TRANSITION) && +#endif + dec->buffer_consumed != dec->buffer_samples && dec->info.channels != frame_info->channels) + { + goto return_e_decode; + } + } else if (dec->to_skip) + { /* In mp3 decoding not always can start decode from any frame because of bit reservoir, + count skip samples for such frames */ + int frame_samples = hdr_frame_samples(dec_buf)*frame_info->channels; + dec->to_skip -= MINIMP3_MIN(frame_samples, dec->to_skip); + } + dec->offset += frame_info->frame_bytes; + } + size_t out_samples = MINIMP3_MIN((size_t)(dec->buffer_samples - dec->buffer_consumed), max_samples); + if (dec->detected_samples) + { /* count decoded samples to properly cut padding */ + if (dec->cur_sample + out_samples >= dec->detected_samples) + out_samples = dec->detected_samples - dec->cur_sample; + } + dec->cur_sample += out_samples; + *buf = dec->buffer + dec->buffer_consumed; + dec->buffer_consumed += out_samples; + return out_samples; +} + +size_t mp3dec_ex_read(mp3dec_ex_t *dec, mp3d_sample_t *buf, size_t samples) +{ + if (!dec || !buf) + { + if (dec) + dec->last_error = MP3D_E_PARAM; + return 0; + } + mp3dec_frame_info_t frame_info; + memset(&frame_info, 0, sizeof(frame_info)); + size_t samples_requested = samples; + while (samples) + { + mp3d_sample_t *buf_frame = NULL; + size_t read_samples = mp3dec_ex_read_frame(dec, &buf_frame, &frame_info, samples); + if (!read_samples) + { + break; + } + memcpy(buf, buf_frame, read_samples * sizeof(mp3d_sample_t)); + buf += read_samples; + samples -= read_samples; + } + return samples_requested - samples; +} + +int mp3dec_ex_open_cb(mp3dec_ex_t *dec, mp3dec_io_t *io, int flags) +{ + if (!dec || !io || (flags & (~MP3D_FLAGS_MASK))) + return MP3D_E_PARAM; + memset(dec, 0, sizeof(*dec)); +#ifdef MINIMP3_HAVE_RING + int ret; + if (ret = mp3dec_open_ring(&dec->file, MINIMP3_IO_SIZE)) + return ret; +#else + dec->file.size = MINIMP3_IO_SIZE; + dec->file.buffer = (const uint8_t*)malloc(dec->file.size); + if (!dec->file.buffer) + return MP3D_E_MEMORY; +#endif + dec->flags = flags; + dec->io = io; + mp3dec_init(&dec->mp3d); + if (io->seek(0, io->seek_data)) + return MP3D_E_IOERROR; + int ret = mp3dec_iterate_cb(io, (uint8_t *)dec->file.buffer, dec->file.size, mp3dec_load_index, dec); + if (ret && MP3D_E_USER != ret) + return ret; + if (dec->io->seek(dec->start_offset, dec->io->seek_data)) + return MP3D_E_IOERROR; + mp3dec_init(&dec->mp3d); + dec->buffer_samples = 0; + dec->indexes_built = !(dec->vbr_tag_found || (flags & MP3D_DO_NOT_SCAN)); + dec->flags &= (~MP3D_DO_NOT_SCAN); + return 0; +} + + +#ifndef MINIMP3_NO_STDIO + +#if defined(__linux__) || defined(__FreeBSD__) +#include <errno.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#if !defined(_GNU_SOURCE) +#include <sys/ipc.h> +#include <sys/shm.h> +#endif +#if !defined(MAP_POPULATE) && defined(__linux__) +#define MAP_POPULATE 0x08000 +#elif !defined(MAP_POPULATE) +#define MAP_POPULATE 0 +#endif + +static void mp3dec_close_file(mp3dec_map_info_t *map_info) +{ + if (map_info->buffer && MAP_FAILED != map_info->buffer) + munmap((void *)map_info->buffer, map_info->size); + map_info->buffer = 0; + map_info->size = 0; +} + +static int mp3dec_open_file(const char *file_name, mp3dec_map_info_t *map_info) +{ + if (!file_name) + return MP3D_E_PARAM; + int file; + struct stat st; + memset(map_info, 0, sizeof(*map_info)); +retry_open: + file = open(file_name, O_RDONLY); + if (file < 0 && (errno == EAGAIN || errno == EINTR)) + goto retry_open; + if (file < 0 || fstat(file, &st) < 0) + { + close(file); + return MP3D_E_IOERROR; + } + + map_info->size = st.st_size; +retry_mmap: + map_info->buffer = (const uint8_t *)mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE, file, 0); + if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR)) + goto retry_mmap; + close(file); + if (MAP_FAILED == map_info->buffer) + return MP3D_E_IOERROR; + return 0; +} + +#if MINIMP3_ENABLE_RING && defined(__linux__) && defined(_GNU_SOURCE) +#define MINIMP3_HAVE_RING +static void mp3dec_close_ring(mp3dec_map_info_t *map_info) +{ +#if defined(__linux__) && defined(_GNU_SOURCE) + if (map_info->buffer && MAP_FAILED != map_info->buffer) + munmap((void *)map_info->buffer, map_info->size*2); +#else + if (map_info->buffer) + { + shmdt(map_info->buffer); + shmdt(map_info->buffer + map_info->size); + } +#endif + map_info->buffer = 0; + map_info->size = 0; +} + +static int mp3dec_open_ring(mp3dec_map_info_t *map_info, size_t size) +{ + int memfd, page_size; +#if defined(__linux__) && defined(_GNU_SOURCE) + void *buffer; + int res; +#endif + memset(map_info, 0, sizeof(*map_info)); + +#ifdef _SC_PAGESIZE + page_size = sysconf(_SC_PAGESIZE); +#else + page_size = getpagesize(); +#endif + map_info->size = (size + page_size - 1)/page_size*page_size; + +#if defined(__linux__) && defined(_GNU_SOURCE) + memfd = memfd_create("mp3_ring", 0); + if (memfd < 0) + return MP3D_E_MEMORY; + +retry_ftruncate: + res = ftruncate(memfd, map_info->size); + if (res && (errno == EAGAIN || errno == EINTR)) + goto retry_ftruncate; + if (res) + goto error; + +retry_mmap: + map_info->buffer = (const uint8_t *)mmap(NULL, map_info->size*2, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR)) + goto retry_mmap; + if (MAP_FAILED == map_info->buffer || !map_info->buffer) + goto error; +retry_mmap2: + buffer = mmap((void *)map_info->buffer, map_info->size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, memfd, 0); + if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR)) + goto retry_mmap2; + if (MAP_FAILED == map_info->buffer || buffer != (void *)map_info->buffer) + goto error; +retry_mmap3: + buffer = mmap((void *)map_info->buffer + map_info->size, map_info->size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_SHARED, memfd, 0); + if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR)) + goto retry_mmap3; + if (MAP_FAILED == map_info->buffer || buffer != (void *)(map_info->buffer + map_info->size)) + goto error; + + close(memfd); + return 0; +error: + close(memfd); + mp3dec_close_ring(map_info); + return MP3D_E_MEMORY; +#else + memfd = shmget(IPC_PRIVATE, map_info->size, IPC_CREAT | 0700); + if (memfd < 0) + return MP3D_E_MEMORY; +retry_mmap: + map_info->buffer = (const uint8_t *)mmap(NULL, map_info->size*2, PROT_NONE, MAP_PRIVATE, -1, 0); + if (MAP_FAILED == map_info->buffer && (errno == EAGAIN || errno == EINTR)) + goto retry_mmap; + if (MAP_FAILED == map_info->buffer) + goto error; + if (map_info->buffer != shmat(memfd, map_info->buffer, 0)) + goto error; + if ((map_info->buffer + map_info->size) != shmat(memfd, map_info->buffer + map_info->size, 0)) + goto error; + if (shmctl(memfd, IPC_RMID, NULL) < 0) + return MP3D_E_MEMORY; + return 0; +error: + shmctl(memfd, IPC_RMID, NULL); + mp3dec_close_ring(map_info); + return MP3D_E_MEMORY; +#endif +} +#endif /*MINIMP3_ENABLE_RING*/ +#elif defined(_WIN32) +#include <windows.h> + +static void mp3dec_close_file(mp3dec_map_info_t *map_info) +{ + if (map_info->buffer) + UnmapViewOfFile(map_info->buffer); + map_info->buffer = 0; + map_info->size = 0; +} + +static int mp3dec_open_file_h(HANDLE file, mp3dec_map_info_t *map_info) +{ + memset(map_info, 0, sizeof(*map_info)); + + HANDLE mapping = NULL; + LARGE_INTEGER s; + s.LowPart = GetFileSize(file, (DWORD*)&s.HighPart); + if (s.LowPart == INVALID_FILE_SIZE && GetLastError() != NO_ERROR) + goto error; + map_info->size = s.QuadPart; + + mapping = CreateFileMapping(file, NULL, PAGE_READONLY, 0, 0, NULL); + if (!mapping) + goto error; + map_info->buffer = (const uint8_t*)MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, s.QuadPart); + CloseHandle(mapping); + if (!map_info->buffer) + goto error; + + CloseHandle(file); + return 0; +error: + mp3dec_close_file(map_info); + CloseHandle(file); + return MP3D_E_IOERROR; +} + +static int mp3dec_open_file(const char *file_name, mp3dec_map_info_t *map_info) +{ + if (!file_name) + return MP3D_E_PARAM; + HANDLE file = CreateFileA(file_name, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, 0, 0); + if (INVALID_HANDLE_VALUE == file) + return MP3D_E_IOERROR; + return mp3dec_open_file_h(file, map_info); +} + +static int mp3dec_open_file_w(const wchar_t *file_name, mp3dec_map_info_t *map_info) +{ + if (!file_name) + return MP3D_E_PARAM; + HANDLE file = CreateFileW(file_name, GENERIC_READ, FILE_SHARE_READ, 0, OPEN_EXISTING, 0, 0); + if (INVALID_HANDLE_VALUE == file) + return MP3D_E_IOERROR; + return mp3dec_open_file_h(file, map_info); +} +#else +#include <stdio.h> + +static void mp3dec_close_file(mp3dec_map_info_t *map_info) +{ + if (map_info->buffer) + free((void *)map_info->buffer); + map_info->buffer = 0; + map_info->size = 0; +} + +static int mp3dec_open_file(const char *file_name, mp3dec_map_info_t *map_info) +{ + if (!file_name) + return MP3D_E_PARAM; + memset(map_info, 0, sizeof(*map_info)); + FILE *file = fopen(file_name, "rb"); + if (!file) + return MP3D_E_IOERROR; + int res = MP3D_E_IOERROR; + long size = -1; + if (fseek(file, 0, SEEK_END)) + goto error; + size = ftell(file); + if (size < 0) + goto error; + map_info->size = (size_t)size; + if (fseek(file, 0, SEEK_SET)) + goto error; + map_info->buffer = (uint8_t *)malloc(map_info->size); + if (!map_info->buffer) + { + res = MP3D_E_MEMORY; + goto error; + } + if (fread((void *)map_info->buffer, 1, map_info->size, file) != map_info->size) + goto error; + fclose(file); + return 0; +error: + mp3dec_close_file(map_info); + fclose(file); + return res; +} +#endif + +static int mp3dec_detect_mapinfo(mp3dec_map_info_t *map_info) +{ + int ret = mp3dec_detect_buf(map_info->buffer, map_info->size); + mp3dec_close_file(map_info); + return ret; +} + +static int mp3dec_load_mapinfo(mp3dec_t *dec, mp3dec_map_info_t *map_info, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data) +{ + int ret = mp3dec_load_buf(dec, map_info->buffer, map_info->size, info, progress_cb, user_data); + mp3dec_close_file(map_info); + return ret; +} + +static int mp3dec_iterate_mapinfo(mp3dec_map_info_t *map_info, MP3D_ITERATE_CB callback, void *user_data) +{ + int ret = mp3dec_iterate_buf(map_info->buffer, map_info->size, callback, user_data); + mp3dec_close_file(map_info); + return ret; +} + +static int mp3dec_ex_open_mapinfo(mp3dec_ex_t *dec, int flags) +{ + int ret = mp3dec_ex_open_buf(dec, dec->file.buffer, dec->file.size, flags); + dec->is_file = 1; + if (ret) + mp3dec_ex_close(dec); + return ret; +} + +int mp3dec_detect(const char *file_name) +{ + int ret; + mp3dec_map_info_t map_info; + if ((ret = mp3dec_open_file(file_name, &map_info))) + return ret; + return mp3dec_detect_mapinfo(&map_info); +} + +int mp3dec_load(mp3dec_t *dec, const char *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data) +{ + int ret; + mp3dec_map_info_t map_info; + if ((ret = mp3dec_open_file(file_name, &map_info))) + return ret; + return mp3dec_load_mapinfo(dec, &map_info, info, progress_cb, user_data); +} + +int mp3dec_iterate(const char *file_name, MP3D_ITERATE_CB callback, void *user_data) +{ + int ret; + mp3dec_map_info_t map_info; + if ((ret = mp3dec_open_file(file_name, &map_info))) + return ret; + return mp3dec_iterate_mapinfo(&map_info, callback, user_data); +} + +int mp3dec_ex_open(mp3dec_ex_t *dec, const char *file_name, int flags) +{ + int ret; + if (!dec) + return MP3D_E_PARAM; + if ((ret = mp3dec_open_file(file_name, &dec->file))) + return ret; + return mp3dec_ex_open_mapinfo(dec, flags); +} + +void mp3dec_ex_close(mp3dec_ex_t *dec) +{ +#ifdef MINIMP3_HAVE_RING + if (dec->io) + mp3dec_close_ring(&dec->file); +#else + if (dec->io && dec->file.buffer) + free((void*)dec->file.buffer); +#endif + if (dec->is_file) + mp3dec_close_file(&dec->file); + if (dec->index.frames) + free(dec->index.frames); + memset(dec, 0, sizeof(*dec)); +} + +#ifdef _WIN32 +int mp3dec_detect_w(const wchar_t *file_name) +{ + int ret; + mp3dec_map_info_t map_info; + if ((ret = mp3dec_open_file_w(file_name, &map_info))) + return ret; + return mp3dec_detect_mapinfo(&map_info); +} + +int mp3dec_load_w(mp3dec_t *dec, const wchar_t *file_name, mp3dec_file_info_t *info, MP3D_PROGRESS_CB progress_cb, void *user_data) +{ + int ret; + mp3dec_map_info_t map_info; + if ((ret = mp3dec_open_file_w(file_name, &map_info))) + return ret; + return mp3dec_load_mapinfo(dec, &map_info, info, progress_cb, user_data); +} + +int mp3dec_iterate_w(const wchar_t *file_name, MP3D_ITERATE_CB callback, void *user_data) +{ + int ret; + mp3dec_map_info_t map_info; + if ((ret = mp3dec_open_file_w(file_name, &map_info))) + return ret; + return mp3dec_iterate_mapinfo(&map_info, callback, user_data); +} + +int mp3dec_ex_open_w(mp3dec_ex_t *dec, const wchar_t *file_name, int flags) +{ + int ret; + if ((ret = mp3dec_open_file_w(file_name, &dec->file))) + return ret; + return mp3dec_ex_open_mapinfo(dec, flags); +} +#endif +#else /* MINIMP3_NO_STDIO */ +void mp3dec_ex_close(mp3dec_ex_t *dec) +{ +#ifdef MINIMP3_HAVE_RING + if (dec->io) + mp3dec_close_ring(&dec->file); +#else + if (dec->io && dec->file.buffer) + free((void*)dec->file.buffer); +#endif + if (dec->index.frames) + free(dec->index.frames); + memset(dec, 0, sizeof(*dec)); +} +#endif + +#endif /*MINIMP3_IMPLEMENTATION*/ diff --git a/thirdparty/misc/easing_equations.cpp b/thirdparty/misc/easing_equations.cpp index bc84564b19..af48aaf079 100644 --- a/thirdparty/misc/easing_equations.cpp +++ b/thirdparty/misc/easing_equations.cpp @@ -188,7 +188,8 @@ static real_t out_in(real_t t, real_t b, real_t c, real_t d) { /////////////////////////////////////////////////////////////////////////// namespace cubic { static real_t in(real_t t, real_t b, real_t c, real_t d) { - return c * (t /= d) * t * t + b; + t /= d; + return c * t * t * t + b; } static real_t out(real_t t, real_t b, real_t c, real_t d) { @@ -197,8 +198,10 @@ static real_t out(real_t t, real_t b, real_t c, real_t d) { } static real_t in_out(real_t t, real_t b, real_t c, real_t d) { - if ((t /= d / 2) < 1) return c / 2 * t * t * t + b; - return c / 2 * ((t -= 2) * t * t + 2) + b; + t /= d / 2; + if (t < 1) return c / 2 * t * t * t + b; + t -= 2; + return c / 2 * (t * t * t + 2) + b; } static real_t out_in(real_t t, real_t b, real_t c, real_t d) { @@ -210,16 +213,22 @@ static real_t out_in(real_t t, real_t b, real_t c, real_t d) { /////////////////////////////////////////////////////////////////////////// namespace circ { static real_t in(real_t t, real_t b, real_t c, real_t d) { - return -c * (sqrt(1 - (t /= d) * t) - 1) + b; // TODO: ehrich: operation with t is undefined + t /= d; + return -c * (sqrt(1 - t * t) - 1) + b; } static real_t out(real_t t, real_t b, real_t c, real_t d) { - return c * sqrt(1 - (t = t / d - 1) * t) + b; // TODO: ehrich: operation with t is undefined + t = t / d - 1; + return c * sqrt(1 - t * t) + b; } static real_t in_out(real_t t, real_t b, real_t c, real_t d) { - if ((t /= d / 2) < 1) return -c / 2 * (sqrt(1 - t * t) - 1) + b; - return c / 2 * (sqrt(1 - t * (t -= 2)) + 1) + b; // TODO: ehrich: operation with t is undefined + t /= d / 2; + if (t < 1) { + return -c / 2 * (sqrt(1 - t * t) - 1) + b; + } + t -= 2; + return c / 2 * (sqrt(1 - t * t) + 1) + b; } static real_t out_in(real_t t, real_t b, real_t c, real_t d) { @@ -271,14 +280,16 @@ static real_t in(real_t t, real_t b, real_t c, real_t d) { static real_t out(real_t t, real_t b, real_t c, real_t d) { float s = 1.70158f; - return c * ((t = t / d - 1) * t * ((s + 1) * t + s) + 1) + b; // TODO: ehrich: operation with t is undefined + t = t / d - 1; + return c * (t * t * ((s + 1) * t + s) + 1) + b; } static real_t in_out(real_t t, real_t b, real_t c, real_t d) { - float s = 1.70158f; - if ((t /= d / 2) < 1) return c / 2 * (t * t * (((s *= (1.525f)) + 1) * t - s)) + b; // TODO: ehrich: operation with s is undefined - float postFix = t -= 2; - return c / 2 * ((postFix)*t * (((s *= (1.525f)) + 1) * t + s) + 2) + b; // TODO: ehrich: operation with s is undefined + float s = 1.70158f * 1.525f; + t /= d / 2; + if (t < 1) return c / 2 * (t * t * ((s + 1) * t - s)) + b; + t -= 2; + return c / 2 * (t * t * ((s + 1) * t + s) + 2) + b; } static real_t out_in(real_t t, real_t b, real_t c, real_t d) { diff --git a/thirdparty/misc/open-simplex-noise.c b/thirdparty/misc/open-simplex-noise.c index 88fbd3e51d..44a072cad1 100644 --- a/thirdparty/misc/open-simplex-noise.c +++ b/thirdparty/misc/open-simplex-noise.c @@ -100,27 +100,27 @@ static const signed char gradients4D[] = { -3, -1, -1, -1, -1, -3, -1, -1, -1, -1, -3, -1, -1, -1, -1, -3, }; -static double extrapolate2(struct osn_context *ctx, int xsb, int ysb, double dx, double dy) +static double extrapolate2(const struct osn_context *ctx, int xsb, int ysb, double dx, double dy) { - int16_t *perm = ctx->perm; + const int16_t *perm = ctx->perm; int index = perm[(perm[xsb & 0xFF] + ysb) & 0xFF] & 0x0E; return gradients2D[index] * dx + gradients2D[index + 1] * dy; } -static double extrapolate3(struct osn_context *ctx, int xsb, int ysb, int zsb, double dx, double dy, double dz) +static double extrapolate3(const struct osn_context *ctx, int xsb, int ysb, int zsb, double dx, double dy, double dz) { - int16_t *perm = ctx->perm; - int16_t *permGradIndex3D = ctx->permGradIndex3D; + const int16_t *perm = ctx->perm; + const int16_t *permGradIndex3D = ctx->permGradIndex3D; int index = permGradIndex3D[(perm[(perm[xsb & 0xFF] + ysb) & 0xFF] + zsb) & 0xFF]; return gradients3D[index] * dx + gradients3D[index + 1] * dy + gradients3D[index + 2] * dz; } -static double extrapolate4(struct osn_context *ctx, int xsb, int ysb, int zsb, int wsb, double dx, double dy, double dz, double dw) +static double extrapolate4(const struct osn_context *ctx, int xsb, int ysb, int zsb, int wsb, double dx, double dy, double dz, double dw) { - int16_t *perm = ctx->perm; + const int16_t *perm = ctx->perm; int index = perm[(perm[(perm[(perm[xsb & 0xFF] + ysb) & 0xFF] + zsb) & 0xFF] + wsb) & 0xFF] & 0xFC; return gradients4D[index] * dx + gradients4D[index + 1] * dy @@ -227,7 +227,7 @@ void open_simplex_noise_free(struct osn_context *ctx) // -- GODOT end -- /* 2D OpenSimplex (Simplectic) Noise. */ -double open_simplex_noise2(struct osn_context *ctx, double x, double y) +double open_simplex_noise2(const struct osn_context *ctx, double x, double y) { /* Place input coordinates onto grid. */ @@ -355,7 +355,7 @@ double open_simplex_noise2(struct osn_context *ctx, double x, double y) /* * 3D OpenSimplex (Simplectic) Noise */ -double open_simplex_noise3(struct osn_context *ctx, double x, double y, double z) +double open_simplex_noise3(const struct osn_context *ctx, double x, double y, double z) { /* Place input coordinates on simplectic honeycomb. */ @@ -928,7 +928,7 @@ double open_simplex_noise3(struct osn_context *ctx, double x, double y, double z /* * 4D OpenSimplex (Simplectic) Noise. */ -double open_simplex_noise4(struct osn_context *ctx, double x, double y, double z, double w) +double open_simplex_noise4(const struct osn_context *ctx, double x, double y, double z, double w) { double uins; double dx1, dy1, dz1, dw1; diff --git a/thirdparty/misc/open-simplex-noise.h b/thirdparty/misc/open-simplex-noise.h index 89e0df8218..fd9248c3a1 100644 --- a/thirdparty/misc/open-simplex-noise.h +++ b/thirdparty/misc/open-simplex-noise.h @@ -47,9 +47,9 @@ int open_simplex_noise(int64_t seed, struct osn_context *ctx); //int open_simplex_noise_init_perm(struct osn_context *ctx, int16_t p[], int nelements); // -- GODOT end -- void open_simplex_noise_free(struct osn_context *ctx); -double open_simplex_noise2(struct osn_context *ctx, double x, double y); -double open_simplex_noise3(struct osn_context *ctx, double x, double y, double z); -double open_simplex_noise4(struct osn_context *ctx, double x, double y, double z, double w); +double open_simplex_noise2(const struct osn_context *ctx, double x, double y); +double open_simplex_noise3(const struct osn_context *ctx, double x, double y, double z); +double open_simplex_noise4(const struct osn_context *ctx, double x, double y, double z, double w); #ifdef __cplusplus } diff --git a/thirdparty/xatlas/xatlas.cpp b/thirdparty/xatlas/xatlas.cpp index 43aec33a9f..9f66ae0067 100644 --- a/thirdparty/xatlas/xatlas.cpp +++ b/thirdparty/xatlas/xatlas.cpp @@ -33,19 +33,25 @@ https://github.com/brandonpelfrey/Fast-BVH MIT License Copyright (c) 2012 Brandon Pelfrey */ -#include <atomic> -#include <condition_variable> -#include <mutex> -#include <thread> +#include "xatlas.h" +#ifndef XATLAS_C_API +#define XATLAS_C_API 0 +#endif +#if XATLAS_C_API +#include "xatlas_c.h" +#endif #include <assert.h> #include <float.h> // FLT_MAX #include <limits.h> #include <math.h> +#include <atomic> +#include <condition_variable> +#include <mutex> +#include <thread> #define __STDC_LIMIT_MACROS #include <stdint.h> #include <stdio.h> #include <string.h> -#include "xatlas.h" #ifndef XA_DEBUG #ifdef NDEBUG @@ -59,7 +65,7 @@ Copyright (c) 2012 Brandon Pelfrey #define XA_PROFILE 0 #endif #if XA_PROFILE -#include <time.h> +#include <chrono> #endif #ifndef XA_MULTITHREADED @@ -70,7 +76,10 @@ Copyright (c) 2012 Brandon Pelfrey #define XA_XSTR(x) XA_STR(x) #ifndef XA_ASSERT -#define XA_ASSERT(exp) if (!(exp)) { XA_PRINT_WARNING("\rASSERT: %s %s %d\n", XA_XSTR(exp), __FILE__, __LINE__); } +#define XA_ASSERT(exp) \ + if (!(exp)) { \ + XA_PRINT_WARNING("\rASSERT: %s %s %d\n", XA_XSTR(exp), __FILE__, __LINE__); \ + } #endif #ifndef XA_DEBUG_ASSERT @@ -78,13 +87,13 @@ Copyright (c) 2012 Brandon Pelfrey #endif #ifndef XA_PRINT -#define XA_PRINT(...) \ +#define XA_PRINT(...) \ if (xatlas::internal::s_print && xatlas::internal::s_printVerbose) \ xatlas::internal::s_print(__VA_ARGS__); #endif #ifndef XA_PRINT_WARNING -#define XA_PRINT_WARNING(...) \ +#define XA_PRINT_WARNING(...) \ if (xatlas::internal::s_print) \ xatlas::internal::s_print(__VA_ARGS__); #endif @@ -116,9 +125,9 @@ Copyright (c) 2012 Brandon Pelfrey #define XA_MERGE_CHARTS 1 #define XA_MERGE_CHARTS_MIN_NORMAL_DEVIATION 0.5f #define XA_RECOMPUTE_CHARTS 1 -#define XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION 0 -#define XA_FIX_INTERNAL_BOUNDARY_LOOPS 1 -#define XA_PRINT_CHART_WARNINGS 0 +#define XA_CHECK_PARAM_WINDING 0 +#define XA_CHECK_PIECEWISE_CHART_QUALITY 0 +#define XA_CHECK_T_JUNCTIONS 0 #define XA_DEBUG_HEAP 0 #define XA_DEBUG_SINGLE_CHART 0 @@ -131,25 +140,19 @@ Copyright (c) 2012 Brandon Pelfrey #define XA_DEBUG_EXPORT_OBJ_CHART_GROUPS 0 #define XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS 0 #define XA_DEBUG_EXPORT_OBJ_CHARTS 0 -#define XA_DEBUG_EXPORT_OBJ_BEFORE_FIX_TJUNCTION 0 -#define XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR 0 +#define XA_DEBUG_EXPORT_OBJ_TJUNCTION 0 // XA_CHECK_T_JUNCTIONS must also be set #define XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION 0 #define XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION 0 #define XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS 0 -#define XA_DEBUG_EXPORT_OBJ (0 \ - || XA_DEBUG_EXPORT_OBJ_FACE_GROUPS \ - || XA_DEBUG_EXPORT_OBJ_CHART_GROUPS \ - || XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS \ - || XA_DEBUG_EXPORT_OBJ_CHARTS \ - || XA_DEBUG_EXPORT_OBJ_BEFORE_FIX_TJUNCTION \ - || XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR \ - || XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION \ - || XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION \ - || XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS) +#define XA_DEBUG_EXPORT_OBJ (0 || XA_DEBUG_EXPORT_OBJ_FACE_GROUPS || XA_DEBUG_EXPORT_OBJ_CHART_GROUPS || XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS || XA_DEBUG_EXPORT_OBJ_CHARTS || XA_DEBUG_EXPORT_OBJ_TJUNCTION || XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION || XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION || XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS) #ifdef _MSC_VER -#define XA_FOPEN(_file, _filename, _mode) { if (fopen_s(&_file, _filename, _mode) != 0) _file = NULL; } +#define XA_FOPEN(_file, _filename, _mode) \ + { \ + if (fopen_s(&_file, _filename, _mode) != 0) \ + _file = NULL; \ + } #define XA_SPRINTF(_buffer, _size, _format, ...) sprintf_s(_buffer, _size, _format, __VA_ARGS__) #else #define XA_FOPEN(_file, _filename, _mode) _file = fopen(_filename, _mode) @@ -165,74 +168,76 @@ static PrintFunc s_print = printf; static bool s_printVerbose = false; #if XA_PROFILE -#define XA_PROFILE_START(var) const clock_t var##Start = clock(); -#define XA_PROFILE_END(var) internal::s_profile.var += clock() - var##Start; -#define XA_PROFILE_PRINT_AND_RESET(label, var) XA_PRINT("%s%.2f seconds (%g ms)\n", label, internal::clockToSeconds(internal::s_profile.var), internal::clockToMs(internal::s_profile.var)); internal::s_profile.var = 0; +typedef uint64_t Duration; + +#define XA_PROFILE_START(var) const std::chrono::time_point<std::chrono::high_resolution_clock> var##Start = std::chrono::high_resolution_clock::now(); +#define XA_PROFILE_END(var) internal::s_profile.var += uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - var##Start).count()); +#define XA_PROFILE_PRINT_AND_RESET(label, var) \ + XA_PRINT("%s%.2f seconds (%g ms)\n", label, internal::durationToSeconds(internal::s_profile.var), internal::durationToMs(internal::s_profile.var)); \ + internal::s_profile.var = 0u; #define XA_PROFILE_ALLOC 0 -struct ProfileData -{ +struct ProfileData { #if XA_PROFILE_ALLOC - std::atomic<clock_t> alloc; + std::atomic<Duration> alloc; #endif - clock_t addMeshReal; - clock_t addMeshCopyData; - std::atomic<clock_t> addMeshThread; - std::atomic<clock_t> addMeshCreateColocals; - clock_t computeChartsReal; - std::atomic<clock_t> computeChartsThread; - std::atomic<clock_t> createFaceGroups; - std::atomic<clock_t> extractInvalidMeshGeometry; - std::atomic<clock_t> chartGroupComputeChartsReal; - std::atomic<clock_t> chartGroupComputeChartsThread; - std::atomic<clock_t> createChartGroupMesh; - std::atomic<clock_t> createChartGroupMeshColocals; - std::atomic<clock_t> createChartGroupMeshBoundaries; - std::atomic<clock_t> buildAtlas; - std::atomic<clock_t> buildAtlasInit; - std::atomic<clock_t> planarCharts; - std::atomic<clock_t> clusteredCharts; - std::atomic<clock_t> clusteredChartsPlaceSeeds; - std::atomic<clock_t> clusteredChartsPlaceSeedsBoundaryIntersection; - std::atomic<clock_t> clusteredChartsRelocateSeeds; - std::atomic<clock_t> clusteredChartsReset; - std::atomic<clock_t> clusteredChartsGrow; - std::atomic<clock_t> clusteredChartsGrowBoundaryIntersection; - std::atomic<clock_t> clusteredChartsMerge; - std::atomic<clock_t> clusteredChartsFillHoles; - std::atomic<clock_t> copyChartFaces; - clock_t parameterizeChartsReal; - std::atomic<clock_t> parameterizeChartsThread; - std::atomic<clock_t> createChartMesh; - std::atomic<clock_t> fixChartMeshTJunctions; - std::atomic<clock_t> closeChartMeshHoles; - std::atomic<clock_t> parameterizeChartsOrthogonal; - std::atomic<clock_t> parameterizeChartsLSCM; - std::atomic<clock_t> parameterizeChartsRecompute; - std::atomic<clock_t> parameterizeChartsPiecewise; - std::atomic<clock_t> parameterizeChartsPiecewiseBoundaryIntersection; - std::atomic<clock_t> parameterizeChartsEvaluateQuality; - clock_t packCharts; - clock_t packChartsAddCharts; - std::atomic<clock_t> packChartsAddChartsThread; - std::atomic<clock_t> packChartsAddChartsRestoreTexcoords; - clock_t packChartsRasterize; - clock_t packChartsDilate; - clock_t packChartsFindLocation; - clock_t packChartsBlit; - clock_t buildOutputMeshes; + std::chrono::time_point<std::chrono::high_resolution_clock> addMeshRealStart; + Duration addMeshReal; + Duration addMeshCopyData; + std::atomic<Duration> addMeshThread; + std::atomic<Duration> addMeshCreateColocals; + Duration computeChartsReal; + std::atomic<Duration> computeChartsThread; + std::atomic<Duration> createFaceGroups; + std::atomic<Duration> extractInvalidMeshGeometry; + std::atomic<Duration> chartGroupComputeChartsReal; + std::atomic<Duration> chartGroupComputeChartsThread; + std::atomic<Duration> createChartGroupMesh; + std::atomic<Duration> createChartGroupMeshColocals; + std::atomic<Duration> createChartGroupMeshBoundaries; + std::atomic<Duration> buildAtlas; + std::atomic<Duration> buildAtlasInit; + std::atomic<Duration> planarCharts; + std::atomic<Duration> originalUvCharts; + std::atomic<Duration> clusteredCharts; + std::atomic<Duration> clusteredChartsPlaceSeeds; + std::atomic<Duration> clusteredChartsPlaceSeedsBoundaryIntersection; + std::atomic<Duration> clusteredChartsRelocateSeeds; + std::atomic<Duration> clusteredChartsReset; + std::atomic<Duration> clusteredChartsGrow; + std::atomic<Duration> clusteredChartsGrowBoundaryIntersection; + std::atomic<Duration> clusteredChartsMerge; + std::atomic<Duration> clusteredChartsFillHoles; + std::atomic<Duration> copyChartFaces; + std::atomic<Duration> createChartMeshAndParameterizeReal; + std::atomic<Duration> createChartMeshAndParameterizeThread; + std::atomic<Duration> createChartMesh; + std::atomic<Duration> parameterizeCharts; + std::atomic<Duration> parameterizeChartsOrthogonal; + std::atomic<Duration> parameterizeChartsLSCM; + std::atomic<Duration> parameterizeChartsRecompute; + std::atomic<Duration> parameterizeChartsPiecewise; + std::atomic<Duration> parameterizeChartsPiecewiseBoundaryIntersection; + std::atomic<Duration> parameterizeChartsEvaluateQuality; + Duration packCharts; + Duration packChartsAddCharts; + std::atomic<Duration> packChartsAddChartsThread; + std::atomic<Duration> packChartsAddChartsRestoreTexcoords; + Duration packChartsRasterize; + Duration packChartsDilate; + Duration packChartsFindLocation; + Duration packChartsBlit; + Duration buildOutputMeshes; }; static ProfileData s_profile; -static double clockToMs(clock_t c) -{ - return c * 1000.0 / CLOCKS_PER_SEC; +static double durationToMs(Duration c) { + return (double)c * 0.001; } -static double clockToSeconds(clock_t c) -{ - return c / (double)CLOCKS_PER_SEC; +static double durationToSeconds(Duration c) { + return (double)c * 0.000001; } #else #define XA_PROFILE_START(var) @@ -241,10 +246,8 @@ static double clockToSeconds(clock_t c) #define XA_PROFILE_ALLOC 0 #endif -struct MemTag -{ - enum - { +struct MemTag { + enum { Default, BitImage, BVH, @@ -267,8 +270,7 @@ struct MemTag }; #if XA_DEBUG_HEAP -struct AllocHeader -{ +struct AllocHeader { size_t size; const char *file; int line; @@ -281,11 +283,10 @@ struct AllocHeader static std::mutex s_allocMutex; static AllocHeader *s_allocRoot = nullptr; static size_t s_allocTotalCount = 0, s_allocTotalSize = 0, s_allocPeakSize = 0, s_allocCount[MemTag::Count] = { 0 }, s_allocTotalTagSize[MemTag::Count] = { 0 }, s_allocPeakTagSize[MemTag::Count] = { 0 }; -static uint32_t s_allocId =0 ; +static uint32_t s_allocId = 0; static constexpr uint32_t kAllocRedzone = 0x12345678; -static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line) -{ +static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line) { std::unique_lock<std::mutex> lock(s_allocMutex); if (!size && !ptr) return nullptr; @@ -346,8 +347,7 @@ static void *Realloc(void *ptr, size_t size, int tag, const char *file, int line return newPtr + sizeof(AllocHeader); } -static void ReportLeaks() -{ +static void ReportLeaks() { printf("Checking for memory leaks...\n"); bool anyLeaks = false; AllocHeader *header = s_allocRoot; @@ -375,8 +375,7 @@ static void ReportLeaks() s_allocTotalTagSize[i] = s_allocPeakTagSize[i] = 0; } -static void PrintMemoryUsage() -{ +static void PrintMemoryUsage() { XA_PRINT("Total allocations: %zu\n", s_allocTotalCount); XA_PRINT("Memory usage: %0.2fMB current, %0.2fMB peak\n", internal::s_allocTotalSize / 1024.0f / 1024.0f, internal::s_allocPeakSize / 1024.0f / 1024.0f); static const char *labels[] = { // Sync with MemTag @@ -405,8 +404,7 @@ static void PrintMemoryUsage() #define XA_PRINT_MEM_USAGE internal::PrintMemoryUsage(); #else -static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/) -{ +static void *Realloc(void *ptr, size_t size, int /*tag*/, const char * /*file*/, int /*line*/) { if (size == 0 && !ptr) return nullptr; if (size == 0 && s_free) { @@ -432,89 +430,75 @@ static constexpr float kEpsilon = 0.0001f; static constexpr float kAreaEpsilon = FLT_EPSILON; static constexpr float kNormalEpsilon = 0.001f; -static int align(int x, int a) -{ +static int align(int x, int a) { return (x + a - 1) & ~(a - 1); } template <typename T> -static T max(const T &a, const T &b) -{ +static T max(const T &a, const T &b) { return a > b ? a : b; } template <typename T> -static T min(const T &a, const T &b) -{ +static T min(const T &a, const T &b) { return a < b ? a : b; } template <typename T> -static T max3(const T &a, const T &b, const T &c) -{ +static T max3(const T &a, const T &b, const T &c) { return max(a, max(b, c)); } /// Return the maximum of the three arguments. template <typename T> -static T min3(const T &a, const T &b, const T &c) -{ +static T min3(const T &a, const T &b, const T &c) { return min(a, min(b, c)); } /// Clamp between two values. template <typename T> -static T clamp(const T &x, const T &a, const T &b) -{ +static T clamp(const T &x, const T &a, const T &b) { return min(max(x, a), b); } template <typename T> -static void swap(T &a, T &b) -{ +static void swap(T &a, T &b) { T temp = a; a = b; b = temp; } -union FloatUint32 -{ +union FloatUint32 { float f; uint32_t u; }; -static bool isFinite(float f) -{ +static bool isFinite(float f) { FloatUint32 fu; fu.f = f; return fu.u != 0x7F800000u && fu.u != 0x7F800001u; } -static bool isNan(float f) -{ +static bool isNan(float f) { return f != f; } // Robust floating point comparisons: // http://realtimecollisiondetection.net/blog/?p=89 -static bool equal(const float f0, const float f1, const float epsilon) -{ +static bool equal(const float f0, const float f1, const float epsilon) { //return fabs(f0-f1) <= epsilon; return fabs(f0 - f1) <= epsilon * max3(1.0f, fabsf(f0), fabsf(f1)); } -static int ftoi_ceil(float val) -{ +static int ftoi_ceil(float val) { return (int)ceilf(val); } -static bool isZero(const float f, const float epsilon) -{ +static bool isZero(const float f, const float epsilon) { return fabs(f) <= epsilon; } -static float square(float f) -{ +static float square(float f) { return f * f; } @@ -524,9 +508,8 @@ static float square(float f) * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x * @note nextPowerOfTwo(x) = 2 << log2(x-1) */ -static uint32_t nextPowerOfTwo(uint32_t x) -{ - XA_DEBUG_ASSERT( x != 0 ); +static uint32_t nextPowerOfTwo(uint32_t x) { + XA_DEBUG_ASSERT(x != 0); // On modern CPUs this is supposed to be as fast as using the bsr instruction. x--; x |= x >> 1; @@ -537,38 +520,34 @@ static uint32_t nextPowerOfTwo(uint32_t x) return x + 1; } -class Vector2 -{ +class Vector2 { public: Vector2() {} - explicit Vector2(float f) : x(f), y(f) {} - Vector2(float x, float y): x(x), y(y) {} + explicit Vector2(float f) : + x(f), y(f) {} + Vector2(float _x, float _y) : + x(_x), y(_y) {} - Vector2 operator-() const - { + Vector2 operator-() const { return Vector2(-x, -y); } - void operator+=(const Vector2 &v) - { + void operator+=(const Vector2 &v) { x += v.x; y += v.y; } - void operator-=(const Vector2 &v) - { + void operator-=(const Vector2 &v) { x -= v.x; y -= v.y; } - void operator*=(float s) - { + void operator*=(float s) { x *= s; y *= s; } - void operator*=(const Vector2 &v) - { + void operator*=(const Vector2 &v) { x *= v.x; y *= v.y; } @@ -576,13 +555,11 @@ public: float x, y; }; -static bool operator==(const Vector2 &a, const Vector2 &b) -{ +static bool operator==(const Vector2 &a, const Vector2 &b) { return a.x == b.x && a.y == b.y; } -static bool operator!=(const Vector2 &a, const Vector2 &b) -{ +static bool operator!=(const Vector2 &a, const Vector2 &b) { return a.x != b.x || a.y != b.y; } @@ -591,78 +568,64 @@ static bool operator!=(const Vector2 &a, const Vector2 &b) return Vector2(a.x + b.x, a.y + b.y); }*/ -static Vector2 operator-(const Vector2 &a, const Vector2 &b) -{ +static Vector2 operator-(const Vector2 &a, const Vector2 &b) { return Vector2(a.x - b.x, a.y - b.y); } -static Vector2 operator*(const Vector2 &v, float s) -{ +static Vector2 operator*(const Vector2 &v, float s) { return Vector2(v.x * s, v.y * s); } -static float dot(const Vector2 &a, const Vector2 &b) -{ +static float dot(const Vector2 &a, const Vector2 &b) { return a.x * b.x + a.y * b.y; } -static float lengthSquared(const Vector2 &v) -{ +static float lengthSquared(const Vector2 &v) { return v.x * v.x + v.y * v.y; } -static float length(const Vector2 &v) -{ +static float length(const Vector2 &v) { return sqrtf(lengthSquared(v)); } #if XA_DEBUG -static bool isNormalized(const Vector2 &v, float epsilon = kNormalEpsilon) -{ +static bool isNormalized(const Vector2 &v, float epsilon = kNormalEpsilon) { return equal(length(v), 1, epsilon); } #endif -static Vector2 normalize(const Vector2 &v, float epsilon) -{ - float l = length(v); - XA_DEBUG_ASSERT(!isZero(l, epsilon)); - XA_UNUSED(epsilon); - Vector2 n = v * (1.0f / l); +static Vector2 normalize(const Vector2 &v) { + const float l = length(v); + XA_DEBUG_ASSERT(l > 0.0f); // Never negative. + const Vector2 n = v * (1.0f / l); XA_DEBUG_ASSERT(isNormalized(n)); return n; } -static Vector2 normalizeSafe(const Vector2 &v, const Vector2 &fallback, float epsilon) -{ - float l = length(v); - if (isZero(l, epsilon)) - return fallback; - return v * (1.0f / l); +static Vector2 normalizeSafe(const Vector2 &v, const Vector2 &fallback) { + const float l = length(v); + if (l > 0.0f) // Never negative. + return v * (1.0f / l); + return fallback; } -static bool equal(const Vector2 &v1, const Vector2 &v2, float epsilon) -{ +static bool equal(const Vector2 &v1, const Vector2 &v2, float epsilon) { return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon); } -static Vector2 min(const Vector2 &a, const Vector2 &b) -{ +static Vector2 min(const Vector2 &a, const Vector2 &b) { return Vector2(min(a.x, b.x), min(a.y, b.y)); } -static Vector2 max(const Vector2 &a, const Vector2 &b) -{ +static Vector2 max(const Vector2 &a, const Vector2 &b) { return Vector2(max(a.x, b.x), max(a.y, b.y)); } -static bool isFinite(const Vector2 &v) -{ +static bool isFinite(const Vector2 &v) { return isFinite(v.x) && isFinite(v.y); } -static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) -{ +static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) { // IC: While it may be appealing to use the following expression: //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y) * 0.5f; // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point @@ -676,8 +639,7 @@ static float triangleArea(const Vector2 &a, const Vector2 &b, const Vector2 &c) return (v0.x * v1.y - v0.y * v1.x) * 0.5f; } -static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &b1, const Vector2 &b2, float epsilon) -{ +static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 &b1, const Vector2 &b2, float epsilon) { const Vector2 v0 = a2 - a1; const Vector2 v1 = b2 - b1; const float denom = -v1.x * v0.y + v0.x * v1.y; @@ -685,76 +647,70 @@ static bool linesIntersect(const Vector2 &a1, const Vector2 &a2, const Vector2 & return false; const float s = (-v0.y * (a1.x - b1.x) + v0.x * (a1.y - b1.y)) / denom; if (s > epsilon && s < 1.0f - epsilon) { - const float t = ( v1.x * (a1.y - b1.y) - v1.y * (a1.x - b1.x)) / denom; + const float t = (v1.x * (a1.y - b1.y) - v1.y * (a1.x - b1.x)) / denom; return t > epsilon && t < 1.0f - epsilon; } return false; } -struct Vector2i -{ +struct Vector2i { Vector2i() {} - Vector2i(int32_t x, int32_t y) : x(x), y(y) {} + Vector2i(int32_t _x, int32_t _y) : + x(_x), y(_y) {} int32_t x, y; }; -class Vector3 -{ +class Vector3 { public: Vector3() {} - explicit Vector3(float f) : x(f), y(f), z(f) {} - Vector3(float x, float y, float z) : x(x), y(y), z(z) {} - Vector3(const Vector2 &v, float z) : x(v.x), y(v.y), z(z) {} - - Vector2 xy() const - { + explicit Vector3(float f) : + x(f), y(f), z(f) {} + Vector3(float _x, float _y, float _z) : + x(_x), y(_y), z(_z) {} + Vector3(const Vector2 &v, float _z) : + x(v.x), y(v.y), z(_z) {} + + Vector2 xy() const { return Vector2(x, y); } - Vector3 operator-() const - { + Vector3 operator-() const { return Vector3(-x, -y, -z); } - void operator+=(const Vector3 &v) - { + void operator+=(const Vector3 &v) { x += v.x; y += v.y; z += v.z; } - void operator-=(const Vector3 &v) - { + void operator-=(const Vector3 &v) { x -= v.x; y -= v.y; z -= v.z; } - void operator*=(float s) - { + void operator*=(float s) { x *= s; y *= s; z *= s; } - void operator/=(float s) - { + void operator/=(float s) { float is = 1.0f / s; x *= is; y *= is; z *= is; } - void operator*=(const Vector3 &v) - { + void operator*=(const Vector3 &v) { x *= v.x; y *= v.y; z *= v.z; } - void operator/=(const Vector3 &v) - { + void operator/=(const Vector3 &v) { x /= v.x; y /= v.y; z /= v.z; @@ -763,260 +719,151 @@ public: float x, y, z; }; -static Vector3 operator+(const Vector3 &a, const Vector3 &b) -{ +static Vector3 operator+(const Vector3 &a, const Vector3 &b) { return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); } -static Vector3 operator-(const Vector3 &a, const Vector3 &b) -{ +static Vector3 operator-(const Vector3 &a, const Vector3 &b) { return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); } -static Vector3 cross(const Vector3 &a, const Vector3 &b) -{ +static bool operator==(const Vector3 &a, const Vector3 &b) { + return a.x == b.x && a.y == b.y && a.z == b.z; +} + +static Vector3 cross(const Vector3 &a, const Vector3 &b) { return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); } -static Vector3 operator*(const Vector3 &v, float s) -{ +static Vector3 operator*(const Vector3 &v, float s) { return Vector3(v.x * s, v.y * s, v.z * s); } -static Vector3 operator/(const Vector3 &v, float s) -{ +static Vector3 operator/(const Vector3 &v, float s) { return v * (1.0f / s); } -static float dot(const Vector3 &a, const Vector3 &b) -{ +static float dot(const Vector3 &a, const Vector3 &b) { return a.x * b.x + a.y * b.y + a.z * b.z; } -static float lengthSquared(const Vector3 &v) -{ +static float lengthSquared(const Vector3 &v) { return v.x * v.x + v.y * v.y + v.z * v.z; } -static float length(const Vector3 &v) -{ +static float length(const Vector3 &v) { return sqrtf(lengthSquared(v)); } -static bool isNormalized(const Vector3 &v, float epsilon = kNormalEpsilon) -{ - return equal(length(v), 1, epsilon); +static bool isNormalized(const Vector3 &v, float epsilon = kNormalEpsilon) { + return equal(length(v), 1.0f, epsilon); } -static Vector3 normalize(const Vector3 &v, float epsilon) -{ - float l = length(v); - XA_DEBUG_ASSERT(!isZero(l, epsilon)); - XA_UNUSED(epsilon); - Vector3 n = v * (1.0f / l); +static Vector3 normalize(const Vector3 &v) { + const float l = length(v); + XA_DEBUG_ASSERT(l > 0.0f); // Never negative. + const Vector3 n = v * (1.0f / l); XA_DEBUG_ASSERT(isNormalized(n)); return n; } -static Vector3 normalizeSafe(const Vector3 &v, const Vector3 &fallback, float epsilon) -{ - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; - } - return v * (1.0f / l); +static Vector3 normalizeSafe(const Vector3 &v, const Vector3 &fallback) { + const float l = length(v); + if (l > 0.0f) // Never negative. + return v * (1.0f / l); + return fallback; } -static bool equal(const Vector3 &v0, const Vector3 &v1, float epsilon) -{ +static bool equal(const Vector3 &v0, const Vector3 &v1, float epsilon) { return fabs(v0.x - v1.x) <= epsilon && fabs(v0.y - v1.y) <= epsilon && fabs(v0.z - v1.z) <= epsilon; } -static Vector3 min(const Vector3 &a, const Vector3 &b) -{ +static Vector3 min(const Vector3 &a, const Vector3 &b) { return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); } -static Vector3 max(const Vector3 &a, const Vector3 &b) -{ +static Vector3 max(const Vector3 &a, const Vector3 &b) { return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); } #if XA_DEBUG -bool isFinite(const Vector3 &v) -{ +bool isFinite(const Vector3 &v) { return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); } #endif -struct Extents2 -{ +struct Extents2 { Vector2 min, max; Extents2() {} - - Extents2(Vector2 p1, Vector2 p2) - { + + Extents2(Vector2 p1, Vector2 p2) { min = xatlas::internal::min(p1, p2); max = xatlas::internal::max(p1, p2); } - void reset() - { + void reset() { min.x = min.y = FLT_MAX; max.x = max.y = -FLT_MAX; } - void add(Vector2 p) - { + void add(Vector2 p) { min = xatlas::internal::min(min, p); max = xatlas::internal::max(max, p); } - Vector2 midpoint() const - { + Vector2 midpoint() const { return Vector2(min.x + (max.x - min.x) * 0.5f, min.y + (max.y - min.y) * 0.5f); } - static bool intersect(const Extents2 &e1, const Extents2 &e2) - { + static bool intersect(const Extents2 &e1, const Extents2 &e2) { return e1.min.x <= e2.max.x && e1.max.x >= e2.min.x && e1.min.y <= e2.max.y && e1.max.y >= e2.min.y; } }; -struct Plane -{ - Plane() = default; - - Plane(const Vector3 &p1, const Vector3 &p2, const Vector3 &p3) - { - normal = cross(p2 - p1, p3 - p1); - dist = dot(normal, p1); - } - - float distance(const Vector3 &p) const - { - return dot(normal, p) - dist; - } - - void normalize() - { - const float len = length(normal); - if (len > 0.0f) { - const float il = 1.0f / len; - normal *= il; - dist *= il; - } - } - - Vector3 normal; - float dist; -}; - -static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, const Vector3 &lineEnd, float *t, float epsilon) -{ - float tt; - if (!t) - t = &tt; - *t = 0.0f; - if (equal(lineStart, point, epsilon) || equal(lineEnd, point, epsilon)) - return false; // Vertex lies on either line vertices. - const Vector3 v01 = point - lineStart; - const Vector3 v21 = lineEnd - lineStart; - const float l = length(v21); - const float d = length(cross(v01, v21)) / l; - if (!isZero(d, epsilon)) - return false; - *t = dot(v01, v21) / (l * l); - return *t > kEpsilon && *t < 1.0f - kEpsilon; -} - -static bool sameSide(const Vector3 &p1, const Vector3 &p2, const Vector3 &a, const Vector3 &b) -{ - const Vector3 &ab = b - a; - return dot(cross(ab, p1 - a), cross(ab, p2 - a)) >= 0.0f; -} - -// http://blackpawn.com/texts/pointinpoly/default.html -static bool pointInTriangle(const Vector3 &p, const Vector3 &a, const Vector3 &b, const Vector3 &c) -{ - return sameSide(p, a, b, c) && sameSide(p, b, a, c) && sameSide(p, c, a, b); -} - -#if XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION -// https://en.wikipedia.org/wiki/M%C3%B6ller%E2%80%93Trumbore_intersection_algorithm -static bool rayIntersectsTriangle(const Vector3 &rayOrigin, const Vector3 &rayDir, const Vector3 *tri, float *t) -{ - *t = 0.0f; - const Vector3 &edge1 = tri[1] - tri[0]; - const Vector3 &edge2 = tri[2] - tri[0]; - const Vector3 h = cross(rayDir, edge2); - const float a = dot(edge1, h); - if (a > -kEpsilon && a < kEpsilon) - return false; // This ray is parallel to this triangle. - const float f = 1.0f / a; - const Vector3 s = rayOrigin - tri[0]; - const float u = f * dot(s, h); - if (u < 0.0f || u > 1.0f) - return false; - const Vector3 q = cross(s, edge1); - const float v = f * dot(rayDir, q); - if (v < 0.0f || u + v > 1.0f) - return false; - // At this stage we can compute t to find out where the intersection point is on the line. - *t = f * dot(edge2, q); - if (*t > kEpsilon && *t < 1.0f - kEpsilon) - return true; - // This means that there is a line intersection but not a ray intersection. - return false; -} -#endif - // From Fast-BVH -struct AABB -{ - AABB() : min(FLT_MAX, FLT_MAX, FLT_MAX), max(-FLT_MAX, -FLT_MAX, -FLT_MAX) {} - AABB(const Vector3 &min, const Vector3 &max) : min(min), max(max) { } - AABB(const Vector3 &p, float radius = 0.0f) : min(p), max(p) { if (radius > 0.0f) expand(radius); } - - bool intersect(const AABB &other) const - { +struct AABB { + AABB() : + min(FLT_MAX, FLT_MAX, FLT_MAX), max(-FLT_MAX, -FLT_MAX, -FLT_MAX) {} + AABB(const Vector3 &_min, const Vector3 &_max) : + min(_min), max(_max) {} + AABB(const Vector3 &p, float radius = 0.0f) : + min(p), max(p) { + if (radius > 0.0f) + expand(radius); + } + + bool intersect(const AABB &other) const { return min.x <= other.max.x && max.x >= other.min.x && min.y <= other.max.y && max.y >= other.min.y && min.z <= other.max.z && max.z >= other.min.z; } - void expandToInclude(const Vector3 &p) - { + void expandToInclude(const Vector3 &p) { min = internal::min(min, p); max = internal::max(max, p); } - void expandToInclude(const AABB &aabb) - { + void expandToInclude(const AABB &aabb) { min = internal::min(min, aabb.min); max = internal::max(max, aabb.max); } - void expand(float amount) - { + void expand(float amount) { min -= Vector3(amount); max += Vector3(amount); } - Vector3 centroid() const - { + Vector3 centroid() const { return min + (max - min) * 0.5f; } - uint32_t maxDimension() const - { + uint32_t maxDimension() const { const Vector3 extent = max - min; uint32_t result = 0; if (extent.y > extent.x) { result = 1; if (extent.z > extent.y) result = 2; - } - else if(extent.z > extent.x) + } else if (extent.z > extent.x) result = 2; return result; } @@ -1024,10 +871,9 @@ struct AABB Vector3 min, max; }; -struct ArrayBase -{ - ArrayBase(uint32_t elementSize, int memTag = MemTag::Default) : buffer(nullptr), elementSize(elementSize), size(0), capacity(0) - { +struct ArrayBase { + ArrayBase(uint32_t _elementSize, int memTag = MemTag::Default) : + buffer(nullptr), elementSize(_elementSize), size(0), capacity(0) { #if XA_DEBUG_HEAP this->memTag = memTag; #else @@ -1035,31 +881,31 @@ struct ArrayBase #endif } - ~ArrayBase() - { + ~ArrayBase() { XA_FREE(buffer); } - XA_INLINE void clear() - { + XA_INLINE void clear() { size = 0; } - void copyFrom(const uint8_t *data, uint32_t length) - { + void copyFrom(const uint8_t *data, uint32_t length) { + XA_DEBUG_ASSERT(data); + XA_DEBUG_ASSERT(length > 0); resize(length, true); - memcpy(buffer, data, length * elementSize); + if (buffer && data && length > 0) + memcpy(buffer, data, length * elementSize); } - void copyTo(ArrayBase &other) const - { + void copyTo(ArrayBase &other) const { XA_DEBUG_ASSERT(elementSize == other.elementSize); + XA_DEBUG_ASSERT(size > 0); other.resize(size, true); - memcpy(other.buffer, buffer, size * elementSize); + if (other.buffer && buffer && size > 0) + memcpy(other.buffer, buffer, size * elementSize); } - void destroy() - { + void destroy() { size = 0; XA_FREE(buffer); buffer = nullptr; @@ -1068,17 +914,18 @@ struct ArrayBase } // Insert the given element at the given index shifting all the elements up. - void insertAt(uint32_t index, const uint8_t *value) - { + void insertAt(uint32_t index, const uint8_t *value) { XA_DEBUG_ASSERT(index >= 0 && index <= size); + XA_DEBUG_ASSERT(value); resize(size + 1, false); - if (index < size - 1) + XA_DEBUG_ASSERT(buffer); + if (buffer && index < size - 1) memmove(buffer + elementSize * (index + 1), buffer + elementSize * index, elementSize * (size - 1 - index)); - memcpy(&buffer[index * elementSize], value, elementSize); + if (buffer && value) + memcpy(&buffer[index * elementSize], value, elementSize); } - void moveTo(ArrayBase &other) - { + void moveTo(ArrayBase &other) { XA_DEBUG_ASSERT(elementSize == other.elementSize); other.destroy(); other.buffer = buffer; @@ -1092,55 +939,61 @@ struct ArrayBase elementSize = size = capacity = 0; } - void pop_back() - { + void pop_back() { XA_DEBUG_ASSERT(size > 0); resize(size - 1, false); } - void push_back(const uint8_t *value) - { + void push_back(const uint8_t *value) { XA_DEBUG_ASSERT(value < buffer || value >= buffer + size); + XA_DEBUG_ASSERT(value); resize(size + 1, false); - memcpy(&buffer[(size - 1) * elementSize], value, elementSize); + XA_DEBUG_ASSERT(buffer); + if (buffer && value) + memcpy(&buffer[(size - 1) * elementSize], value, elementSize); } - void push_back(const ArrayBase &other) - { + void push_back(const ArrayBase &other) { XA_DEBUG_ASSERT(elementSize == other.elementSize); - if (other.size == 0) - return; - const uint32_t oldSize = size; - resize(size + other.size, false); - memcpy(buffer + oldSize * elementSize, other.buffer, other.size * other.elementSize); + if (other.size > 0) { + const uint32_t oldSize = size; + resize(size + other.size, false); + XA_DEBUG_ASSERT(buffer); + if (buffer) + memcpy(buffer + oldSize * elementSize, other.buffer, other.size * other.elementSize); + } } // Remove the element at the given index. This is an expensive operation! - void removeAt(uint32_t index) - { + void removeAt(uint32_t index) { XA_DEBUG_ASSERT(index >= 0 && index < size); - if (size != 1) - memmove(buffer + elementSize * index, buffer + elementSize * (index + 1), elementSize * (size - 1 - index)); - size--; + XA_DEBUG_ASSERT(buffer); + if (buffer) { + if (size > 1) + memmove(buffer + elementSize * index, buffer + elementSize * (index + 1), elementSize * (size - 1 - index)); + if (size > 0) + size--; + } } // Element at index is swapped with the last element, then the array length is decremented. - void removeAtFast(uint32_t index) - { + void removeAtFast(uint32_t index) { XA_DEBUG_ASSERT(index >= 0 && index < size); - if (size != 1 && index != size - 1) - memcpy(buffer + elementSize * index, buffer + elementSize * (size - 1), elementSize); - size--; + XA_DEBUG_ASSERT(buffer); + if (buffer) { + if (size > 1 && index != size - 1) + memcpy(buffer + elementSize * index, buffer + elementSize * (size - 1), elementSize); + if (size > 0) + size--; + } } - void reserve(uint32_t desiredSize) - { + void reserve(uint32_t desiredSize) { if (desiredSize > capacity) setArrayCapacity(desiredSize); } - void resize(uint32_t newSize, bool exact) - { + void resize(uint32_t newSize, bool exact) { size = newSize; if (size > capacity) { // First allocation is always exact. Otherwise, following allocations grow array to 150% of desired size. @@ -1153,8 +1006,7 @@ struct ArrayBase } } - void setArrayCapacity(uint32_t newCapacity) - { + void setArrayCapacity(uint32_t newCapacity) { XA_DEBUG_ASSERT(newCapacity >= size); if (newCapacity == 0) { // free the buffer. @@ -1174,8 +1026,7 @@ struct ArrayBase } #if XA_DEBUG_HEAP - void setMemTag(int _memTag) - { + void setMemTag(int _memTag) { this->memTag = _memTag; } #endif @@ -1189,28 +1040,27 @@ struct ArrayBase #endif }; -template<typename T> -class Array -{ +template <typename T> +class Array { public: - Array(int memTag = MemTag::Default) : m_base(sizeof(T), memTag) {} - Array(const Array&) = delete; + Array(int memTag = MemTag::Default) : + m_base(sizeof(T), memTag) {} + Array(const Array &) = delete; Array &operator=(const Array &) = delete; - XA_INLINE const T &operator[](uint32_t index) const - { + XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < m_base.size); + XA_DEBUG_ASSERT(m_base.buffer); return ((const T *)m_base.buffer)[index]; } - XA_INLINE T &operator[](uint32_t index) - { + XA_INLINE T &operator[](uint32_t index) { XA_DEBUG_ASSERT(index < m_base.size); + XA_DEBUG_ASSERT(m_base.buffer); return ((T *)m_base.buffer)[index]; } - XA_INLINE const T &back() const - { + XA_INLINE const T &back() const { XA_DEBUG_ASSERT(!isEmpty()); return ((const T *)m_base.buffer)[m_base.size - 1]; } @@ -1218,8 +1068,7 @@ public: XA_INLINE T *begin() { return (T *)m_base.buffer; } XA_INLINE void clear() { m_base.clear(); } - bool contains(const T &value) const - { + bool contains(const T &value) const { for (uint32_t i = 0; i < m_base.size; i++) { if (((const T *)m_base.buffer)[i] == value) return true; @@ -1244,28 +1093,25 @@ public: void reserve(uint32_t desiredSize) { m_base.reserve(desiredSize); } void resize(uint32_t newSize) { m_base.resize(newSize, true); } - void runCtors() - { + void runCtors() { for (uint32_t i = 0; i < m_base.size; i++) new (&((T *)m_base.buffer)[i]) T; } - void runDtors() - { + void runDtors() { for (uint32_t i = 0; i < m_base.size; i++) ((T *)m_base.buffer)[i].~T(); } - void fill(const T &value) - { + void fill(const T &value) { auto buffer = (T *)m_base.buffer; for (uint32_t i = 0; i < m_base.size; i++) buffer[i] = value; } - void fillBytes(uint8_t value) - { - memset(m_base.buffer, (int)value, m_base.size * m_base.elementSize); + void fillBytes(uint8_t value) { + if (m_base.buffer && m_base.size > 0) + memset(m_base.buffer, (int)value, m_base.size * m_base.elementSize); } #if XA_DEBUG_HEAP @@ -1273,41 +1119,67 @@ public: #endif XA_INLINE uint32_t size() const { return m_base.size; } - XA_INLINE void zeroOutMemory() { memset(m_base.buffer, 0, m_base.elementSize * m_base.size); } + + XA_INLINE void zeroOutMemory() { + if (m_base.buffer && m_base.size > 0) + memset(m_base.buffer, 0, m_base.elementSize * m_base.size); + } private: ArrayBase m_base; }; -template<typename T> -struct ArrayView -{ - ArrayView() : data(nullptr), length(0) {} - ArrayView(Array<T> &a) : data(a.data()), length(a.size()) {} - ArrayView(T *data, uint32_t length) : data(data), length(length) {} - ArrayView &operator=(Array<T> &a) { data = a.data(); length = a.size(); return *this; } - XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < length); return data[index]; } +template <typename T> +struct ArrayView { + ArrayView() : + data(nullptr), length(0) {} + ArrayView(Array<T> &a) : + data(a.data()), length(a.size()) {} + ArrayView(T *_data, uint32_t _length) : + data(_data), length(_length) {} + ArrayView &operator=(Array<T> &a) { + data = a.data(); + length = a.size(); + return *this; + } + XA_INLINE const T &operator[](uint32_t index) const { + XA_DEBUG_ASSERT(index < length); + return data[index]; + } + XA_INLINE T &operator[](uint32_t index) { + XA_DEBUG_ASSERT(index < length); + return data[index]; + } T *data; uint32_t length; }; -template<typename T> -struct ConstArrayView -{ - ConstArrayView() : data(nullptr), length(0) {} - ConstArrayView(const Array<T> &a) : data(a.data()), length(a.size()) {} - ConstArrayView(const T *data, uint32_t length) : data(data), length(length) {} - ConstArrayView &operator=(const Array<T> &a) { data = a.data(); length = a.size(); return *this; } - XA_INLINE const T &operator[](uint32_t index) const { XA_DEBUG_ASSERT(index < length); return data[index]; } +template <typename T> +struct ConstArrayView { + ConstArrayView() : + data(nullptr), length(0) {} + ConstArrayView(const Array<T> &a) : + data(a.data()), length(a.size()) {} + ConstArrayView(ArrayView<T> av) : + data(av.data), length(av.length) {} + ConstArrayView(const T *_data, uint32_t _length) : + data(_data), length(_length) {} + ConstArrayView &operator=(const Array<T> &a) { + data = a.data(); + length = a.size(); + return *this; + } + XA_INLINE const T &operator[](uint32_t index) const { + XA_DEBUG_ASSERT(index < length); + return data[index]; + } const T *data; uint32_t length; }; /// Basis class to compute tangent space basis, ortogonalizations and to transform vectors from one space to another. -struct Basis -{ - XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal) - { +struct Basis { + XA_NODISCARD static Vector3 computeTangent(const Vector3 &normal) { XA_ASSERT(isNormalized(normal)); // Choose minimum axis. Vector3 tangent; @@ -1319,12 +1191,11 @@ struct Basis tangent = Vector3(0, 0, 1); // Ortogonalize tangent -= normal * dot(normal, tangent); - tangent = normalize(tangent, kEpsilon); + tangent = normalize(tangent); return tangent; } - XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent) - { + XA_NODISCARD static Vector3 computeBitangent(const Vector3 &normal, const Vector3 &tangent) { return cross(normal, tangent); } @@ -1334,42 +1205,36 @@ struct Basis }; // Simple bit array. -class BitArray -{ +class BitArray { public: - BitArray() : m_size(0) {} + BitArray() : + m_size(0) {} - BitArray(uint32_t sz) - { + BitArray(uint32_t sz) { resize(sz); } - void resize(uint32_t new_size) - { + void resize(uint32_t new_size) { m_size = new_size; m_wordArray.resize((m_size + 31) >> 5); } - bool get(uint32_t index) const - { + bool get(uint32_t index) const { XA_DEBUG_ASSERT(index < m_size); return (m_wordArray[index >> 5] & (1 << (index & 31))) != 0; } - void set(uint32_t index) - { + void set(uint32_t index) { XA_DEBUG_ASSERT(index < m_size); m_wordArray[index >> 5] |= (1 << (index & 31)); } - void unset(uint32_t index) - { + void unset(uint32_t index) { XA_DEBUG_ASSERT(index < m_size); m_wordArray[index >> 5] &= ~(1 << (index & 31)); } - void zeroOutMemory() - { + void zeroOutMemory() { m_wordArray.zeroOutMemory(); } @@ -1378,13 +1243,13 @@ private: Array<uint32_t> m_wordArray; }; -class BitImage -{ +class BitImage { public: - BitImage() : m_width(0), m_height(0), m_rowStride(0), m_data(MemTag::BitImage) {} + BitImage() : + m_width(0), m_height(0), m_rowStride(0), m_data(MemTag::BitImage) {} - BitImage(uint32_t w, uint32_t h) : m_width(w), m_height(h), m_data(MemTag::BitImage) - { + BitImage(uint32_t w, uint32_t h) : + m_width(w), m_height(h), m_data(MemTag::BitImage) { m_rowStride = (m_width + 63) >> 6; m_data.resize(m_rowStride * m_height); m_data.zeroOutMemory(); @@ -1395,16 +1260,14 @@ public: uint32_t width() const { return m_width; } uint32_t height() const { return m_height; } - void copyTo(BitImage &other) - { + void copyTo(BitImage &other) { other.m_width = m_width; other.m_height = m_height; other.m_rowStride = m_rowStride; m_data.copyTo(other.m_data); } - void resize(uint32_t w, uint32_t h, bool discard) - { + void resize(uint32_t w, uint32_t h, bool discard) { const uint32_t rowStride = (w + 63) >> 6; if (discard) { m_data.resize(rowStride * h); @@ -1428,28 +1291,24 @@ public: m_rowStride = rowStride; } - bool get(uint32_t x, uint32_t y) const - { + bool get(uint32_t x, uint32_t y) const { XA_DEBUG_ASSERT(x < m_width && y < m_height); const uint32_t index = (x >> 6) + y * m_rowStride; return (m_data[index] & (UINT64_C(1) << (uint64_t(x) & UINT64_C(63)))) != 0; } - void set(uint32_t x, uint32_t y) - { + void set(uint32_t x, uint32_t y) { XA_DEBUG_ASSERT(x < m_width && y < m_height); const uint32_t index = (x >> 6) + y * m_rowStride; m_data[index] |= UINT64_C(1) << (uint64_t(x) & UINT64_C(63)); XA_DEBUG_ASSERT(get(x, y)); } - void zeroOutMemory() - { + void zeroOutMemory() { m_data.zeroOutMemory(); } - bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const - { + bool canBlit(const BitImage &image, uint32_t offsetX, uint32_t offsetY) const { for (uint32_t y = 0; y < image.m_height; y++) { const uint32_t thisY = y + offsetY; if (thisY >= m_height) @@ -1473,8 +1332,7 @@ public: return true; } - void dilate(uint32_t padding) - { + void dilate(uint32_t padding) { BitImage tmp(m_width, m_height); for (uint32_t p = 0; p < padding; p++) { tmp.zeroOutMemory(); @@ -1484,15 +1342,21 @@ public: if (!b) { if (x > 0) { b |= get(x - 1, y); - if (y > 0) b |= get(x - 1, y - 1); - if (y < m_height - 1) b |= get(x - 1, y + 1); + if (y > 0) + b |= get(x - 1, y - 1); + if (y < m_height - 1) + b |= get(x - 1, y + 1); } - if (y > 0) b |= get(x, y - 1); - if (y < m_height - 1) b |= get(x, y + 1); + if (y > 0) + b |= get(x, y - 1); + if (y < m_height - 1) + b |= get(x, y + 1); if (x < m_width - 1) { b |= get(x + 1, y); - if (y > 0) b |= get(x + 1, y - 1); - if (y < m_height - 1) b |= get(x + 1, y + 1); + if (y > 0) + b |= get(x + 1, y - 1); + if (y < m_height - 1) + b |= get(x + 1, y + 1); } } if (b) @@ -1511,11 +1375,10 @@ private: }; // From Fast-BVH -class BVH -{ +class BVH { public: - BVH(const Array<AABB> &objectAabbs, uint32_t leafSize = 4) : m_objectIds(MemTag::BVH), m_nodes(MemTag::BVH) - { + BVH(const Array<AABB> &objectAabbs, uint32_t leafSize = 4) : + m_objectIds(MemTag::BVH), m_nodes(MemTag::BVH) { m_objectAabbs = &objectAabbs; if (m_objectAabbs->isEmpty()) return; @@ -1535,7 +1398,7 @@ public: Node node; m_nodes.reserve(objectAabbs.size() * 2); uint32_t nNodes = 0; - while(stackptr > 0) { + while (stackptr > 0) { // Pop the next item off of the stack const BuildEntry &bnode = todo[--stackptr]; const uint32_t start = bnode.start; @@ -1548,7 +1411,7 @@ public: // Calculate the bounding box for this node AABB bb(objectAabbs[m_objectIds[start]]); AABB bc(objectAabbs[m_objectIds[start]].centroid()); - for(uint32_t p = start + 1; p < end; ++p) { + for (uint32_t p = start + 1; p < end; ++p) { bb.expandToInclude(objectAabbs[m_objectIds[p]]); bc.expandToInclude(objectAabbs[m_objectIds[p]].centroid()); } @@ -1564,7 +1427,7 @@ public: m_nodes[bnode.parent].rightOffset--; // When this is the second touch, this is the right child. // The right child sets up the offset for the flat tree. - if (m_nodes[bnode.parent].rightOffset == kTouchedTwice ) + if (m_nodes[bnode.parent].rightOffset == kTouchedTwice) m_nodes[bnode.parent].rightOffset = nNodes - 1 - bnode.parent; } // If this is a leaf, no need to subdivide. @@ -1599,21 +1462,20 @@ public: } } - void query(const AABB &queryAabb, Array<uint32_t> &result) const - { + void query(const AABB &queryAabb, Array<uint32_t> &result) const { result.clear(); // Working set uint32_t todo[64]; int32_t stackptr = 0; // "Push" on the root node to the working set todo[stackptr] = 0; - while(stackptr >= 0) { + while (stackptr >= 0) { // Pop off the next node to work on. const int ni = todo[stackptr--]; const Node &node = m_nodes[ni]; // Is leaf -> Intersect if (node.rightOffset == 0) { - for(uint32_t o = 0; o < node.nPrims; ++o) { + for (uint32_t o = 0; o < node.nPrims; ++o) { const uint32_t obj = node.start + o; if (queryAabb.intersect((*m_objectAabbs)[m_objectIds[obj]])) result.push_back(m_objectIds[obj]); @@ -1630,14 +1492,12 @@ public: } private: - struct BuildEntry - { + struct BuildEntry { uint32_t parent; // If non-zero then this is the index of the parent. (used in offsets) uint32_t start, end; // The range of objects in the object list covered by this node. }; - struct Node - { + struct Node { AABB aabb; uint32_t start, nPrims, rightOffset; }; @@ -1647,16 +1507,14 @@ private: Array<Node> m_nodes; }; -struct Fit -{ - static bool computeBasis(const Vector3 *points, uint32_t pointsCount, Basis *basis) - { - if (computeLeastSquaresNormal(points, pointsCount, &basis->normal)) { +struct Fit { + static bool computeBasis(ConstArrayView<Vector3> points, Basis *basis) { + if (computeLeastSquaresNormal(points, &basis->normal)) { basis->tangent = Basis::computeTangent(basis->normal); basis->bitangent = Basis::computeBitangent(basis->normal, basis->tangent); return true; } - return computeEigen(points, pointsCount, basis); + return computeEigen(points, basis); } private: @@ -1664,21 +1522,20 @@ private: // Fast, and accurate to within a few degrees. // Returns None if the points do not span a plane. // https://www.ilikebigbits.com/2015_03_04_plane_from_points.html - static bool computeLeastSquaresNormal(const Vector3 *points, uint32_t pointsCount, Vector3 *normal) - { - XA_DEBUG_ASSERT(pointsCount >= 3); - if (pointsCount == 3) { - *normal = normalize(cross(points[2] - points[0], points[1] - points[0]), kEpsilon); + static bool computeLeastSquaresNormal(ConstArrayView<Vector3> points, Vector3 *normal) { + XA_DEBUG_ASSERT(points.length >= 3); + if (points.length == 3) { + *normal = normalize(cross(points[2] - points[0], points[1] - points[0])); return true; } - const float invN = 1.0f / float(pointsCount); + const float invN = 1.0f / float(points.length); Vector3 centroid(0.0f); - for (uint32_t i = 0; i < pointsCount; i++) + for (uint32_t i = 0; i < points.length; i++) centroid += points[i]; centroid *= invN; // Calculate full 3x3 covariance matrix, excluding symmetries: float xx = 0.0f, xy = 0.0f, xz = 0.0f, yy = 0.0f, yz = 0.0f, zz = 0.0f; - for (uint32_t i = 0; i < pointsCount; i++) { + for (uint32_t i = 0; i < points.length; i++) { Vector3 r = points[i] - centroid; xx += r.x * r.x; xy += r.x * r.y; @@ -1730,7 +1587,7 @@ private: // Pick path with best conditioning: Vector3 dir(0.0f); if (det_max == det_x) - dir = Vector3(det_x,xz * yz - xy * zz,xy * yz - xz * yy); + dir = Vector3(det_x, xz * yz - xy * zz, xy * yz - xz * yy); else if (det_max == det_y) dir = Vector3(xz * yz - xy * zz, det_y, xy * xz - yz * xx); else if (det_max == det_z) @@ -1743,41 +1600,37 @@ private: return isNormalized(*normal); } - static bool computeEigen(const Vector3 *points, uint32_t pointsCount, Basis *basis) - { + static bool computeEigen(ConstArrayView<Vector3> points, Basis *basis) { float matrix[6]; - computeCovariance(pointsCount, points, matrix); + computeCovariance(points, matrix); if (matrix[0] == 0 && matrix[3] == 0 && matrix[5] == 0) return false; float eigenValues[3]; Vector3 eigenVectors[3]; if (!eigenSolveSymmetric3(matrix, eigenValues, eigenVectors)) return false; - basis->normal = normalize(eigenVectors[2], kEpsilon); - basis->tangent = normalize(eigenVectors[0], kEpsilon); - basis->bitangent = normalize(eigenVectors[1], kEpsilon); + basis->normal = normalize(eigenVectors[2]); + basis->tangent = normalize(eigenVectors[0]); + basis->bitangent = normalize(eigenVectors[1]); return true; } - static Vector3 computeCentroid(int n, const Vector3 * points) - { + static Vector3 computeCentroid(ConstArrayView<Vector3> points) { Vector3 centroid(0.0f); - for (int i = 0; i < n; i++) { + for (uint32_t i = 0; i < points.length; i++) centroid += points[i]; - } - centroid /= float(n); + centroid /= float(points.length); return centroid; } - static Vector3 computeCovariance(int n, const Vector3 * points, float * covariance) - { + static Vector3 computeCovariance(ConstArrayView<Vector3> points, float *covariance) { // compute the centroid - Vector3 centroid = computeCentroid(n, points); + Vector3 centroid = computeCentroid(points); // compute covariance matrix for (int i = 0; i < 6; i++) { covariance[i] = 0.0f; } - for (int i = 0; i < n; i++) { + for (uint32_t i = 0; i < points.length; i++) { Vector3 v = points[i] - centroid; covariance[0] += v.x * v.x; covariance[1] += v.x * v.y; @@ -1792,8 +1645,7 @@ private: // Tridiagonal solver from Charles Bloom. // Householder transforms followed by QL decomposition. // Seems to be based on the code from Numerical Recipes in C. - static bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) - { + static bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]) { XA_DEBUG_ASSERT(matrix != nullptr && eigenValues != nullptr && eigenVectors != nullptr); float subd[3]; float diag[3]; @@ -1818,7 +1670,7 @@ private: // eigenvectors are the columns; make them the rows : for (int i = 0; i < 3; i++) { for (int j = 0; j < 3; j++) { - (&eigenVectors[j].x)[i] = (float) work[i][j]; + (&eigenVectors[j].x)[i] = (float)work[i][j]; } } // shuffle to sort by singular value : @@ -1840,8 +1692,7 @@ private: } private: - static void EigenSolver3_Tridiagonal(float mat[3][3], float *diag, float *subd) - { + static void EigenSolver3_Tridiagonal(float mat[3][3], float *diag, float *subd) { // Householder reduction T = Q^t M Q // Input: // mat, symmetric 3x3 matrix M @@ -1893,8 +1744,7 @@ private: } } - static bool EigenSolver3_QLAlgorithm(float mat[3][3], float *diag, float *subd) - { + static bool EigenSolver3_QLAlgorithm(float mat[3][3], float *diag, float *subd) { // QL iteration with implicit shifting to reduce matrix from tridiagonal // to diagonal const int maxiter = 32; @@ -1904,21 +1754,21 @@ private: int m; for (m = ell; m <= 1; m++) { float dd = fabsf(diag[m]) + fabsf(diag[m + 1]); - if ( fabsf(subd[m]) + dd == dd ) + if (fabsf(subd[m]) + dd == dd) break; } - if ( m == ell ) + if (m == ell) break; float g = (diag[ell + 1] - diag[ell]) / (2 * subd[ell]); float r = sqrtf(g * g + 1); - if ( g < 0 ) + if (g < 0) g = diag[m] - diag[ell] + subd[ell] / (g - r); else g = diag[m] - diag[ell] + subd[ell] / (g + r); float s = 1, c = 1, p = 0; for (int i = m - 1; i >= ell; i--) { float f = s * subd[i], b = c * subd[i]; - if ( fabsf(f) >= fabsf(g) ) { + if (fabsf(f) >= fabsf(g)) { c = g / f; r = sqrtf(c * c + 1); subd[i + 1] = f * r; @@ -1944,7 +1794,7 @@ private: subd[ell] = g; subd[m] = 0; } - if ( iter == maxiter ) + if (iter == maxiter) // should not get here under normal circumstances return false; } @@ -1952,56 +1802,48 @@ private: } }; -static uint32_t sdbmHash(const void *data_in, uint32_t size, uint32_t h = 5381) -{ - const uint8_t *data = (const uint8_t *) data_in; +static uint32_t sdbmHash(const void *data_in, uint32_t size, uint32_t h = 5381) { + const uint8_t *data = (const uint8_t *)data_in; uint32_t i = 0; while (i < size) { - h = (h << 16) + (h << 6) - h + (uint32_t ) data[i++]; + h = (h << 16) + (h << 6) - h + (uint32_t)data[i++]; } return h; } template <typename T> -static uint32_t hash(const T &t, uint32_t h = 5381) -{ +static uint32_t hash(const T &t, uint32_t h = 5381) { return sdbmHash(&t, sizeof(T), h); } template <typename Key> -struct Hash -{ +struct Hash { uint32_t operator()(const Key &k) const { return hash(k); } }; template <typename Key> -struct PassthroughHash -{ +struct PassthroughHash { uint32_t operator()(const Key &k) const { return (uint32_t)k; } }; template <typename Key> -struct Equal -{ +struct Equal { bool operator()(const Key &k0, const Key &k1) const { return k0 == k1; } }; -template<typename Key, typename H = Hash<Key>, typename E = Equal<Key> > -class HashMap -{ +template <typename Key, typename H = Hash<Key>, typename E = Equal<Key>> +class HashMap { public: - HashMap(int memTag, uint32_t size) : m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag) - { + HashMap(int memTag, uint32_t size) : + m_memTag(memTag), m_size(size), m_numSlots(0), m_slots(nullptr), m_keys(memTag), m_next(memTag) { } - ~HashMap() - { + ~HashMap() { if (m_slots) XA_FREE(m_slots); } - void destroy() - { + void destroy() { if (m_slots) { XA_FREE(m_slots); m_slots = nullptr; @@ -2010,8 +1852,7 @@ public: m_next.destroy(); } - uint32_t add(const Key &key) - { + uint32_t add(const Key &key) { if (!m_slots) alloc(); const uint32_t hash = computeHash(key); @@ -2021,36 +1862,18 @@ public: return m_keys.size() - 1; } - uint32_t get(const Key &key) const - { + uint32_t get(const Key &key) const { if (!m_slots) return UINT32_MAX; - const uint32_t hash = computeHash(key); - uint32_t i = m_slots[hash]; - E equal; - while (i != UINT32_MAX) { - if (equal(m_keys[i], key)) - return i; - i = m_next[i]; - } - return UINT32_MAX; + return find(key, m_slots[computeHash(key)]); } - uint32_t getNext(uint32_t current) const - { - uint32_t i = m_next[current]; - E equal; - while (i != UINT32_MAX) { - if (equal(m_keys[i], m_keys[current])) - return i; - i = m_next[i]; - } - return UINT32_MAX; + uint32_t getNext(const Key &key, uint32_t current) const { + return find(key, m_next[current]); } private: - void alloc() - { + void alloc() { XA_DEBUG_ASSERT(m_size > 0); m_numSlots = nextPowerOfTwo(m_size); auto minNumSlots = uint32_t(m_size * 1.3); @@ -2063,12 +1886,21 @@ private: m_next.reserve(m_size); } - uint32_t computeHash(const Key &key) const - { + uint32_t computeHash(const Key &key) const { H hash; return hash(key) & (m_numSlots - 1); } + uint32_t find(const Key &key, uint32_t current) const { + E equal; + while (current != UINT32_MAX) { + if (equal(m_keys[current], key)) + return current; + current = m_next[current]; + } + return current; + } + int m_memTag; uint32_t m_size; uint32_t m_numSlots; @@ -2077,9 +1909,8 @@ private: Array<uint32_t> m_next; }; -template<typename T> -static void insertionSort(T *data, uint32_t length) -{ +template <typename T> +static void insertionSort(T *data, uint32_t length) { for (int32_t i = 1; i < (int32_t)length; i++) { T x = data[i]; int32_t j = i - 1; @@ -2091,21 +1922,18 @@ static void insertionSort(T *data, uint32_t length) } } -class KISSRng -{ +class KISSRng { public: KISSRng() { reset(); } - void reset() - { + void reset() { x = 123456789; y = 362436000; z = 521288629; c = 7654321; } - uint32_t getRange(uint32_t range) - { + uint32_t getRange(uint32_t range) { if (range == 0) return 0; x = 69069 * x + 12345; @@ -2124,12 +1952,10 @@ private: // Based on Pierre Terdiman's and Michael Herf's source code. // http://www.codercorner.com/RadixSortRevisited.htm // http://www.stereopsis.com/radix.html -class RadixSort -{ +class RadixSort { public: - void sort(const float *input, uint32_t count) - { - if (input == nullptr || count == 0) { + void sort(ConstArrayView<float> input) { + if (input.length == 0) { m_buffer1.clear(); m_buffer2.clear(); m_ranks = m_buffer1.data(); @@ -2137,33 +1963,27 @@ public: return; } // Resize lists if needed - m_buffer1.resize(count); - m_buffer2.resize(count); + m_buffer1.resize(input.length); + m_buffer2.resize(input.length); m_ranks = m_buffer1.data(); m_ranks2 = m_buffer2.data(); m_validRanks = false; - if (count < 32) - insertionSort(input, count); + if (input.length < 32) + insertionSort(input); else { // @@ Avoid touching the input multiple times. - for (uint32_t i = 0; i < count; i++) { + for (uint32_t i = 0; i < input.length; i++) { floatFlip((uint32_t &)input[i]); } - radixSort<uint32_t>((const uint32_t *)input, count); - for (uint32_t i = 0; i < count; i++) { + radixSort(ConstArrayView<uint32_t>((const uint32_t *)input.data, input.length)); + for (uint32_t i = 0; i < input.length; i++) { ifloatFlip((uint32_t &)input[i]); } } } - void sort(const Array<float> &input) - { - sort(input.data(), input.size()); - } - // Access to results. m_ranks is a list of indices in sorted order, i.e. in the order you may further process your data - const uint32_t *ranks() const - { + const uint32_t *ranks() const { XA_DEBUG_ASSERT(m_validRanks); return m_ranks; } @@ -2171,54 +1991,40 @@ public: private: uint32_t *m_ranks, *m_ranks2; Array<uint32_t> m_buffer1, m_buffer2; - bool m_validRanks; + bool m_validRanks = false; - void floatFlip(uint32_t &f) - { + void floatFlip(uint32_t &f) { int32_t mask = (int32_t(f) >> 31) | 0x80000000; // Warren Hunt, Manchor Ko. f ^= mask; } - void ifloatFlip(uint32_t &f) - { + void ifloatFlip(uint32_t &f) { uint32_t mask = ((f >> 31) - 1) | 0x80000000; // Michael Herf. f ^= mask; } - template<typename T> - void createHistograms(const T *buffer, uint32_t count, uint32_t *histogram) - { - const uint32_t bucketCount = sizeof(T); // (8 * sizeof(T)) / log2(radix) + void createHistograms(ConstArrayView<uint32_t> input, uint32_t *histogram) { + const uint32_t bucketCount = sizeof(uint32_t); // Init bucket pointers. uint32_t *h[bucketCount]; for (uint32_t i = 0; i < bucketCount; i++) { h[i] = histogram + 256 * i; } // Clear histograms. - memset(histogram, 0, 256 * bucketCount * sizeof(uint32_t )); + memset(histogram, 0, 256 * bucketCount * sizeof(uint32_t)); // @@ Add support for signed integers. // Build histograms. - const uint8_t *p = (const uint8_t *)buffer; // @@ Does this break aliasing rules? - const uint8_t *pe = p + count * sizeof(T); + const uint8_t *p = (const uint8_t *)input.data; // @@ Does this break aliasing rules? + const uint8_t *pe = p + input.length * sizeof(uint32_t); while (p != pe) { h[0][*p++]++, h[1][*p++]++, h[2][*p++]++, h[3][*p++]++; -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4127) -#endif - if (bucketCount == 8) h[4][*p++]++, h[5][*p++]++, h[6][*p++]++, h[7][*p++]++; -#ifdef _MSC_VER -#pragma warning(pop) -#endif } } - template <typename T> - void insertionSort(const T *input, uint32_t count) - { + void insertionSort(ConstArrayView<float> input) { if (!m_validRanks) { m_ranks[0] = 0; - for (uint32_t i = 1; i != count; ++i) { + for (uint32_t i = 1; i != input.length; ++i) { int rank = m_ranks[i] = i; uint32_t j = i; while (j != 0 && input[rank] < input[m_ranks[j - 1]]) { @@ -2231,7 +2037,7 @@ private: } m_validRanks = true; } else { - for (uint32_t i = 1; i != count; ++i) { + for (uint32_t i = 1; i != input.length; ++i) { int rank = m_ranks[i]; uint32_t j = i; while (j != 0 && input[rank] < input[m_ranks[j - 1]]) { @@ -2245,35 +2051,34 @@ private: } } - template <typename T> - void radixSort(const T *input, uint32_t count) - { - const uint32_t P = sizeof(T); // pass count + void radixSort(ConstArrayView<uint32_t> input) { + const uint32_t P = sizeof(uint32_t); // pass count // Allocate histograms & offsets on the stack uint32_t histogram[256 * P]; uint32_t *link[256]; - createHistograms(input, count, histogram); + createHistograms(input, histogram); // Radix sort, j is the pass number (0=LSB, P=MSB) for (uint32_t j = 0; j < P; j++) { // Pointer to this bucket. const uint32_t *h = &histogram[j * 256]; - const uint8_t *inputBytes = (const uint8_t *)input; // @@ Is this aliasing legal? + auto inputBytes = (const uint8_t *)input.data; // @@ Is this aliasing legal? inputBytes += j; - if (h[inputBytes[0]] == count) { + if (h[inputBytes[0]] == input.length) { // Skip this pass, all values are the same. continue; } // Create offsets link[0] = m_ranks2; - for (uint32_t i = 1; i < 256; i++) link[i] = link[i - 1] + h[i - 1]; + for (uint32_t i = 1; i < 256; i++) + link[i] = link[i - 1] + h[i - 1]; // Perform Radix Sort if (!m_validRanks) { - for (uint32_t i = 0; i < count; i++) { + for (uint32_t i = 0; i < input.length; i++) { *link[inputBytes[i * P]]++ = i; } m_validRanks = true; } else { - for (uint32_t i = 0; i < count; i++) { + for (uint32_t i = 0; i < input.length; i++) { const uint32_t idx = m_ranks[i]; *link[inputBytes[idx * P]]++ = idx; } @@ -2283,7 +2088,7 @@ private: } // All values were equal, generate linear ranks. if (!m_validRanks) { - for (uint32_t i = 0; i < count; i++) + for (uint32_t i = 0; i < input.length; i++) m_ranks[i] = i; m_validRanks = true; } @@ -2291,30 +2096,25 @@ private: }; // Wrapping this in a class allows temporary arrays to be re-used. -class BoundingBox2D -{ +class BoundingBox2D { public: Vector2 majorAxis, minorAxis, minCorner, maxCorner; - void clear() - { + void clear() { m_boundaryVertices.clear(); } - void appendBoundaryVertex(Vector2 v) - { + void appendBoundaryVertex(Vector2 v) { m_boundaryVertices.push_back(v); } // This should compute convex hull and use rotating calipers to find the best box. Currently it uses a brute force method. - // If vertices is null or vertexCount is 0, the boundary vertices are used. - void compute(const Vector2 *vertices = nullptr, uint32_t vertexCount = 0) - { - if (!vertices || vertexCount == 0) { - vertices = m_boundaryVertices.data(); - vertexCount = m_boundaryVertices.size(); - } - convexHull(m_boundaryVertices.data(), m_boundaryVertices.size(), m_hull, 0.00001f); + // If vertices are empty, the boundary vertices are used. + void compute(ConstArrayView<Vector2> vertices = ConstArrayView<Vector2>()) { + XA_DEBUG_ASSERT(!m_boundaryVertices.isEmpty()); + if (vertices.length == 0) + vertices = m_boundaryVertices; + convexHull(m_boundaryVertices, m_hull, 0.00001f); // @@ Ideally I should use rotating calipers to find the best box. Using brute force for now. float best_area = FLT_MAX; Vector2 best_min(0); @@ -2324,13 +2124,13 @@ public: for (uint32_t i = 0, j = hullCount - 1; i < hullCount; j = i, i++) { if (equal(m_hull[i], m_hull[j], kEpsilon)) continue; - Vector2 axis = normalize(m_hull[i] - m_hull[j], 0.0f); + Vector2 axis = normalize(m_hull[i] - m_hull[j]); XA_DEBUG_ASSERT(isFinite(axis)); // Compute bounding box. Vector2 box_min(FLT_MAX, FLT_MAX); Vector2 box_max(-FLT_MAX, -FLT_MAX); // Consider all points, not only boundary points, in case the input chart is malformed. - for (uint32_t v = 0; v < vertexCount; v++) { + for (uint32_t v = 0; v < vertices.length; v++) { const Vector2 &point = vertices[v]; const float x = dot(axis, point); const float y = dot(Vector2(-axis.y, axis.x), point); @@ -2357,28 +2157,27 @@ public: private: // Compute the convex hull using Graham Scan. - void convexHull(const Vector2 *input, uint32_t inputCount, Array<Vector2> &output, float epsilon) - { - m_coords.resize(inputCount); - for (uint32_t i = 0; i < inputCount; i++) + void convexHull(ConstArrayView<Vector2> input, Array<Vector2> &output, float epsilon) { + m_coords.resize(input.length); + for (uint32_t i = 0; i < input.length; i++) m_coords[i] = input[i].x; m_radix.sort(m_coords); const uint32_t *ranks = m_radix.ranks(); m_top.clear(); m_bottom.clear(); - m_top.reserve(inputCount); - m_bottom.reserve(inputCount); + m_top.reserve(input.length); + m_bottom.reserve(input.length); Vector2 P = input[ranks[0]]; - Vector2 Q = input[ranks[inputCount - 1]]; + Vector2 Q = input[ranks[input.length - 1]]; float topy = max(P.y, Q.y); float boty = min(P.y, Q.y); - for (uint32_t i = 0; i < inputCount; i++) { + for (uint32_t i = 0; i < input.length; i++) { Vector2 p = input[ranks[i]]; if (p.y >= boty) m_top.push_back(p); } - for (uint32_t i = 0; i < inputCount; i++) { - Vector2 p = input[ranks[inputCount - 1 - i]]; + for (uint32_t i = 0; i < input.length; i++) { + Vector2 p = input[ranks[input.length - 1 - i]]; if (p.y <= topy) m_bottom.push_back(p); } @@ -2387,7 +2186,7 @@ private: XA_DEBUG_ASSERT(m_top.size() >= 2); output.push_back(m_top[0]); output.push_back(m_top[1]); - for (uint32_t i = 2; i < m_top.size(); ) { + for (uint32_t i = 2; i < m_top.size();) { Vector2 a = output[output.size() - 2]; Vector2 b = output[output.size() - 1]; Vector2 c = m_top[i]; @@ -2403,7 +2202,7 @@ private: XA_DEBUG_ASSERT(m_bottom.size() >= 2); output.push_back(m_bottom[1]); // Filter bottom list. - for (uint32_t i = 2; i < m_bottom.size(); ) { + for (uint32_t i = 2; i < m_bottom.size();) { Vector2 a = output[output.size() - 2]; Vector2 b = output[output.size() - 1]; Vector2 c = m_bottom[i]; @@ -2426,32 +2225,45 @@ private: RadixSort m_radix; }; -static uint32_t meshEdgeFace(uint32_t edge) { return edge / 3; } -static uint32_t meshEdgeIndex0(uint32_t edge) { return edge; } +struct EdgeKey { + EdgeKey(const EdgeKey &k) : + v0(k.v0), v1(k.v1) {} + EdgeKey(uint32_t _v0, uint32_t _v1) : + v0(_v0), v1(_v1) {} + bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; } -static uint32_t meshEdgeIndex1(uint32_t edge) -{ + uint32_t v0; + uint32_t v1; +}; + +struct EdgeHash { + uint32_t operator()(const EdgeKey &k) const { return k.v0 * 32768u + k.v1; } +}; + +static uint32_t meshEdgeFace(uint32_t edge) { + return edge / 3; +} +static uint32_t meshEdgeIndex0(uint32_t edge) { + return edge; +} + +static uint32_t meshEdgeIndex1(uint32_t edge) { const uint32_t faceFirstEdge = edge / 3 * 3; return faceFirstEdge + (edge - faceFirstEdge + 1) % 3; } -struct MeshFlags -{ - enum - { - HasIgnoredFaces = 1<<0, - HasNormals = 1<<1 +struct MeshFlags { + enum { + HasIgnoredFaces = 1 << 0, + HasNormals = 1 << 1, + HasMaterials = 1 << 2 }; }; -class Mesh; -static void meshGetBoundaryLoops(const Mesh &mesh, Array<uint32_t> &boundaryLoops); - -class Mesh -{ +class Mesh { public: - Mesh(float epsilon, uint32_t approxVertexCount, uint32_t approxFaceCount, uint32_t flags = 0, uint32_t id = UINT32_MAX) : m_epsilon(epsilon), m_flags(flags), m_id(id), m_faceIgnore(MemTag::Mesh), m_indices(MemTag::MeshIndices), m_positions(MemTag::MeshPositions), m_normals(MemTag::MeshNormals), m_texcoords(MemTag::MeshTexcoords), m_nextColocalVertex(MemTag::MeshColocals), m_boundaryEdges(MemTag::MeshBoundaries), m_oppositeEdges(MemTag::MeshBoundaries), m_nextBoundaryEdges(MemTag::MeshBoundaries), m_edgeMap(MemTag::MeshEdgeMap, approxFaceCount * 3) - { + Mesh(float epsilon, uint32_t approxVertexCount, uint32_t approxFaceCount, uint32_t flags = 0, uint32_t id = UINT32_MAX) : + m_epsilon(epsilon), m_flags(flags), m_id(id), m_faceIgnore(MemTag::Mesh), m_faceMaterials(MemTag::Mesh), m_indices(MemTag::MeshIndices), m_positions(MemTag::MeshPositions), m_normals(MemTag::MeshNormals), m_texcoords(MemTag::MeshTexcoords), m_nextColocalVertex(MemTag::MeshColocals), m_firstColocalVertex(MemTag::MeshColocals), m_boundaryEdges(MemTag::MeshBoundaries), m_oppositeEdges(MemTag::MeshBoundaries), m_edgeMap(MemTag::MeshEdgeMap, approxFaceCount * 3) { m_indices.reserve(approxFaceCount * 3); m_positions.reserve(approxVertexCount); m_texcoords.reserve(approxVertexCount); @@ -2459,13 +2271,14 @@ public: m_faceIgnore.reserve(approxFaceCount); if (m_flags & MeshFlags::HasNormals) m_normals.reserve(approxVertexCount); + if (m_flags & MeshFlags::HasMaterials) + m_faceMaterials.reserve(approxFaceCount); } uint32_t flags() const { return m_flags; } uint32_t id() const { return m_id; } - void addVertex(const Vector3 &pos, const Vector3 &normal = Vector3(0.0f), const Vector2 &texcoord = Vector2(0.0f)) - { + void addVertex(const Vector3 &pos, const Vector3 &normal = Vector3(0.0f), const Vector2 &texcoord = Vector2(0.0f)) { XA_DEBUG_ASSERT(isFinite(pos)); m_positions.push_back(pos); if (m_flags & MeshFlags::HasNormals) @@ -2473,45 +2286,22 @@ public: m_texcoords.push_back(texcoord); } - struct AddFaceResult - { - enum Enum - { - OK, - DuplicateEdge = 1 - }; - }; - - AddFaceResult::Enum addFace(uint32_t v0, uint32_t v1, uint32_t v2, bool ignore = false) - { - uint32_t indexArray[3]; - indexArray[0] = v0; - indexArray[1] = v1; - indexArray[2] = v2; - return addFace(indexArray, ignore); - } - - AddFaceResult::Enum addFace(const uint32_t *indices, bool ignore = false) - { - AddFaceResult::Enum result = AddFaceResult::OK; + void addFace(const uint32_t *indices, bool ignore = false, uint32_t material = UINT32_MAX) { if (m_flags & MeshFlags::HasIgnoredFaces) m_faceIgnore.push_back(ignore); + if (m_flags & MeshFlags::HasMaterials) + m_faceMaterials.push_back(material); const uint32_t firstIndex = m_indices.size(); for (uint32_t i = 0; i < 3; i++) m_indices.push_back(indices[i]); for (uint32_t i = 0; i < 3; i++) { const uint32_t vertex0 = m_indices[firstIndex + i]; const uint32_t vertex1 = m_indices[firstIndex + (i + 1) % 3]; - const EdgeKey key(vertex0, vertex1); - if (m_edgeMap.get(key) != UINT32_MAX) - result = AddFaceResult::DuplicateEdge; - m_edgeMap.add(key); + m_edgeMap.add(EdgeKey(vertex0, vertex1)); } - return result; } - void createColocals() - { + void createColocalsBVH() { const uint32_t vertexCount = m_positions.size(); Array<AABB> aabbs(MemTag::BVH); aabbs.resize(vertexCount); @@ -2522,6 +2312,8 @@ public: Array<uint32_t> potential(MemTag::MeshColocals); m_nextColocalVertex.resize(vertexCount); m_nextColocalVertex.fillBytes(0xff); + m_firstColocalVertex.resize(vertexCount); + m_firstColocalVertex.fillBytes(0xff); for (uint32_t i = 0; i < vertexCount; i++) { if (m_nextColocalVertex[i] != UINT32_MAX) continue; // Already linked. @@ -2537,18 +2329,65 @@ public: if (colocals.size() == 1) { // No colocals for this vertex. m_nextColocalVertex[i] = i; - continue; + m_firstColocalVertex[i] = i; + continue; } // Link in ascending order. insertionSort(colocals.data(), colocals.size()); - for (uint32_t j = 0; j < colocals.size(); j++) + for (uint32_t j = 0; j < colocals.size(); j++) { m_nextColocalVertex[colocals[j]] = colocals[(j + 1) % colocals.size()]; + m_firstColocalVertex[colocals[j]] = colocals[0]; + } XA_DEBUG_ASSERT(m_nextColocalVertex[i] != UINT32_MAX); } } - void createBoundaries() - { + void createColocalsHash() { + const uint32_t vertexCount = m_positions.size(); + HashMap<Vector3> positionToVertexMap(MemTag::Default, vertexCount); + for (uint32_t i = 0; i < vertexCount; i++) + positionToVertexMap.add(m_positions[i]); + Array<uint32_t> colocals(MemTag::MeshColocals); + m_nextColocalVertex.resize(vertexCount); + m_nextColocalVertex.fillBytes(0xff); + m_firstColocalVertex.resize(vertexCount); + m_firstColocalVertex.fillBytes(0xff); + for (uint32_t i = 0; i < vertexCount; i++) { + if (m_nextColocalVertex[i] != UINT32_MAX) + continue; // Already linked. + // Find other vertices colocal to this one. + colocals.clear(); + colocals.push_back(i); // Always add this vertex. + uint32_t otherVertex = positionToVertexMap.get(m_positions[i]); + while (otherVertex != UINT32_MAX) { + if (otherVertex != i && equal(m_positions[i], m_positions[otherVertex], m_epsilon) && m_nextColocalVertex[otherVertex] == UINT32_MAX) + colocals.push_back(otherVertex); + otherVertex = positionToVertexMap.getNext(m_positions[i], otherVertex); + } + if (colocals.size() == 1) { + // No colocals for this vertex. + m_nextColocalVertex[i] = i; + m_firstColocalVertex[i] = i; + continue; + } + // Link in ascending order. + insertionSort(colocals.data(), colocals.size()); + for (uint32_t j = 0; j < colocals.size(); j++) { + m_nextColocalVertex[colocals[j]] = colocals[(j + 1) % colocals.size()]; + m_firstColocalVertex[colocals[j]] = colocals[0]; + } + XA_DEBUG_ASSERT(m_nextColocalVertex[i] != UINT32_MAX); + } + } + + void createColocals() { + if (m_epsilon <= FLT_EPSILON) + createColocalsHash(); + else + createColocalsBVH(); + } + + void createBoundaries() { const uint32_t edgeCount = m_indices.size(); const uint32_t vertexCount = m_positions.size(); m_oppositeEdges.resize(edgeCount); @@ -2578,151 +2417,54 @@ public: } } - void linkBoundaries() - { - const uint32_t edgeCount = m_indices.size(); - HashMap<uint32_t> vertexToEdgeMap(MemTag::Mesh, edgeCount); // Edge is index / 2 - for (uint32_t i = 0; i < edgeCount; i++) { - vertexToEdgeMap.add(m_indices[meshEdgeIndex0(i)]); - vertexToEdgeMap.add(m_indices[meshEdgeIndex1(i)]); - } - m_nextBoundaryEdges.resize(edgeCount); - for (uint32_t i = 0; i < edgeCount; i++) - m_nextBoundaryEdges[i] = UINT32_MAX; - uint32_t numBoundaryLoops = 0, numUnclosedBoundaries = 0; - BitArray linkedEdges(edgeCount); - linkedEdges.zeroOutMemory(); - for (;;) { - // Find the first boundary edge that hasn't been linked yet. - uint32_t firstEdge = UINT32_MAX; - for (uint32_t i = 0; i < edgeCount; i++) { - if (m_oppositeEdges[i] == UINT32_MAX && !linkedEdges.get(i)) { - firstEdge = i; - break; - } - } - if (firstEdge == UINT32_MAX) - break; - uint32_t currentEdge = firstEdge; - for (;;) { - // Find the next boundary edge. The first vertex will be the same as (or colocal to) the current edge second vertex. - const uint32_t startVertex = m_indices[meshEdgeIndex1(currentEdge)]; - uint32_t bestNextEdge = UINT32_MAX; - for (ColocalVertexIterator it(this, startVertex); !it.isDone(); it.advance()) { - uint32_t mapIndex = vertexToEdgeMap.get(it.vertex()); - while (mapIndex != UINT32_MAX) { - const uint32_t otherEdge = mapIndex / 2; // Two vertices added per edge. - if (m_oppositeEdges[otherEdge] != UINT32_MAX) - goto next; // Not a boundary edge. - if (linkedEdges.get(otherEdge)) - goto next; // Already linked. - if (m_indices[meshEdgeIndex0(otherEdge)] != it.vertex()) - goto next; // Edge contains the vertex, but it's the wrong one. - // First edge (closing the boundary loop) has the highest priority. - // Non-colocal vertex has the next highest. - if (bestNextEdge != firstEdge && (bestNextEdge == UINT32_MAX || it.vertex() == startVertex)) - bestNextEdge = otherEdge; - next: - mapIndex = vertexToEdgeMap.getNext(mapIndex); - } - } - if (bestNextEdge == UINT32_MAX) { - numUnclosedBoundaries++; - if (currentEdge == firstEdge) - linkedEdges.set(firstEdge); // Only 1 edge in this boundary "loop". - break; // Can't find a next edge. - } - m_nextBoundaryEdges[currentEdge] = bestNextEdge; - linkedEdges.set(bestNextEdge); - currentEdge = bestNextEdge; - if (currentEdge == firstEdge) { - numBoundaryLoops++; - break; // Closed the boundary loop. - } - } - } -#if XA_FIX_INTERNAL_BOUNDARY_LOOPS - // Find internal boundary loops and separate them. - // Detect by finding two edges in a boundary loop that have a colocal end vertex. - // Fix by swapping their next boundary edge. - // Need to start over after every fix since known boundary loops have changed. - Array<uint32_t> boundaryLoops; - fixInternalBoundary: - meshGetBoundaryLoops(*this, boundaryLoops); - for (uint32_t loop = 0; loop < boundaryLoops.size(); loop++) { - linkedEdges.zeroOutMemory(); - for (Mesh::BoundaryLoopEdgeIterator it1(this, boundaryLoops[loop]); !it1.isDone(); it1.advance()) { - const uint32_t e1 = it1.edge(); - if (linkedEdges.get(e1)) - continue; - for (Mesh::BoundaryLoopEdgeIterator it2(this, boundaryLoops[loop]); !it2.isDone(); it2.advance()) { - const uint32_t e2 = it2.edge(); - if (e1 == e2 || !isBoundaryEdge(e2) || linkedEdges.get(e2)) - continue; - if (!areColocal(m_indices[meshEdgeIndex1(e1)], m_indices[meshEdgeIndex1(e2)])) - continue; - swap(m_nextBoundaryEdges[e1], m_nextBoundaryEdges[e2]); - linkedEdges.set(e1); - linkedEdges.set(e2); - goto fixInternalBoundary; // start over - } - } - } -#endif - } - /// Find edge, test all colocals. - uint32_t findEdge(uint32_t vertex0, uint32_t vertex1) const - { - uint32_t result = UINT32_MAX; - if (m_nextColocalVertex.isEmpty()) { + uint32_t findEdge(uint32_t vertex0, uint32_t vertex1) const { + // Try to find exact vertex match first. + { EdgeKey key(vertex0, vertex1); uint32_t edge = m_edgeMap.get(key); while (edge != UINT32_MAX) { // Don't find edges of ignored faces. - if (!isFaceIgnored(meshEdgeFace(edge))) { - //XA_DEBUG_ASSERT(m_id != UINT32_MAX || (m_id == UINT32_MAX && result == UINT32_MAX)); // duplicate edge - ignore on initial meshes - result = edge; -#if !XA_DEBUG - return result; -#endif - } - edge = m_edgeMap.getNext(edge); + if (!isFaceIgnored(meshEdgeFace(edge))) + return edge; + edge = m_edgeMap.getNext(key, edge); } - } else { - for (ColocalVertexIterator it0(this, vertex0); !it0.isDone(); it0.advance()) { - for (ColocalVertexIterator it1(this, vertex1); !it1.isDone(); it1.advance()) { - EdgeKey key(it0.vertex(), it1.vertex()); + } + // If colocals were created, try every permutation. + if (!m_nextColocalVertex.isEmpty()) { + uint32_t colocalVertex0 = vertex0; + for (;;) { + uint32_t colocalVertex1 = vertex1; + for (;;) { + EdgeKey key(colocalVertex0, colocalVertex1); uint32_t edge = m_edgeMap.get(key); while (edge != UINT32_MAX) { // Don't find edges of ignored faces. - if (!isFaceIgnored(meshEdgeFace(edge))) { - XA_DEBUG_ASSERT(m_id != UINT32_MAX || (m_id == UINT32_MAX && result == UINT32_MAX)); // duplicate edge - ignore on initial meshes - result = edge; -#if !XA_DEBUG - return result; -#endif - } - edge = m_edgeMap.getNext(edge); + if (!isFaceIgnored(meshEdgeFace(edge))) + return edge; + edge = m_edgeMap.getNext(key, edge); } + colocalVertex1 = m_nextColocalVertex[colocalVertex1]; + if (colocalVertex1 == vertex1) + break; // Back to start. } + colocalVertex0 = m_nextColocalVertex[colocalVertex0]; + if (colocalVertex0 == vertex0) + break; // Back to start. } } - return result; + return UINT32_MAX; } // Edge map can be destroyed when no longer used to reduce memory usage. It's used by: // * Mesh::createBoundaries() - // * Mesh::ColocalEdgeIterator (used by MeshFaceGroups) - // * meshCloseHole() - void destroyEdgeMap() - { + // * Mesh::edgeMap() (used by MeshFaceGroups) + void destroyEdgeMap() { m_edgeMap.destroy(); } #if XA_DEBUG_EXPORT_OBJ - void writeObjVertices(FILE *file) const - { + void writeObjVertices(FILE *file) const { for (uint32_t i = 0; i < m_positions.size(); i++) fprintf(file, "v %g %g %g\n", m_positions[i].x, m_positions[i].y, m_positions[i].z); if (m_flags & MeshFlags::HasNormals) { @@ -2733,8 +2475,7 @@ public: fprintf(file, "vt %g %g\n", m_texcoords[i].x, m_texcoords[i].y); } - void writeObjFace(FILE *file, uint32_t face, uint32_t offset = 0) const - { + void writeObjFace(FILE *file, uint32_t face, uint32_t offset = 0) const { fprintf(file, "f "); for (uint32_t j = 0; j < 3; j++) { const uint32_t index = m_indices[face * 3 + j] + 1 + offset; // 1-indexed @@ -2742,8 +2483,7 @@ public: } } - void writeObjBoundaryEges(FILE *file) const - { + void writeObjBoundaryEges(FILE *file) const { if (m_oppositeEdges.isEmpty()) return; // Boundaries haven't been created. fprintf(file, "o boundary_edges\n"); @@ -2754,31 +2494,7 @@ public: } } - void writeObjLinkedBoundaries(FILE *file) const - { - if (m_oppositeEdges.isEmpty() || m_nextBoundaryEdges.isEmpty()) - return; // Boundaries haven't been created and/or linked. - Array<uint32_t> boundaryLoops; - meshGetBoundaryLoops(*this, boundaryLoops); - for (uint32_t i = 0; i < boundaryLoops.size(); i++) { - uint32_t edge = boundaryLoops[i]; - fprintf(file, "o boundary_%04d\n", i); - fprintf(file, "l"); - for (;;) { - const uint32_t vertex0 = m_indices[meshEdgeIndex0(edge)]; - const uint32_t vertex1 = m_indices[meshEdgeIndex1(edge)]; - fprintf(file, " %d", vertex0 + 1); // 1-indexed - edge = m_nextBoundaryEdges[edge]; - if (edge == boundaryLoops[i] || edge == UINT32_MAX) { - fprintf(file, " %d\n", vertex1 + 1); // 1-indexed - break; - } - } - } - } - - void writeObjFile(const char *filename) const - { + void writeObjFile(const char *filename) const { FILE *file; XA_FOPEN(file, filename, "w"); if (!file) @@ -2789,13 +2505,11 @@ public: for (uint32_t i = 0; i < faceCount(); i++) writeObjFace(file, i); writeObjBoundaryEges(file); - writeObjLinkedBoundaries(file); fclose(file); } #endif - float computeSurfaceArea() const - { + float computeSurfaceArea() const { float area = 0; for (uint32_t f = 0; f < faceCount(); f++) area += computeFaceArea(f); @@ -2804,24 +2518,21 @@ public: } // Returned value is always positive, even if some triangles are flipped. - float computeParametricArea() const - { + float computeParametricArea() const { float area = 0; for (uint32_t f = 0; f < faceCount(); f++) area += fabsf(computeFaceParametricArea(f)); // May be negative, depends on texcoord winding. - return area; + return area; } - float computeFaceArea(uint32_t face) const - { + float computeFaceArea(uint32_t face) const { const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]]; const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]]; const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]]; return length(cross(p1 - p0, p2 - p0)) * 0.5f; } - Vector3 computeFaceCentroid(uint32_t face) const - { + Vector3 computeFaceCentroid(uint32_t face) const { Vector3 sum(0.0f); for (uint32_t i = 0; i < 3; i++) sum += m_positions[m_indices[face * 3 + i]]; @@ -2830,8 +2541,7 @@ public: // Average of the edge midpoints weighted by the edge length. // I want a point inside the triangle, but closer to the cirumcenter. - Vector3 computeFaceCenter(uint32_t face) const - { + Vector3 computeFaceCenter(uint32_t face) const { const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]]; const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]]; const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]]; @@ -2844,28 +2554,25 @@ public: return m0 + m1 + m2; } - Vector3 computeFaceNormal(uint32_t face) const - { + Vector3 computeFaceNormal(uint32_t face) const { const Vector3 &p0 = m_positions[m_indices[face * 3 + 0]]; const Vector3 &p1 = m_positions[m_indices[face * 3 + 1]]; const Vector3 &p2 = m_positions[m_indices[face * 3 + 2]]; const Vector3 e0 = p2 - p0; const Vector3 e1 = p1 - p0; const Vector3 normalAreaScaled = cross(e0, e1); - return normalizeSafe(normalAreaScaled, Vector3(0, 0, 1), 0.0f); + return normalizeSafe(normalAreaScaled, Vector3(0, 0, 1)); } - float computeFaceParametricArea(uint32_t face) const - { + float computeFaceParametricArea(uint32_t face) const { const Vector2 &t0 = m_texcoords[m_indices[face * 3 + 0]]; const Vector2 &t1 = m_texcoords[m_indices[face * 3 + 1]]; const Vector2 &t2 = m_texcoords[m_indices[face * 3 + 2]]; return triangleArea(t0, t1, t2); } - + // @@ This is not exactly accurate, we should compare the texture coordinates... - bool isSeam(uint32_t edge) const - { + bool isSeam(uint32_t edge) const { const uint32_t oppositeEdge = m_oppositeEdges[edge]; if (oppositeEdge == UINT32_MAX) return false; // boundary edge @@ -2876,8 +2583,7 @@ public: return m_indices[e0] != m_indices[oe1] || m_indices[e1] != m_indices[oe0]; } - bool isTextureSeam(uint32_t edge) const - { + bool isTextureSeam(uint32_t edge) const { const uint32_t oppositeEdge = m_oppositeEdges[edge]; if (oppositeEdge == UINT32_MAX) return false; // boundary edge @@ -2888,26 +2594,9 @@ public: return m_texcoords[m_indices[e0]] != m_texcoords[m_indices[oe1]] || m_texcoords[m_indices[e1]] != m_texcoords[m_indices[oe0]]; } - uint32_t firstColocal(uint32_t vertex) const - { - for (ColocalVertexIterator it(this, vertex); !it.isDone(); it.advance()) { - if (it.vertex() < vertex) - vertex = it.vertex(); - } - return vertex; - } - - bool areColocal(uint32_t vertex0, uint32_t vertex1) const - { - if (vertex0 == vertex1) - return true; - if (m_nextColocalVertex.isEmpty()) - return false; - for (ColocalVertexIterator it(this, vertex0); !it.isDone(); it.advance()) { - if (it.vertex() == vertex1) - return true; - } - return false; + uint32_t firstColocalVertex(uint32_t vertex) const { + XA_DEBUG_ASSERT(m_firstColocalVertex.size() == m_positions.size()); + return m_firstColocalVertex[vertex]; } XA_INLINE float epsilon() const { return m_epsilon; } @@ -2919,23 +2608,28 @@ public: XA_INLINE uint32_t vertexCount() const { return m_positions.size(); } XA_INLINE uint32_t vertexAt(uint32_t i) const { return m_indices[i]; } XA_INLINE const Vector3 &position(uint32_t vertex) const { return m_positions[vertex]; } - XA_INLINE const Vector3 *positions() const { return m_positions.data(); } - XA_INLINE const Vector3 &normal(uint32_t vertex) const { XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); return m_normals[vertex]; } + XA_INLINE ConstArrayView<Vector3> positions() const { return m_positions; } + XA_INLINE const Vector3 &normal(uint32_t vertex) const { + XA_DEBUG_ASSERT(m_flags & MeshFlags::HasNormals); + return m_normals[vertex]; + } XA_INLINE const Vector2 &texcoord(uint32_t vertex) const { return m_texcoords[vertex]; } XA_INLINE Vector2 &texcoord(uint32_t vertex) { return m_texcoords[vertex]; } - XA_INLINE const Vector2 *texcoords() const { return m_texcoords.data(); } - XA_INLINE Vector2 *texcoords() { return m_texcoords.data(); } + XA_INLINE const ConstArrayView<Vector2> texcoords() const { return m_texcoords; } + XA_INLINE ArrayView<Vector2> texcoords() { return m_texcoords; } XA_INLINE uint32_t faceCount() const { return m_indices.size() / 3; } - XA_INLINE const uint32_t *indices() const { return m_indices.data(); } + XA_INLINE ConstArrayView<uint32_t> indices() const { return m_indices; } XA_INLINE uint32_t indexCount() const { return m_indices.size(); } XA_INLINE bool isFaceIgnored(uint32_t face) const { return (m_flags & MeshFlags::HasIgnoredFaces) && m_faceIgnore[face]; } + XA_INLINE uint32_t faceMaterial(uint32_t face) const { return (m_flags & MeshFlags::HasMaterials) ? m_faceMaterials[face] : UINT32_MAX; } + XA_INLINE const HashMap<EdgeKey, EdgeHash> &edgeMap() const { return m_edgeMap; } private: - float m_epsilon; uint32_t m_flags; uint32_t m_id; Array<bool> m_faceIgnore; + Array<uint32_t> m_faceMaterials; Array<uint32_t> m_indices; Array<Vector3> m_positions; Array<Vector3> m_normals; @@ -2943,205 +2637,31 @@ private: // Populated by createColocals Array<uint32_t> m_nextColocalVertex; // In: vertex index. Out: the vertex index of the next colocal position. + Array<uint32_t> m_firstColocalVertex; // Populated by createBoundaries BitArray m_isBoundaryVertex; Array<uint32_t> m_boundaryEdges; Array<uint32_t> m_oppositeEdges; // In: edge index. Out: the index of the opposite edge (i.e. wound the opposite direction). UINT32_MAX if the input edge is a boundary edge. - // Populated by linkBoundaries - Array<uint32_t> m_nextBoundaryEdges; // The index of the next boundary edge. UINT32_MAX if the edge is not a boundary edge. - - struct EdgeKey - { - EdgeKey(const EdgeKey &k) : v0(k.v0), v1(k.v1) {} - EdgeKey(uint32_t v0, uint32_t v1) : v0(v0), v1(v1) {} - bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; } - - uint32_t v0; - uint32_t v1; - }; - - struct EdgeHash - { - uint32_t operator()(const EdgeKey &k) const { return k.v0 * 32768u + k.v1; } - }; - HashMap<EdgeKey, EdgeHash> m_edgeMap; public: - class BoundaryLoopEdgeIterator - { + class FaceEdgeIterator { public: - BoundaryLoopEdgeIterator(const Mesh *mesh, uint32_t edge) : m_mesh(mesh), m_first(UINT32_MAX), m_current(edge) {} - - void advance() - { - if (m_first == UINT32_MAX) - m_first = m_current; - m_current = m_mesh->m_nextBoundaryEdges[m_current]; - } - - bool isDone() const - { - return m_first == m_current || m_current == UINT32_MAX; - } - - uint32_t edge() const - { - return m_current; - } - - uint32_t nextEdge() const - { - return m_mesh->m_nextBoundaryEdges[m_current]; - } - - private: - const Mesh *m_mesh; - uint32_t m_first; - uint32_t m_current; - }; - - class ColocalVertexIterator - { - public: - ColocalVertexIterator(const Mesh *mesh, uint32_t v) : m_mesh(mesh), m_first(UINT32_MAX), m_current(v) {} - - void advance() - { - if (m_first == UINT32_MAX) - m_first = m_current; - if (!m_mesh->m_nextColocalVertex.isEmpty()) - m_current = m_mesh->m_nextColocalVertex[m_current]; - } - - bool isDone() const - { - return m_first == m_current; - } - - uint32_t vertex() const - { - return m_current; - } - - const Vector3 *pos() const - { - return &m_mesh->m_positions[m_current]; - } - - private: - const Mesh *m_mesh; - uint32_t m_first; - uint32_t m_current; - }; - - class ColocalEdgeIterator - { - public: - ColocalEdgeIterator(const Mesh *mesh, uint32_t vertex0, uint32_t vertex1) : m_mesh(mesh), m_vertex0It(mesh, vertex0), m_vertex1It(mesh, vertex1), m_vertex1(vertex1) - { - do { - if (!resetElement()) { - advanceVertex1(); - } - else { - break; - } - } while (!isDone()); - } - - void advance() - { - advanceElement(); - } - - bool isDone() const - { - return m_vertex0It.isDone() && m_vertex1It.isDone() && m_edge == UINT32_MAX; - } - - uint32_t edge() const - { - return m_edge; - } - - private: - bool resetElement() - { - m_edge = m_mesh->m_edgeMap.get(Mesh::EdgeKey(m_vertex0It.vertex(), m_vertex1It.vertex())); - while (m_edge != UINT32_MAX) { - if (!isIgnoredFace()) - break; - m_edge = m_mesh->m_edgeMap.getNext(m_edge); - } - if (m_edge == UINT32_MAX) { - return false; - } - return true; - } - - void advanceElement() - { - for (;;) { - m_edge = m_mesh->m_edgeMap.getNext(m_edge); - if (m_edge == UINT32_MAX) - break; - if (!isIgnoredFace()) - break; - } - if (m_edge == UINT32_MAX) - advanceVertex1(); - } - - void advanceVertex1() - { - auto successful = false; - while (!successful) { - m_vertex1It.advance(); - if (m_vertex1It.isDone()) { - if (!m_vertex0It.isDone()) { - m_vertex0It.advance(); - m_vertex1It = ColocalVertexIterator(m_mesh, m_vertex1); - } - else { - return; - } - } - successful = resetElement(); - } - } - - bool isIgnoredFace() const - { - return m_mesh->m_faceIgnore[meshEdgeFace(m_edge)]; - } - - const Mesh *m_mesh; - ColocalVertexIterator m_vertex0It, m_vertex1It; - const uint32_t m_vertex1; - uint32_t m_edge; - }; - - class FaceEdgeIterator - { - public: - FaceEdgeIterator (const Mesh *mesh, uint32_t face) : m_mesh(mesh), m_face(face), m_relativeEdge(0) - { + FaceEdgeIterator(const Mesh *mesh, uint32_t face) : + m_mesh(mesh), m_face(face), m_relativeEdge(0) { m_edge = m_face * 3; } - void advance() - { + void advance() { if (m_relativeEdge < 3) { m_edge++; m_relativeEdge++; } } - bool isDone() const - { + bool isDone() const { return m_relativeEdge == 3; } @@ -3152,9 +2672,8 @@ public: uint32_t relativeEdge() const { return m_relativeEdge; } uint32_t face() const { return m_face; } uint32_t oppositeEdge() const { return m_mesh->m_oppositeEdges[m_edge]; } - - uint32_t oppositeFace() const - { + + uint32_t oppositeFace() const { const uint32_t oedge = m_mesh->m_oppositeEdges[m_edge]; if (oedge == UINT32_MAX) return UINT32_MAX; @@ -3178,19 +2697,18 @@ public: }; }; -struct MeshFaceGroups -{ +struct MeshFaceGroups { typedef uint32_t Handle; static constexpr Handle kInvalid = UINT32_MAX; - MeshFaceGroups(const Mesh *mesh) : m_mesh(mesh), m_groups(MemTag::Mesh), m_firstFace(MemTag::Mesh), m_nextFace(MemTag::Mesh), m_faceCount(MemTag::Mesh) {} + MeshFaceGroups(const Mesh *mesh) : + m_mesh(mesh), m_groups(MemTag::Mesh), m_firstFace(MemTag::Mesh), m_nextFace(MemTag::Mesh), m_faceCount(MemTag::Mesh) {} XA_INLINE Handle groupAt(uint32_t face) const { return m_groups[face]; } XA_INLINE uint32_t groupCount() const { return m_faceCount.size(); } XA_INLINE uint32_t nextFace(uint32_t face) const { return m_nextFace[face]; } XA_INLINE uint32_t faceCount(uint32_t group) const { return m_faceCount[group]; } - void compute() - { + void compute() { m_groups.resize(m_mesh->faceCount()); m_groups.fillBytes(0xff); // Set all faces to kInvalid uint32_t firstUnassignedFace = 0; @@ -3222,57 +2740,25 @@ struct MeshFaceGroups break; const uint32_t f = growFaces.back(); growFaces.pop_back(); + const uint32_t material = m_mesh->faceMaterial(f); for (Mesh::FaceEdgeIterator edgeIt(m_mesh, f); !edgeIt.isDone(); edgeIt.advance()) { - // Iterate opposite edges. There may be more than one - non-manifold geometry can have duplicate edges. - // Prioritize the one with exact vertex match, not just colocal. - // If *any* of the opposite edges are already assigned to this group, don't do anything. - bool alreadyAssignedToThisGroup = false; - uint32_t bestConnectedFace = UINT32_MAX; - for (Mesh::ColocalEdgeIterator oppositeEdgeIt(m_mesh, edgeIt.vertex1(), edgeIt.vertex0()); !oppositeEdgeIt.isDone(); oppositeEdgeIt.advance()) { - const uint32_t oppositeEdge = oppositeEdgeIt.edge(); - const uint32_t oppositeFace = meshEdgeFace(oppositeEdge); -#if 0 - // Reject opposite face if dihedral angle >= 90 degrees. - { - Vector3 a = m_mesh->computeFaceNormal(f); - Vector3 b = m_mesh->computeFaceNormal(oppositeFace); - if (dot(a, b) <= 0.0f) - continue; - } -#endif - if (m_mesh->isFaceIgnored(oppositeFace)) - continue; // Don't add ignored faces to group. - if (m_groups[oppositeFace] == group) { - alreadyAssignedToThisGroup = true; - break; - } - if (m_groups[oppositeFace] != kInvalid) - continue; // Connected face is already assigned to another group. - if (faceDuplicatesGroupEdge(group, oppositeFace)) - continue; // Don't want duplicate edges in a group. - const uint32_t oppositeVertex0 = m_mesh->vertexAt(meshEdgeIndex0(oppositeEdge)); - const uint32_t oppositeVertex1 = m_mesh->vertexAt(meshEdgeIndex1(oppositeEdge)); - if (bestConnectedFace == UINT32_MAX || (oppositeVertex0 == edgeIt.vertex1() && oppositeVertex1 == edgeIt.vertex0())) - bestConnectedFace = oppositeFace; -#if 0 - else { - // Choose the opposite face with the smallest dihedral angle. - const float d1 = 1.0f - dot(computeFaceNormal(f), computeFaceNormal(bestConnectedFace)); - const float d2 = 1.0f - dot(computeFaceNormal(f), computeFaceNormal(oppositeFace)); - if (d2 < d1) - bestConnectedFace = oppositeFace; - } -#endif - } - if (!alreadyAssignedToThisGroup && bestConnectedFace != UINT32_MAX) { - m_groups[bestConnectedFace] = group; - m_nextFace[bestConnectedFace] = UINT32_MAX; - if (prevFace != UINT32_MAX) - m_nextFace[prevFace] = bestConnectedFace; - prevFace = bestConnectedFace; - groupFaceCount++; - growFaces.push_back(bestConnectedFace); - } + const uint32_t oppositeEdge = m_mesh->findEdge(edgeIt.vertex1(), edgeIt.vertex0()); + if (oppositeEdge == UINT32_MAX) + continue; // Boundary edge. + const uint32_t oppositeFace = meshEdgeFace(oppositeEdge); + if (m_mesh->isFaceIgnored(oppositeFace)) + continue; // Don't add ignored faces to group. + if (m_mesh->faceMaterial(oppositeFace) != material) + continue; // Different material. + if (m_groups[oppositeFace] != kInvalid) + continue; // Connected face is already assigned to another group. + m_groups[oppositeFace] = group; + m_nextFace[oppositeFace] = UINT32_MAX; + if (prevFace != UINT32_MAX) + m_nextFace[prevFace] = oppositeFace; + prevFace = oppositeFace; + groupFaceCount++; + growFaces.push_back(oppositeFace); } } m_faceCount.push_back(groupFaceCount); @@ -3281,27 +2767,23 @@ struct MeshFaceGroups } } - class Iterator - { + class Iterator { public: - Iterator(const MeshFaceGroups *meshFaceGroups, Handle group) : m_meshFaceGroups(meshFaceGroups) - { + Iterator(const MeshFaceGroups *meshFaceGroups, Handle group) : + m_meshFaceGroups(meshFaceGroups) { XA_DEBUG_ASSERT(group != kInvalid); m_current = m_meshFaceGroups->m_firstFace[group]; } - void advance() - { + void advance() { m_current = m_meshFaceGroups->m_nextFace[m_current]; } - bool isDone() const - { + bool isDone() const { return m_current == UINT32_MAX; } - uint32_t face() const - { + uint32_t face() const { return m_current; } @@ -3311,18 +2793,6 @@ struct MeshFaceGroups }; private: - // Check if the face duplicates any edges of any face already in the group. - bool faceDuplicatesGroupEdge(Handle group, uint32_t face) const - { - for (Mesh::FaceEdgeIterator edgeIt(m_mesh, face); !edgeIt.isDone(); edgeIt.advance()) { - for (Mesh::ColocalEdgeIterator colocalEdgeIt(m_mesh, edgeIt.vertex0(), edgeIt.vertex1()); !colocalEdgeIt.isDone(); colocalEdgeIt.advance()) { - if (m_groups[meshEdgeFace(colocalEdgeIt.edge())] == group) - return true; - } - } - return false; - } - const Mesh *m_mesh; Array<Handle> m_groups; Array<uint32_t> m_firstFace; @@ -3332,243 +2802,27 @@ private: constexpr MeshFaceGroups::Handle MeshFaceGroups::kInvalid; -static bool meshCloseHole(Mesh *mesh, const Array<uint32_t> &holeVertices, const Vector3 &normal) -{ -#if XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION - const uint32_t faceCount = mesh->faceCount(); -#endif - const bool compareNormal = equal(normal, Vector3(0.0f), FLT_EPSILON); - uint32_t frontCount = holeVertices.size(); - Array<uint32_t> frontVertices; - Array<Vector3> frontPoints; - Array<float> frontAngles; - frontVertices.resize(frontCount); - frontPoints.resize(frontCount); - for (uint32_t i = 0; i < frontCount; i++) { - frontVertices[i] = holeVertices[i]; - frontPoints[i] = mesh->position(frontVertices[i]); - } - while (frontCount >= 3) { - frontAngles.resize(frontCount); - float smallestAngle = kPi2, smallestAngleIgnoringNormal = kPi2; - uint32_t smallestAngleIndex = UINT32_MAX, smallestAngleIndexIgnoringNormal = UINT32_MAX; - for (uint32_t i = 0; i < frontCount; i++) { - const uint32_t i1 = i == 0 ? frontCount - 1 : i - 1; - const uint32_t i2 = i; - const uint32_t i3 = (i + 1) % frontCount; - const Vector3 edge1 = frontPoints[i1] - frontPoints[i2]; - const Vector3 edge2 = frontPoints[i3] - frontPoints[i2]; - frontAngles[i] = atan2f(length(cross(edge1, edge2)), dot(edge1, edge2)); - if (frontAngles[i] >= smallestAngle || isNan(frontAngles[i])) - continue; - // Don't duplicate edges. - if (mesh->findEdge(frontVertices[i1], frontVertices[i2]) != UINT32_MAX) - continue; - if (mesh->findEdge(frontVertices[i2], frontVertices[i3]) != UINT32_MAX) - continue; - if (mesh->findEdge(frontVertices[i3], frontVertices[i1]) != UINT32_MAX) - continue; - /* - Make sure he new edge that would be formed by (i3, i1) doesn't intersect any vertices. This often happens when fixing t-junctions. - - i2 - * - / \ - / \ - i1 *--*--* i3 - \ | / - \|/ - * - */ - bool intersection = false; - for (uint32_t j = 0; j < frontCount; j++) { - if (j == i1 || j == i2 || j == i3) - continue; - if (lineIntersectsPoint(frontPoints[j], frontPoints[i3], frontPoints[i1], nullptr, mesh->epsilon())) { - intersection = true; - break; - } - } - if (intersection) - continue; - // Don't add the triangle if a boundary point lies on the same plane as the triangle, and is inside it. - intersection = false; - const Plane plane(frontPoints[i1], frontPoints[i2], frontPoints[i3]); - for (uint32_t j = 0; j < frontCount; j++) { - if (j == i1 || j == i2 || j == i3) - continue; - if (!isZero(plane.distance(frontPoints[j]), mesh->epsilon())) - continue; - if (pointInTriangle(frontPoints[j], frontPoints[i1], frontPoints[i2], frontPoints[i3])) { - intersection = true; - break; - } - } - if (intersection) - continue; -#if XA_CLOSE_HOLES_CHECK_EDGE_INTERSECTION - // Don't add the triangle if the new edge (i3, i1), intersects any other triangle that isn't part of the filled hole. - intersection = false; - const Vector3 newEdgeVector = frontPoints[i1] - frontPoints[i3]; - for (uint32_t f = 0; f < faceCount; f++) { - Vector3 tri[3]; - for (uint32_t j = 0; j < 3; j++) - tri[j] = mesh->position(mesh->vertexAt(f * 3 + j)); - float t; - if (rayIntersectsTriangle(frontPoints[i3], newEdgeVector, tri, &t)) { - intersection = true; - break; - } - } - if (intersection) - continue; -#endif - // Skip backwards facing triangles. - if (compareNormal) { - if (frontAngles[i] < smallestAngleIgnoringNormal) { - smallestAngleIgnoringNormal = frontAngles[i]; - smallestAngleIndexIgnoringNormal = i; - } - const Vector3 e0 = frontPoints[i3] - frontPoints[i1]; - const Vector3 e1 = frontPoints[i2] - frontPoints[i1]; - const Vector3 triNormal = normalizeSafe(cross(e0, e1), Vector3(0.0f), mesh->epsilon()); - if (dot(normal, triNormal) <= 0.0f) - continue; - } - smallestAngle = smallestAngleIgnoringNormal = frontAngles[i]; - smallestAngleIndex = smallestAngleIndexIgnoringNormal = i; - } - // Closing holes failed if we don't have a smallest angle. - // Fallback to ignoring the backwards facing normal test if possible. - if (smallestAngleIndex == UINT32_MAX || smallestAngle <= 0.0f || smallestAngle >= kPi) { - if (smallestAngleIgnoringNormal == UINT32_MAX || smallestAngleIgnoringNormal <= 0.0f || smallestAngleIgnoringNormal >= kPi) - return false; - else - smallestAngleIndex = smallestAngleIndexIgnoringNormal; - } - const uint32_t i1 = smallestAngleIndex == 0 ? frontCount - 1 : smallestAngleIndex - 1; - const uint32_t i2 = smallestAngleIndex; - const uint32_t i3 = (smallestAngleIndex + 1) % frontCount; - const Mesh::AddFaceResult::Enum result = mesh->addFace(frontVertices[i1], frontVertices[i2], frontVertices[i3]); - XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK); // Shouldn't happen due to the findEdge calls above. - XA_UNUSED(result); - frontVertices.removeAt(i2); - frontPoints.removeAt(i2); - frontCount = frontVertices.size(); - } - return true; -} - -static bool meshCloseHoles(Mesh *mesh, const Array<uint32_t> &boundaryLoops, const Vector3 &normal, uint32_t *holeCount, Array<uint32_t> *holeFaceCounts) -{ - if (holeFaceCounts) - holeFaceCounts->clear(); - // Compute lengths. - const uint32_t boundaryCount = boundaryLoops.size(); - Array<float> boundaryLengths; - Array<uint32_t> boundaryEdgeCounts; - boundaryEdgeCounts.resize(boundaryCount); - for (uint32_t i = 0; i < boundaryCount; i++) { - float boundaryLength = 0.0f; - boundaryEdgeCounts[i] = 0; - for (Mesh::BoundaryLoopEdgeIterator it(mesh, boundaryLoops[i]); !it.isDone(); it.advance()) { - const Vector3 &t0 = mesh->position(mesh->vertexAt(meshEdgeIndex0(it.edge()))); - const Vector3 &t1 = mesh->position(mesh->vertexAt(meshEdgeIndex1(it.edge()))); - boundaryLength += length(t1 - t0); - boundaryEdgeCounts[i]++; - } - boundaryLengths.push_back(boundaryLength); - } - // Find disk boundary. - uint32_t diskBoundary = 0; - float maxLength = boundaryLengths[0]; - for (uint32_t i = 1; i < boundaryCount; i++) { - if (boundaryLengths[i] > maxLength) { - maxLength = boundaryLengths[i]; - diskBoundary = i; - } - } - // Close holes. - Array<uint32_t> holeVertices; - Array<Vector3> holePoints; - bool result = true; - for (uint32_t i = 0; i < boundaryCount; i++) { - if (diskBoundary == i) - continue; // Skip disk boundary. - holeVertices.resize(boundaryEdgeCounts[i]); - holePoints.resize(boundaryEdgeCounts[i]); - // Winding is backwards for internal boundaries. - uint32_t e = 0; - for (Mesh::BoundaryLoopEdgeIterator it(mesh, boundaryLoops[i]); !it.isDone(); it.advance()) { - const uint32_t vertex = mesh->vertexAt(meshEdgeIndex0(it.edge())); - holeVertices[boundaryEdgeCounts[i] - 1 - e] = vertex; - holePoints[boundaryEdgeCounts[i] - 1 - e] = mesh->position(vertex); - e++; - } - const uint32_t oldFaceCount = mesh->faceCount(); - if (!meshCloseHole(mesh, holeVertices, normal)) - result = false; // Return false if any hole failed to close, but keep trying to close other holes. - if (holeCount) - (*holeCount)++; - if (holeFaceCounts) - holeFaceCounts->push_back(mesh->faceCount() - oldFaceCount); - } - return result; -} - -static bool meshIsPlanar(const Mesh &mesh) -{ - const Vector3 p1 = mesh.position(mesh.vertexAt(0)); - const Vector3 p2 = mesh.position(mesh.vertexAt(1)); - const Vector3 p3 = mesh.position(mesh.vertexAt(2)); - const Plane plane(p1, p2, p3); - const uint32_t vertexCount = mesh.vertexCount(); - for (uint32_t v = 0; v < vertexCount; v++) { - const float d = plane.distance(mesh.position(v)); - if (!isZero(d, mesh.epsilon())) - return false; - } - return true; -} - -/* -Fixing T-junctions. - -- Find T-junctions. Find vertices that are on an edge. -- This test is approximate. -- Insert edges on a spatial index to speedup queries. -- Consider only open edges, that is edges that have no pairs. -- Consider only vertices on boundaries. -- Close T-junction. -- Split edge. - -*/ -struct SplitEdge -{ - uint32_t edge; - float t; - uint32_t vertex; - - bool operator<(const SplitEdge &other) const - { - if (edge < other.edge) - return true; - else if (edge == other.edge) { - if (t < other.t) - return true; - } +#if XA_CHECK_T_JUNCTIONS +static bool lineIntersectsPoint(const Vector3 &point, const Vector3 &lineStart, const Vector3 &lineEnd, float *t, float epsilon) { + float tt; + if (!t) + t = &tt; + *t = 0.0f; + if (equal(lineStart, point, epsilon) || equal(lineEnd, point, epsilon)) + return false; // Vertex lies on either line vertices. + const Vector3 v01 = point - lineStart; + const Vector3 v21 = lineEnd - lineStart; + const float l = length(v21); + const float d = length(cross(v01, v21)) / l; + if (!isZero(d, epsilon)) return false; - } -}; + *t = dot(v01, v21) / (l * l); + return *t > kEpsilon && *t < 1.0f - kEpsilon; +} -// Returns nullptr if there were no t-junctions to fix. -static Mesh *meshFixTJunctions(const Mesh &inputMesh, bool *duplicatedEdge, bool *failed, uint32_t *fixedTJunctionsCount) -{ - if (duplicatedEdge) - *duplicatedEdge = false; - if (failed) - *failed = false; - Array<SplitEdge> splitEdges; +// Returns the number of T-junctions found. +static int meshCheckTJunctions(const Mesh &inputMesh) { + int count = 0; const uint32_t vertexCount = inputMesh.vertexCount(); const uint32_t edgeCount = inputMesh.edgeCount(); for (uint32_t v = 0; v < vertexCount; v++) { @@ -3582,155 +2836,130 @@ static Mesh *meshFixTJunctions(const Mesh &inputMesh, bool *duplicatedEdge, bool const Vector3 &edgePos1 = inputMesh.position(inputMesh.vertexAt(meshEdgeIndex0(e))); const Vector3 &edgePos2 = inputMesh.position(inputMesh.vertexAt(meshEdgeIndex1(e))); float t; - if (!lineIntersectsPoint(pos, edgePos1, edgePos2, &t, inputMesh.epsilon())) - continue; - SplitEdge splitEdge; - splitEdge.edge = e; - splitEdge.t = t; - splitEdge.vertex = v; - splitEdges.push_back(splitEdge); + if (lineIntersectsPoint(pos, edgePos1, edgePos2, &t, inputMesh.epsilon())) + count++; } } - if (splitEdges.isEmpty()) - return nullptr; - const uint32_t faceCount = inputMesh.faceCount(); - Mesh *mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, inputMesh.epsilon(), vertexCount + splitEdges.size(), faceCount); - for (uint32_t v = 0; v < vertexCount; v++) - mesh->addVertex(inputMesh.position(v)); - Array<uint32_t> indexArray; - indexArray.reserve(4); - Array<SplitEdge> faceSplitEdges; - faceSplitEdges.reserve(4); - for (uint32_t f = 0; f < faceCount; f++) { - // Find t-junctions in this face. - faceSplitEdges.clear(); - for (uint32_t i = 0; i < splitEdges.size(); i++) { - if (meshEdgeFace(splitEdges[i].edge) == f) - faceSplitEdges.push_back(splitEdges[i]); - } - if (!faceSplitEdges.isEmpty()) { - // Need to split edges in winding order when a single edge has multiple t-junctions. - insertionSort(faceSplitEdges.data(), faceSplitEdges.size()); - indexArray.clear(); - for (Mesh::FaceEdgeIterator it(&inputMesh, f); !it.isDone(); it.advance()) { - indexArray.push_back(it.vertex0()); - for (uint32_t se = 0; se < faceSplitEdges.size(); se++) { - const SplitEdge &splitEdge = faceSplitEdges[se]; - if (splitEdge.edge == it.edge()) - indexArray.push_back(splitEdge.vertex); + return count; +} +#endif + +// References invalid faces and vertices in a mesh. +struct InvalidMeshGeometry { + // If meshFaceGroups is not null, invalid faces have the face group MeshFaceGroups::kInvalid. + // If meshFaceGroups is null, invalid faces are Mesh::isFaceIgnored. + void extract(const Mesh *mesh, const MeshFaceGroups *meshFaceGroups) { + // Copy invalid faces. + m_faces.clear(); + const uint32_t meshFaceCount = mesh->faceCount(); + for (uint32_t f = 0; f < meshFaceCount; f++) { + if ((meshFaceGroups && meshFaceGroups->groupAt(f) == MeshFaceGroups::kInvalid) || (!meshFaceGroups && mesh->isFaceIgnored(f))) + m_faces.push_back(f); + } + // Create *unique* list of vertices of invalid faces. + const uint32_t faceCount = m_faces.size(); + m_indices.resize(faceCount * 3); + const uint32_t approxVertexCount = min(faceCount * 3, mesh->vertexCount()); + m_vertexToSourceVertexMap.clear(); + m_vertexToSourceVertexMap.reserve(approxVertexCount); + HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToVertexMap(MemTag::Mesh, approxVertexCount); + for (uint32_t f = 0; f < faceCount; f++) { + const uint32_t face = m_faces[f]; + for (uint32_t i = 0; i < 3; i++) { + const uint32_t vertex = mesh->vertexAt(face * 3 + i); + uint32_t newVertex = sourceVertexToVertexMap.get(vertex); + if (newVertex == UINT32_MAX) { + newVertex = sourceVertexToVertexMap.add(vertex); + m_vertexToSourceVertexMap.push_back(vertex); } - } - if (!meshCloseHole(mesh, indexArray, Vector3(0.0f))) { - if (failed) - *failed = true; - } - } else { - // No t-junctions in this face. Copy from input mesh. - if (mesh->addFace(&inputMesh.indices()[f * 3]) == Mesh::AddFaceResult::DuplicateEdge) { - if (duplicatedEdge) - *duplicatedEdge = true; + m_indices[f * 3 + i] = newVertex; } } } - if (fixedTJunctionsCount) - *fixedTJunctionsCount = splitEdges.size(); - return mesh; -} -// boundaryLoops are the first edges for each boundary loop. -static void meshGetBoundaryLoops(const Mesh &mesh, Array<uint32_t> &boundaryLoops) -{ - const uint32_t edgeCount = mesh.edgeCount(); - BitArray bitFlags(edgeCount); - bitFlags.zeroOutMemory(); - boundaryLoops.clear(); - // Search for boundary edges. Mark all the edges that belong to the same boundary. - for (uint32_t e = 0; e < edgeCount; e++) { - if (bitFlags.get(e) || !mesh.isBoundaryEdge(e)) - continue; - for (Mesh::BoundaryLoopEdgeIterator it(&mesh, e); !it.isDone(); it.advance()) - bitFlags.set(it.edge()); - boundaryLoops.push_back(e); - } -} + ConstArrayView<uint32_t> faces() const { return m_faces; } + ConstArrayView<uint32_t> indices() const { return m_indices; } + ConstArrayView<uint32_t> vertices() const { return m_vertexToSourceVertexMap; } -struct Progress -{ - Progress(ProgressCategory::Enum category, ProgressFunc func, void *userData, uint32_t maxValue) : value(0), cancel(false), m_category(category), m_func(func), m_userData(userData), m_maxValue(maxValue), m_progress(0) - { +private: + Array<uint32_t> m_faces, m_indices; + Array<uint32_t> m_vertexToSourceVertexMap; // Map face vertices to vertices of the source mesh. +}; + +struct Progress { + Progress(ProgressCategory category, ProgressFunc func, void *userData, uint32_t maxValue) : + cancel(false), m_category(category), m_func(func), m_userData(userData), m_value(0), m_maxValue(maxValue), m_percent(0) { if (m_func) { if (!m_func(category, 0, userData)) cancel = true; } } - ~Progress() - { + ~Progress() { if (m_func) { if (!m_func(m_category, 100, m_userData)) cancel = true; } } - void update() - { - if (!m_func) - return; - m_mutex.lock(); - const uint32_t newProgress = uint32_t(ceilf(value.load() / (float)m_maxValue * 100.0f)); - if (newProgress != m_progress && newProgress < 100) { - m_progress = newProgress; - if (!m_func(m_category, m_progress, m_userData)) - cancel = true; - } - m_mutex.unlock(); + void increment(uint32_t value) { + m_value += value; + update(); } - void setMaxValue(uint32_t maxValue) - { - m_mutex.lock(); + void setMaxValue(uint32_t maxValue) { m_maxValue = maxValue; - m_mutex.unlock(); + update(); } - std::atomic<uint32_t> value; std::atomic<bool> cancel; private: - ProgressCategory::Enum m_category; + void update() { + if (!m_func) + return; + const uint32_t newPercent = uint32_t(ceilf(m_value.load() / (float)m_maxValue.load() * 100.0f)); + if (newPercent != m_percent) { + // Atomic max. + uint32_t oldPercent = m_percent; + while (oldPercent < newPercent && !m_percent.compare_exchange_weak(oldPercent, newPercent)) { + } + if (!m_func(m_category, m_percent, m_userData)) + cancel = true; + } + } + + ProgressCategory m_category; ProgressFunc m_func; void *m_userData; - uint32_t m_maxValue; - uint32_t m_progress; - std::mutex m_mutex; + std::atomic<uint32_t> m_value, m_maxValue, m_percent; }; -struct Spinlock -{ - void lock() { while(m_lock.test_and_set(std::memory_order_acquire)) {} } +struct Spinlock { + void lock() { + while (m_lock.test_and_set(std::memory_order_acquire)) { + } + } void unlock() { m_lock.clear(std::memory_order_release); } private: std::atomic_flag m_lock = ATOMIC_FLAG_INIT; }; -struct TaskGroupHandle -{ +struct TaskGroupHandle { uint32_t value = UINT32_MAX; }; -struct Task -{ - void (*func)(void *userData); - void *userData; +struct Task { + void (*func)(void *groupUserData, void *taskUserData); + void *userData; // Passed to func as taskUserData. }; #if XA_MULTITHREADED -class TaskScheduler -{ +class TaskScheduler { public: - TaskScheduler() : m_shutdown(false) - { + TaskScheduler() : + m_shutdown(false) { m_threadIndex = 0; // Max with current task scheduler usage is 1 per thread + 1 deep nesting, but allow for some slop. m_maxGroups = std::thread::hardware_concurrency() * 4; @@ -3739,6 +2968,7 @@ public: new (&m_groups[i]) TaskGroup(); m_groups[i].free = true; m_groups[i].ref = 0; + m_groups[i].userData = nullptr; } m_workers.resize(std::thread::hardware_concurrency() <= 1 ? 1 : std::thread::hardware_concurrency() - 1); for (uint32_t i = 0; i < m_workers.size(); i++) { @@ -3748,8 +2978,7 @@ public: } } - ~TaskScheduler() - { + ~TaskScheduler() { m_shutdown = true; for (uint32_t i = 0; i < m_workers.size(); i++) { Worker &worker = m_workers[i]; @@ -3767,13 +2996,12 @@ public: XA_FREE(m_groups); } - uint32_t threadCount() const - { + uint32_t threadCount() const { return max(1u, std::thread::hardware_concurrency()); // Including the main thread. } - TaskGroupHandle createTaskGroup(uint32_t reserveSize = 0) - { + // userData is passed to Task::func as groupUserData. + TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) { // Claim the first free group. for (uint32_t i = 0; i < m_maxGroups; i++) { TaskGroup &group = m_groups[i]; @@ -3785,6 +3013,8 @@ public: group.queue.clear(); group.queue.reserve(reserveSize); group.queueLock.unlock(); + group.userData = userData; + group.ref = 0; TaskGroupHandle handle; handle.value = i; return handle; @@ -3795,8 +3025,7 @@ public: return handle; } - void run(TaskGroupHandle handle, const Task &task) - { + void run(TaskGroupHandle handle, const Task &task) { XA_DEBUG_ASSERT(handle.value != UINT32_MAX); TaskGroup &group = m_groups[handle.value]; group.queueLock.lock(); @@ -3810,8 +3039,7 @@ public: } } - void wait(TaskGroupHandle *handle) - { + void wait(TaskGroupHandle *handle) { if (handle->value == UINT32_MAX) { XA_DEBUG_ASSERT(false); return; @@ -3826,7 +3054,7 @@ public: group.queueLock.unlock(); if (!task) break; - task->func(task->userData); + task->func(group.userData, task->userData); group.ref--; } // Even though the task queue is empty, workers can still be running tasks. @@ -3839,17 +3067,16 @@ public: static uint32_t currentThreadIndex() { return m_threadIndex; } private: - struct TaskGroup - { + struct TaskGroup { std::atomic<bool> free; Array<Task> queue; // Items are never removed. queueHead is incremented to pop items. uint32_t queueHead = 0; Spinlock queueLock; std::atomic<uint32_t> ref; // Increment when a task is enqueued, decrement when a task finishes. + void *userData; }; - struct Worker - { + struct Worker { std::thread *thread = nullptr; std::mutex mutex; std::condition_variable cv; @@ -3862,12 +3089,11 @@ private: uint32_t m_maxGroups; static thread_local uint32_t m_threadIndex; - static void workerThread(TaskScheduler *scheduler, Worker *worker, uint32_t threadIndex) - { + static void workerThread(TaskScheduler *scheduler, Worker *worker, uint32_t threadIndex) { m_threadIndex = threadIndex; std::unique_lock<std::mutex> lock(worker->mutex); for (;;) { - worker->cv.wait(lock, [=]{ return worker->wakeup.load(); }); + worker->cv.wait(lock, [=] { return worker->wakeup.load(); }); worker->wakeup = false; for (;;) { if (scheduler->m_shutdown) @@ -3889,7 +3115,7 @@ private: } if (!task) break; - task->func(task->userData); + task->func(group->userData, task->userData); group->ref--; } } @@ -3898,44 +3124,39 @@ private: thread_local uint32_t TaskScheduler::m_threadIndex; #else -class TaskScheduler -{ +class TaskScheduler { public: - ~TaskScheduler() - { + ~TaskScheduler() { for (uint32_t i = 0; i < m_groups.size(); i++) destroyGroup({ i }); } - uint32_t threadCount() const - { + uint32_t threadCount() const { return 1; } - TaskGroupHandle createTaskGroup(uint32_t reserveSize = 0) - { + TaskGroupHandle createTaskGroup(void *userData = nullptr, uint32_t reserveSize = 0) { TaskGroup *group = XA_NEW(MemTag::Default, TaskGroup); group->queue.reserve(reserveSize); + group->userData = userData; m_groups.push_back(group); TaskGroupHandle handle; handle.value = m_groups.size() - 1; return handle; } - void run(TaskGroupHandle handle, Task task) - { + void run(TaskGroupHandle handle, Task task) { m_groups[handle.value]->queue.push_back(task); } - void wait(TaskGroupHandle *handle) - { + void wait(TaskGroupHandle *handle) { if (handle->value == UINT32_MAX) { XA_DEBUG_ASSERT(false); return; } TaskGroup *group = m_groups[handle->value]; for (uint32_t i = 0; i < group->queue.size(); i++) - group->queue[i].func(group->queue[i].userData); + group->queue[i].func(group->userData, group->queue[i].userData); group->queue.clear(); destroyGroup(*handle); handle->value = UINT32_MAX; @@ -3944,8 +3165,7 @@ public: static uint32_t currentThreadIndex() { return 0; } private: - void destroyGroup(TaskGroupHandle handle) - { + void destroyGroup(TaskGroupHandle handle) { TaskGroup *group = m_groups[handle.value]; if (group) { group->~TaskGroup(); @@ -3954,9 +3174,9 @@ private: } } - struct TaskGroup - { + struct TaskGroup { Array<Task> queue; + void *userData; }; Array<TaskGroup *> m_groups; @@ -3968,8 +3188,7 @@ const uint8_t TGA_TYPE_RGB = 2; const uint8_t TGA_ORIGIN_UPPER = 0x20; #pragma pack(push, 1) -struct TgaHeader -{ +struct TgaHeader { uint8_t id_length; uint8_t colormap_type; uint8_t image_type; @@ -3986,8 +3205,7 @@ struct TgaHeader }; #pragma pack(pop) -static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, uint32_t height) -{ +static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, uint32_t height) { XA_DEBUG_ASSERT(sizeof(TgaHeader) == TgaHeader::Size); FILE *f; XA_FOPEN(f, filename, "wb"); @@ -4012,12 +3230,10 @@ static void WriteTga(const char *filename, const uint8_t *data, uint32_t width, } #endif -template<typename T> -class ThreadLocal -{ +template <typename T> +class ThreadLocal { public: - ThreadLocal() - { + ThreadLocal() { #if XA_MULTITHREADED const uint32_t n = std::thread::hardware_concurrency(); #else @@ -4028,8 +3244,7 @@ public: new (&m_array[i]) T; } - ~ThreadLocal() - { + ~ThreadLocal() { #if XA_MULTITHREADED const uint32_t n = std::thread::hardware_concurrency(); #else @@ -4040,8 +3255,7 @@ public: XA_FREE(m_array); } - T &get() const - { + T &get() const { return m_array[TaskScheduler::currentThreadIndex()]; } @@ -4049,11 +3263,104 @@ private: T *m_array; }; -class UniformGrid2 -{ +// Implemented as a struct so the temporary arrays can be reused. +struct Triangulator { + // This is doing a simple ear-clipping algorithm that skips invalid triangles. Ideally, we should + // also sort the ears by angle, start with the ones that have the smallest angle and proceed in order. + void triangulatePolygon(ConstArrayView<Vector3> vertices, ConstArrayView<uint32_t> inputIndices, Array<uint32_t> &outputIndices) { + m_polygonVertices.clear(); + m_polygonVertices.reserve(inputIndices.length); + outputIndices.clear(); + if (inputIndices.length == 3) { + // Simple case for triangles. + outputIndices.push_back(inputIndices[0]); + outputIndices.push_back(inputIndices[1]); + outputIndices.push_back(inputIndices[2]); + } else { + // Build 2D polygon projecting vertices onto normal plane. + // Faces are not necesarily planar, this is for example the case, when the face comes from filling a hole. In such cases + // it's much better to use the best fit plane. + Basis basis; + basis.normal = normalize(cross(vertices[inputIndices[1]] - vertices[inputIndices[0]], vertices[inputIndices[2]] - vertices[inputIndices[1]])); + basis.tangent = basis.computeTangent(basis.normal); + basis.bitangent = basis.computeBitangent(basis.normal, basis.tangent); + const uint32_t edgeCount = inputIndices.length; + m_polygonPoints.clear(); + m_polygonPoints.reserve(edgeCount); + m_polygonAngles.clear(); + m_polygonAngles.reserve(edgeCount); + for (uint32_t i = 0; i < inputIndices.length; i++) { + m_polygonVertices.push_back(inputIndices[i]); + const Vector3 &pos = vertices[inputIndices[i]]; + m_polygonPoints.push_back(Vector2(dot(basis.tangent, pos), dot(basis.bitangent, pos))); + } + m_polygonAngles.resize(edgeCount); + while (m_polygonVertices.size() > 2) { + const uint32_t size = m_polygonVertices.size(); + // Update polygon angles. @@ Update only those that have changed. + float minAngle = kPi2; + uint32_t bestEar = 0; // Use first one if none of them is valid. + bool bestIsValid = false; + for (uint32_t i = 0; i < size; i++) { + uint32_t i0 = i; + uint32_t i1 = (i + 1) % size; // Use Sean's polygon interation trick. + uint32_t i2 = (i + 2) % size; + Vector2 p0 = m_polygonPoints[i0]; + Vector2 p1 = m_polygonPoints[i1]; + Vector2 p2 = m_polygonPoints[i2]; + float d = clamp(dot(p0 - p1, p2 - p1) / (length(p0 - p1) * length(p2 - p1)), -1.0f, 1.0f); + float angle = acosf(d); + float area = triangleArea(p0, p1, p2); + if (area < 0.0f) + angle = kPi2 - angle; + m_polygonAngles[i1] = angle; + if (angle < minAngle || !bestIsValid) { + // Make sure this is a valid ear, if not, skip this point. + bool valid = true; + for (uint32_t j = 0; j < size; j++) { + if (j == i0 || j == i1 || j == i2) + continue; + Vector2 p = m_polygonPoints[j]; + if (pointInTriangle(p, p0, p1, p2)) { + valid = false; + break; + } + } + if (valid || !bestIsValid) { + minAngle = angle; + bestEar = i1; + bestIsValid = valid; + } + } + } + // Clip best ear: + const uint32_t i0 = (bestEar + size - 1) % size; + const uint32_t i1 = (bestEar + 0) % size; + const uint32_t i2 = (bestEar + 1) % size; + outputIndices.push_back(m_polygonVertices[i0]); + outputIndices.push_back(m_polygonVertices[i1]); + outputIndices.push_back(m_polygonVertices[i2]); + m_polygonVertices.removeAt(i1); + m_polygonPoints.removeAt(i1); + m_polygonAngles.removeAt(i1); + } + } + } + +private: + static bool pointInTriangle(const Vector2 &p, const Vector2 &a, const Vector2 &b, const Vector2 &c) { + return triangleArea(a, b, p) >= kAreaEpsilon && triangleArea(b, c, p) >= kAreaEpsilon && triangleArea(c, a, p) >= kAreaEpsilon; + } + + Array<int> m_polygonVertices; + Array<float> m_polygonAngles; + Array<Vector2> m_polygonPoints; +}; + +class UniformGrid2 { public: - void reset(const Vector2 *positions, const uint32_t *indices = nullptr, uint32_t reserveEdgeCount = 0) - { + // indices are optional. + void reset(ConstArrayView<Vector2> positions, ConstArrayView<uint32_t> indices = ConstArrayView<uint32_t>(), uint32_t reserveEdgeCount = 0) { m_edges.clear(); if (reserveEdgeCount > 0) m_edges.reserve(reserveEdgeCount); @@ -4062,14 +3369,12 @@ public: m_cellDataOffsets.clear(); } - void append(uint32_t edge) - { + void append(uint32_t edge) { XA_DEBUG_ASSERT(m_cellDataOffsets.isEmpty()); m_edges.push_back(edge); } - bool intersect(Vector2 v1, Vector2 v2, float epsilon) - { + bool intersect(Vector2 v1, Vector2 v2, float epsilon) { const uint32_t edgeCount = m_edges.size(); bool bruteForce = edgeCount <= 20; if (!bruteForce && m_cellDataOffsets.isEmpty()) @@ -4096,8 +3401,7 @@ public: } // If edges is empty, checks for intersection with all edges in the grid. - bool intersect(float epsilon, ConstArrayView<uint32_t> edges = ConstArrayView<uint32_t>(), ConstArrayView<uint32_t> ignoreEdges = ConstArrayView<uint32_t>()) - { + bool intersect(float epsilon, ConstArrayView<uint32_t> edges = ConstArrayView<uint32_t>(), ConstArrayView<uint32_t> ignoreEdges = ConstArrayView<uint32_t>()) { bool bruteForce = m_edges.size() <= 20; if (!bruteForce && m_cellDataOffsets.isEmpty()) bruteForce = !createGrid(); @@ -4167,8 +3471,7 @@ public: } #if XA_DEBUG_EXPORT_BOUNDARY_GRID - void debugExport(const char *filename) - { + void debugExport(const char *filename) { Array<uint8_t> image; image.resize(m_gridWidth * m_gridHeight * 3); for (uint32_t y = 0; y < m_gridHeight; y++) { @@ -4190,8 +3493,7 @@ public: #endif private: - bool createGrid() - { + bool createGrid() { // Compute edge extents. Min will be the grid origin. const uint32_t edgeCount = m_edges.size(); Extents2 edgeExtents; @@ -4202,14 +3504,14 @@ private: edgeExtents.add(edgePosition1(edge)); } m_gridOrigin = edgeExtents.min; - // Size grid to approximately one edge per cell. + // Size grid to approximately one edge per cell in the largest dimension. const Vector2 extentsSize(edgeExtents.max - edgeExtents.min); - m_cellSize = min(extentsSize.x, extentsSize.y) / sqrtf((float)edgeCount); + m_cellSize = max(extentsSize.x, extentsSize.y) / (float)clamp(edgeCount, 32u, 512u); if (m_cellSize <= 0.0f) return false; m_gridWidth = uint32_t(ceilf(extentsSize.x / m_cellSize)); m_gridHeight = uint32_t(ceilf(extentsSize.y / m_cellSize)); - if (m_gridWidth == 0 || m_gridHeight == 0) + if (m_gridWidth <= 1 || m_gridHeight <= 1) return false; // Insert edges into cells. m_cellDataOffsets.resize(m_gridWidth * m_gridHeight); @@ -4243,8 +3545,7 @@ private: return true; } - void computePotentialEdges(Vector2 p1, Vector2 p2) - { + void computePotentialEdges(Vector2 p1, Vector2 p2) { m_potentialEdges.clear(); traverse(p1, p2); for (uint32_t j = 0; j < m_traversedCellOffsets.size(); j++) { @@ -4262,10 +3563,9 @@ private: } // "A Fast Voxel Traversal Algorithm for Ray Tracing" - void traverse(Vector2 p1, Vector2 p2) - { + void traverse(Vector2 p1, Vector2 p2) { const Vector2 dir = p2 - p1; - const Vector2 normal = normalizeSafe(dir, Vector2(0.0f), kEpsilon); + const Vector2 normal = normalizeSafe(dir, Vector2(0.0f)); const int stepX = dir.x >= 0 ? 1 : -1; const int stepY = dir.y >= 0 ? 1 : -1; const uint32_t firstCell[2] = { cellX(p1.x), cellY(p1.y) }; @@ -4284,14 +3584,12 @@ private: if (normal.x > kEpsilon || normal.x < -kEpsilon) { tMaxX = (distToNextCellX * stepX) / normal.x; tDeltaX = (m_cellSize * stepX) / normal.x; - } - else + } else tMaxX = tDeltaX = FLT_MAX; if (normal.y > kEpsilon || normal.y < -kEpsilon) { tMaxY = (distToNextCellY * stepY) / normal.y; tDeltaY = (m_cellSize * stepY) / normal.y; - } - else + } else tMaxY = tDeltaY = FLT_MAX; m_traversedCellOffsets.clear(); m_traversedCellOffsets.push_back(firstCell[0] + firstCell[1] * m_gridWidth); @@ -4318,34 +3616,29 @@ private: } } - uint32_t cellX(float x) const - { + uint32_t cellX(float x) const { return min((uint32_t)max(0.0f, (x - m_gridOrigin.x) / m_cellSize), m_gridWidth - 1u); } - uint32_t cellY(float y) const - { + uint32_t cellY(float y) const { return min((uint32_t)max(0.0f, (y - m_gridOrigin.y) / m_cellSize), m_gridHeight - 1u); } - Vector2 edgePosition0(uint32_t edge) const - { + Vector2 edgePosition0(uint32_t edge) const { return m_positions[vertexAt(meshEdgeIndex0(edge))]; } - Vector2 edgePosition1(uint32_t edge) const - { + Vector2 edgePosition1(uint32_t edge) const { return m_positions[vertexAt(meshEdgeIndex1(edge))]; } - uint32_t vertexAt(uint32_t index) const - { - return m_indices ? m_indices[index] : index; + uint32_t vertexAt(uint32_t index) const { + return m_indices.length > 0 ? m_indices[index] : index; } Array<uint32_t> m_edges; - const Vector2 *m_positions; - const uint32_t *m_indices; // Optional + ConstArrayView<Vector2> m_positions; + ConstArrayView<uint32_t> m_indices; // Optional. Empty if unused. float m_cellSize; Vector2 m_gridOrigin; uint32_t m_gridWidth, m_gridHeight; // in cells @@ -4355,26 +3648,25 @@ private: Array<uint32_t> m_traversedCellOffsets; }; -struct UvMeshChart -{ +struct UvMeshChart { Array<uint32_t> faces; Array<uint32_t> indices; uint32_t material; }; -struct UvMesh -{ +struct UvMesh { UvMeshDecl decl; + BitArray faceIgnore; + Array<uint32_t> faceMaterials; Array<uint32_t> indices; + Array<Vector2> texcoords; // Copied from input and never modified, UvMeshInstance::texcoords are. Used to restore UvMeshInstance::texcoords so packing can be run multiple times. Array<UvMeshChart *> charts; Array<uint32_t> vertexToChartMap; }; -struct UvMeshInstance -{ +struct UvMeshInstance { UvMesh *mesh; Array<Vector2> texcoords; - bool rotateCharts; }; /* @@ -4420,27 +3712,30 @@ struct UvMeshInstance * FRANCE */ namespace opennl { -#define NL_NEW(T) XA_ALLOC(MemTag::OpenNL, T) -#define NL_NEW_ARRAY(T,NB) XA_ALLOC_ARRAY(MemTag::OpenNL, T, NB) -#define NL_RENEW_ARRAY(T,x,NB) XA_REALLOC(MemTag::OpenNL, x, T, NB) -#define NL_DELETE(x) XA_FREE(x); x = nullptr -#define NL_DELETE_ARRAY(x) XA_FREE(x); x = nullptr -#define NL_CLEAR(x, T) memset(x, 0, sizeof(T)); -#define NL_CLEAR_ARRAY(T,x,NB) memset(x, 0, (size_t)(NB)*sizeof(T)) -#define NL_NEW_VECTOR(dim) XA_ALLOC_ARRAY(MemTag::OpenNL, double, dim) -#define NL_DELETE_VECTOR(ptr) XA_FREE(ptr) +#define NL_NEW(T) XA_ALLOC(MemTag::OpenNL, T) +#define NL_NEW_ARRAY(T, NB) XA_ALLOC_ARRAY(MemTag::OpenNL, T, NB) +#define NL_RENEW_ARRAY(T, x, NB) XA_REALLOC(MemTag::OpenNL, x, T, NB) +#define NL_DELETE(x) \ + XA_FREE(x); \ + x = nullptr +#define NL_DELETE_ARRAY(x) \ + XA_FREE(x); \ + x = nullptr +#define NL_CLEAR(x, T) memset(x, 0, sizeof(T)); +#define NL_CLEAR_ARRAY(T, x, NB) memset(x, 0, (size_t)(NB) * sizeof(T)) +#define NL_NEW_VECTOR(dim) XA_ALLOC_ARRAY(MemTag::OpenNL, double, dim) +#define NL_DELETE_VECTOR(ptr) XA_FREE(ptr) struct NLMatrixStruct; -typedef NLMatrixStruct * NLMatrix; +typedef NLMatrixStruct *NLMatrix; typedef void (*NLDestroyMatrixFunc)(NLMatrix M); -typedef void (*NLMultMatrixVectorFunc)(NLMatrix M, const double* x, double* y); +typedef void (*NLMultMatrixVectorFunc)(NLMatrix M, const double *x, double *y); #define NL_MATRIX_SPARSE_DYNAMIC 0x1001 -#define NL_MATRIX_CRS 0x1002 -#define NL_MATRIX_OTHER 0x1006 +#define NL_MATRIX_CRS 0x1002 +#define NL_MATRIX_OTHER 0x1006 -struct NLMatrixStruct -{ +struct NLMatrixStruct { uint32_t m; uint32_t n; uint32_t type; @@ -4450,39 +3745,35 @@ struct NLMatrixStruct /* Dynamic arrays for sparse row/columns */ -struct NLCoeff -{ +struct NLCoeff { uint32_t index; double value; }; -struct NLRowColumn -{ +struct NLRowColumn { uint32_t size; uint32_t capacity; - NLCoeff* coeff; + NLCoeff *coeff; }; /* Compressed Row Storage */ -struct NLCRSMatrix -{ +struct NLCRSMatrix { uint32_t m; uint32_t n; uint32_t type; NLDestroyMatrixFunc destroy_func; NLMultMatrixVectorFunc mult_func; - double* val; - uint32_t* rowptr; - uint32_t* colind; + double *val; + uint32_t *rowptr; + uint32_t *colind; uint32_t nslices; - uint32_t* sliceptr; + uint32_t *sliceptr; }; /* SparseMatrix data structure */ -struct NLSparseMatrix -{ +struct NLSparseMatrix { uint32_t m; uint32_t n; uint32_t type; @@ -4490,25 +3781,23 @@ struct NLSparseMatrix NLMultMatrixVectorFunc mult_func; uint32_t diag_size; uint32_t diag_capacity; - NLRowColumn* row; - NLRowColumn* column; - double* diag; + NLRowColumn *row; + NLRowColumn *column; + double *diag; uint32_t row_capacity; uint32_t column_capacity; }; /* NLContext data structure */ -struct NLBufferBinding -{ - void* base_address; +struct NLBufferBinding { + void *base_address; uint32_t stride; }; -#define NL_BUFFER_ITEM(B,i) *(double*)((void*)((char*)((B).base_address)+((i)*(B).stride))) +#define NL_BUFFER_ITEM(B, i) *(double *)((void *)((char *)((B).base_address) + ((i) * (B).stride))) -struct NLContext -{ +struct NLContext { NLBufferBinding *variable_buffer; double *variable_value; bool *variable_is_locked; @@ -4532,35 +3821,30 @@ struct NLContext double error; }; -static void nlDeleteMatrix(NLMatrix M) -{ +static void nlDeleteMatrix(NLMatrix M) { if (!M) return; M->destroy_func(M); NL_DELETE(M); } -static void nlMultMatrixVector(NLMatrix M, const double* x, double* y) -{ +static void nlMultMatrixVector(NLMatrix M, const double *x, double *y) { M->mult_func(M, x, y); } -static void nlRowColumnConstruct(NLRowColumn* c) -{ +static void nlRowColumnConstruct(NLRowColumn *c) { c->size = 0; c->capacity = 0; c->coeff = nullptr; } -static void nlRowColumnDestroy(NLRowColumn* c) -{ +static void nlRowColumnDestroy(NLRowColumn *c) { NL_DELETE_ARRAY(c->coeff); c->size = 0; c->capacity = 0; } -static void nlRowColumnGrow(NLRowColumn* c) -{ +static void nlRowColumnGrow(NLRowColumn *c) { if (c->capacity != 0) { c->capacity = 2 * c->capacity; c->coeff = NL_RENEW_ARRAY(NLCoeff, c->coeff, c->capacity); @@ -4571,8 +3855,7 @@ static void nlRowColumnGrow(NLRowColumn* c) } } -static void nlRowColumnAdd(NLRowColumn* c, uint32_t index, double value) -{ +static void nlRowColumnAdd(NLRowColumn *c, uint32_t index, double value) { for (uint32_t i = 0; i < c->size; i++) { if (c->coeff[i].index == index) { c->coeff[i].value += value; @@ -4587,8 +3870,7 @@ static void nlRowColumnAdd(NLRowColumn* c, uint32_t index, double value) } /* Does not check whether the index already exists */ -static void nlRowColumnAppend(NLRowColumn* c, uint32_t index, double value) -{ +static void nlRowColumnAppend(NLRowColumn *c, uint32_t index, double value) { if (c->size == c->capacity) nlRowColumnGrow(c); c->coeff[c->size].index = index; @@ -4596,32 +3878,27 @@ static void nlRowColumnAppend(NLRowColumn* c, uint32_t index, double value) c->size++; } -static void nlRowColumnZero(NLRowColumn* c) -{ +static void nlRowColumnZero(NLRowColumn *c) { c->size = 0; } -static void nlRowColumnClear(NLRowColumn* c) -{ +static void nlRowColumnClear(NLRowColumn *c) { c->size = 0; c->capacity = 0; NL_DELETE_ARRAY(c->coeff); } -static int nlCoeffCompare(const void* p1, const void* p2) -{ - return (((NLCoeff*)(p2))->index < ((NLCoeff*)(p1))->index); +static int nlCoeffCompare(const void *p1, const void *p2) { + return (((NLCoeff *)(p2))->index < ((NLCoeff *)(p1))->index); } -static void nlRowColumnSort(NLRowColumn* c) -{ +static void nlRowColumnSort(NLRowColumn *c) { qsort(c->coeff, c->size, sizeof(NLCoeff), nlCoeffCompare); } /* CRSMatrix data structure */ -static void nlCRSMatrixDestroy(NLCRSMatrix* M) -{ +static void nlCRSMatrixDestroy(NLCRSMatrix *M) { NL_DELETE_ARRAY(M->val); NL_DELETE_ARRAY(M->rowptr); NL_DELETE_ARRAY(M->colind); @@ -4631,8 +3908,7 @@ static void nlCRSMatrixDestroy(NLCRSMatrix* M) M->nslices = 0; } -static void nlCRSMatrixMultSlice(NLCRSMatrix* M, const double* x, double* y, uint32_t Ibegin, uint32_t Iend) -{ +static void nlCRSMatrixMultSlice(NLCRSMatrix *M, const double *x, double *y, uint32_t Ibegin, uint32_t Iend) { for (uint32_t i = Ibegin; i < Iend; ++i) { double sum = 0.0; for (uint32_t j = M->rowptr[i]; j < M->rowptr[i + 1]; ++j) @@ -4641,15 +3917,13 @@ static void nlCRSMatrixMultSlice(NLCRSMatrix* M, const double* x, double* y, uin } } -static void nlCRSMatrixMult(NLCRSMatrix* M, const double* x, double* y) -{ +static void nlCRSMatrixMult(NLCRSMatrix *M, const double *x, double *y) { int nslices = (int)(M->nslices); for (int slice = 0; slice < nslices; ++slice) nlCRSMatrixMultSlice(M, x, y, M->sliceptr[slice], M->sliceptr[slice + 1]); } -static void nlCRSMatrixConstruct(NLCRSMatrix* M, uint32_t m, uint32_t n, uint32_t nnz, uint32_t nslices) -{ +static void nlCRSMatrixConstruct(NLCRSMatrix *M, uint32_t m, uint32_t n, uint32_t nnz, uint32_t nslices) { M->m = m; M->n = n; M->type = NL_MATRIX_CRS; @@ -4668,22 +3942,19 @@ static void nlCRSMatrixConstruct(NLCRSMatrix* M, uint32_t m, uint32_t n, uint32_ /* SparseMatrix data structure */ -static void nlSparseMatrixDestroyRowColumns(NLSparseMatrix* M) -{ +static void nlSparseMatrixDestroyRowColumns(NLSparseMatrix *M) { for (uint32_t i = 0; i < M->m; i++) nlRowColumnDestroy(&(M->row[i])); NL_DELETE_ARRAY(M->row); } -static void nlSparseMatrixDestroy(NLSparseMatrix* M) -{ +static void nlSparseMatrixDestroy(NLSparseMatrix *M) { XA_DEBUG_ASSERT(M->type == NL_MATRIX_SPARSE_DYNAMIC); nlSparseMatrixDestroyRowColumns(M); NL_DELETE_ARRAY(M->diag); } -static void nlSparseMatrixAdd(NLSparseMatrix* M, uint32_t i, uint32_t j, double value) -{ +static void nlSparseMatrixAdd(NLSparseMatrix *M, uint32_t i, uint32_t j, double value) { XA_DEBUG_ASSERT(i >= 0 && i <= M->m - 1); XA_DEBUG_ASSERT(j >= 0 && j <= M->n - 1); if (i == j) @@ -4692,24 +3963,21 @@ static void nlSparseMatrixAdd(NLSparseMatrix* M, uint32_t i, uint32_t j, double } /* Returns the number of non-zero coefficients */ -static uint32_t nlSparseMatrixNNZ(NLSparseMatrix* M) -{ +static uint32_t nlSparseMatrixNNZ(NLSparseMatrix *M) { uint32_t nnz = 0; for (uint32_t i = 0; i < M->m; i++) nnz += M->row[i].size; return nnz; } -static void nlSparseMatrixSort(NLSparseMatrix* M) -{ +static void nlSparseMatrixSort(NLSparseMatrix *M) { for (uint32_t i = 0; i < M->m; i++) nlRowColumnSort(&(M->row[i])); } /* SparseMatrix x Vector routines, internal helper routines */ -static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double* y) -{ +static void nlSparseMatrix_mult_rows(NLSparseMatrix *A, const double *x, double *y) { /* * Note: OpenMP does not like unsigned ints * (causes some floating point exceptions), @@ -4717,8 +3985,8 @@ static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double* * indices. */ int m = (int)(A->m); - NLCoeff* c = nullptr; - NLRowColumn* Ri = nullptr; + NLCoeff *c = nullptr; + NLRowColumn *Ri = nullptr; for (int i = 0; i < m; i++) { Ri = &(A->row[i]); y[i] = 0; @@ -4729,14 +3997,12 @@ static void nlSparseMatrix_mult_rows(NLSparseMatrix* A, const double* x, double* } } -static void nlSparseMatrixMult(NLSparseMatrix* A, const double* x, double* y) -{ +static void nlSparseMatrixMult(NLSparseMatrix *A, const double *x, double *y) { XA_DEBUG_ASSERT(A->type == NL_MATRIX_SPARSE_DYNAMIC); nlSparseMatrix_mult_rows(A, x, y); } -static void nlSparseMatrixConstruct(NLSparseMatrix* M, uint32_t m, uint32_t n) -{ +static void nlSparseMatrixConstruct(NLSparseMatrix *M, uint32_t m, uint32_t n) { M->m = m; M->n = n; M->type = NL_MATRIX_SPARSE_DYNAMIC; @@ -4756,24 +4022,23 @@ static void nlSparseMatrixConstruct(NLSparseMatrix* M, uint32_t m, uint32_t n) NL_CLEAR_ARRAY(double, M->diag, M->diag_size); } -static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix* M) -{ +static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix *M) { uint32_t nnz = nlSparseMatrixNNZ(M); uint32_t nslices = 8; /* TODO: get number of cores */ uint32_t slice, cur_bound, cur_NNZ, cur_row; uint32_t k; uint32_t slice_size = nnz / nslices; - NLCRSMatrix* CRS = NL_NEW(NLCRSMatrix); + NLCRSMatrix *CRS = NL_NEW(NLCRSMatrix); NL_CLEAR(CRS, NLCRSMatrix); nlCRSMatrixConstruct(CRS, M->m, M->n, nnz, nslices); nlSparseMatrixSort(M); /* Convert matrix to CRS format */ k = 0; for (uint32_t i = 0; i < M->m; ++i) { - NLRowColumn* Ri = &(M->row[i]); + NLRowColumn *Ri = &(M->row[i]); CRS->rowptr[i] = k; for (uint32_t ij = 0; ij < Ri->size; ij++) { - NLCoeff* c = &(Ri->coeff[ij]); + NLCoeff *c = &(Ri->coeff[ij]); CRS->val[k] = c->value; CRS->colind[k] = c->index; ++k; @@ -4799,19 +4064,17 @@ static NLMatrix nlCRSMatrixNewFromSparseMatrix(NLSparseMatrix* M) return (NLMatrix)CRS; } -static void nlMatrixCompress(NLMatrix* M) -{ +static void nlMatrixCompress(NLMatrix *M) { NLMatrix CRS = nullptr; if ((*M)->type != NL_MATRIX_SPARSE_DYNAMIC) return; - CRS = nlCRSMatrixNewFromSparseMatrix((NLSparseMatrix*)*M); + CRS = nlCRSMatrixNewFromSparseMatrix((NLSparseMatrix *)*M); nlDeleteMatrix(*M); *M = CRS; } -static NLContext *nlNewContext() -{ - NLContext* result = NL_NEW(NLContext); +static NLContext *nlNewContext() { + NLContext *result = NL_NEW(NLContext); NL_CLEAR(result, NLContext); result->max_iterations = 100; result->threshold = 1e-6; @@ -4820,8 +4083,7 @@ static NLContext *nlNewContext() return result; } -static void nlDeleteContext(NLContext *context) -{ +static void nlDeleteContext(NLContext *context) { nlDeleteMatrix(context->M); context->M = nullptr; nlDeleteMatrix(context->P); @@ -4839,22 +4101,19 @@ static void nlDeleteContext(NLContext *context) NL_DELETE(context); } -static double ddot(int n, const double *x, const double *y) -{ +static double ddot(int n, const double *x, const double *y) { double sum = 0.0; for (int i = 0; i < n; i++) sum += x[i] * y[i]; return sum; } -static void daxpy(int n, double a, const double *x, double *y) -{ +static void daxpy(int n, double a, const double *x, double *y) { for (int i = 0; i < n; i++) y[i] = a * x[i] + y[i]; } -static void dscal(int n, double a, double *x) -{ +static void dscal(int n, double a, double *x) { for (int i = 0; i < n; i++) x[i] *= a; } @@ -4877,17 +4136,16 @@ static void dscal(int n, double a, double *x) * versions of matrix x vector product (CPU/GPU, sparse/dense ...) */ -static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double* b, double* x, double eps, uint32_t max_iter, double *sq_bnorm, double *sq_rnorm) -{ - int N = (int)M->n; - double* r = NL_NEW_VECTOR(N); - double* d = NL_NEW_VECTOR(N); - double* h = NL_NEW_VECTOR(N); +static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double *b, double *x, double eps, uint32_t max_iter, double *sq_bnorm, double *sq_rnorm) { + int N = (int)M->n; + double *r = NL_NEW_VECTOR(N); + double *d = NL_NEW_VECTOR(N); + double *h = NL_NEW_VECTOR(N); double *Ad = h; uint32_t its = 0; double rh, alpha, beta; double b_square = ddot(N, b, b); - double err = eps * eps*b_square; + double err = eps * eps * b_square; double curr_err; nlMultMatrixVector(M, x, r); daxpy(N, -1., b, r); @@ -4917,13 +4175,12 @@ static uint32_t nlSolveSystem_PRE_CG(NLMatrix M, NLMatrix P, double* b, double* return its; } -static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix P, double* b_in, double* x_in, double eps, uint32_t max_iter) -{ +static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix P, double *b_in, double *x_in, double eps, uint32_t max_iter) { uint32_t result = 0; double rnorm = 0.0; double bnorm = 0.0; - double* b = b_in; - double* x = x_in; + double *b = b_in; + double *x = x_in; XA_DEBUG_ASSERT(M->m == M->n); double sq_bnorm, sq_rnorm; result = nlSolveSystem_PRE_CG(M, P, b, x, eps, max_iter, &sq_bnorm, &sq_rnorm); @@ -4938,10 +4195,9 @@ static uint32_t nlSolveSystemIterative(NLContext *context, NLMatrix M, NLMatrix return result; } -static bool nlSolveIterative(NLContext *context) -{ - double* b = context->b; - double* x = context->x; +static bool nlSolveIterative(NLContext *context) { + double *b = context->b; + double *x = context->x; uint32_t n = context->n; NLMatrix M = context->M; NLMatrix P = context->P; @@ -4953,34 +4209,30 @@ static bool nlSolveIterative(NLContext *context) return true; } -struct NLJacobiPreconditioner -{ +struct NLJacobiPreconditioner { uint32_t m; uint32_t n; uint32_t type; NLDestroyMatrixFunc destroy_func; NLMultMatrixVectorFunc mult_func; - double* diag_inv; + double *diag_inv; }; -static void nlJacobiPreconditionerDestroy(NLJacobiPreconditioner* M) -{ +static void nlJacobiPreconditionerDestroy(NLJacobiPreconditioner *M) { NL_DELETE_ARRAY(M->diag_inv); } -static void nlJacobiPreconditionerMult(NLJacobiPreconditioner* M, const double* x, double* y) -{ +static void nlJacobiPreconditionerMult(NLJacobiPreconditioner *M, const double *x, double *y) { for (uint32_t i = 0; i < M->n; ++i) y[i] = x[i] * M->diag_inv[i]; } -static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) -{ - NLSparseMatrix* M = nullptr; - NLJacobiPreconditioner* result = nullptr; +static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) { + NLSparseMatrix *M = nullptr; + NLJacobiPreconditioner *result = nullptr; XA_DEBUG_ASSERT(M_in->type == NL_MATRIX_SPARSE_DYNAMIC); XA_DEBUG_ASSERT(M_in->m == M_in->n); - M = (NLSparseMatrix*)M_in; + M = (NLSparseMatrix *)M_in; result = NL_NEW(NLJacobiPreconditioner); NL_CLEAR(result, NLJacobiPreconditioner); result->m = M->m; @@ -4998,8 +4250,7 @@ static NLMatrix nlNewJacobiPreconditioner(NLMatrix M_in) #define NL_NB_VARIABLES 0x101 #define NL_MAX_ITERATIONS 0x103 -static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) -{ +static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) { if (pname == NL_NB_VARIABLES) { XA_DEBUG_ASSERT(param > 0); context->nb_variables = (uint32_t)param; @@ -5010,26 +4261,22 @@ static void nlSolverParameteri(NLContext *context, uint32_t pname, int param) } } -static void nlSetVariable(NLContext *context, uint32_t index, double value) -{ +static void nlSetVariable(NLContext *context, uint32_t index, double value) { XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); NL_BUFFER_ITEM(context->variable_buffer[0], index) = value; } -static double nlGetVariable(NLContext *context, uint32_t index) -{ +static double nlGetVariable(NLContext *context, uint32_t index) { XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); return NL_BUFFER_ITEM(context->variable_buffer[0], index); } -static void nlLockVariable(NLContext *context, uint32_t index) -{ +static void nlLockVariable(NLContext *context, uint32_t index) { XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); context->variable_is_locked[index] = true; } -static void nlVariablesToVector(NLContext *context) -{ +static void nlVariablesToVector(NLContext *context) { uint32_t n = context->n; XA_DEBUG_ASSERT(context->x); for (uint32_t k = 0; k < context->nb_systems; ++k) { @@ -5044,8 +4291,7 @@ static void nlVariablesToVector(NLContext *context) } } -static void nlVectorToVariables(NLContext *context) -{ +static void nlVectorToVariables(NLContext *context) { uint32_t n = context->n; XA_DEBUG_ASSERT(context->x); for (uint32_t k = 0; k < context->nb_systems; ++k) { @@ -5060,8 +4306,7 @@ static void nlVectorToVariables(NLContext *context) } } -static void nlCoefficient(NLContext *context, uint32_t index, double value) -{ +static void nlCoefficient(NLContext *context, uint32_t index, double value) { XA_DEBUG_ASSERT(index >= 0 && index <= context->nb_variables - 1); if (context->variable_is_locked[index]) { /* @@ -5078,12 +4323,11 @@ static void nlCoefficient(NLContext *context, uint32_t index, double value) } } -#define NL_SYSTEM 0x0 -#define NL_MATRIX 0x1 -#define NL_ROW 0x2 +#define NL_SYSTEM 0x0 +#define NL_MATRIX 0x1 +#define NL_ROW 0x2 -static void nlBegin(NLContext *context, uint32_t prim) -{ +static void nlBegin(NLContext *context, uint32_t prim) { if (prim == NL_SYSTEM) { XA_DEBUG_ASSERT(context->nb_variables > 0); context->variable_buffer = NL_NEW_ARRAY(NLBufferBinding, context->nb_systems); @@ -5092,8 +4336,8 @@ static void nlBegin(NLContext *context, uint32_t prim) NL_CLEAR_ARRAY(double, context->variable_value, context->nb_variables * context->nb_systems); for (uint32_t k = 0; k < context->nb_systems; ++k) { context->variable_buffer[k].base_address = - context->variable_value + - k * context->nb_variables; + context->variable_value + + k * context->nb_variables; context->variable_buffer[k].stride = sizeof(double); } context->variable_is_locked = NL_NEW_ARRAY(bool, context->nb_variables); @@ -5116,11 +4360,11 @@ static void nlBegin(NLContext *context, uint32_t prim) context->max_iterations = n * 5; context->M = (NLMatrix)(NL_NEW(NLSparseMatrix)); NL_CLEAR(context->M, NLSparseMatrix); - nlSparseMatrixConstruct((NLSparseMatrix*)(context->M), n, n); - context->x = NL_NEW_ARRAY(double, n*context->nb_systems); - NL_CLEAR_ARRAY(double, context->x, n*context->nb_systems); - context->b = NL_NEW_ARRAY(double, n*context->nb_systems); - NL_CLEAR_ARRAY(double, context->b, n*context->nb_systems); + nlSparseMatrixConstruct((NLSparseMatrix *)(context->M), n, n); + context->x = NL_NEW_ARRAY(double, n * context->nb_systems); + NL_CLEAR_ARRAY(double, context->x, n * context->nb_systems); + context->b = NL_NEW_ARRAY(double, n * context->nb_systems); + NL_CLEAR_ARRAY(double, context->b, n * context->nb_systems); nlVariablesToVector(context); nlRowColumnConstruct(&context->af); nlRowColumnConstruct(&context->al); @@ -5131,16 +4375,15 @@ static void nlBegin(NLContext *context, uint32_t prim) } } -static void nlEnd(NLContext *context, uint32_t prim) -{ +static void nlEnd(NLContext *context, uint32_t prim) { if (prim == NL_MATRIX) { nlRowColumnClear(&context->af); nlRowColumnClear(&context->al); } else if (prim == NL_ROW) { - NLRowColumn* af = &context->af; - NLRowColumn* al = &context->al; - NLSparseMatrix* M = (NLSparseMatrix*)context->M; - double* b = context->b; + NLRowColumn *af = &context->af; + NLRowColumn *al = &context->al; + NLSparseMatrix *M = (NLSparseMatrix *)context->M; + double *b = context->b; uint32_t nf = af->size; uint32_t nl = al->size; uint32_t n = context->n; @@ -5161,14 +4404,13 @@ static void nlEnd(NLContext *context, uint32_t prim) S += al->coeff[jj].value * NL_BUFFER_ITEM(context->variable_buffer[k], j); } for (uint32_t jj = 0; jj < nf; jj++) - b[k*n + af->coeff[jj].index] -= af->coeff[jj].value * S; + b[k * n + af->coeff[jj].index] -= af->coeff[jj].value * S; } context->current_row++; } } -static bool nlSolve(NLContext *context) -{ +static bool nlSolve(NLContext *context) { nlDeleteMatrix(context->P); context->P = nlNewJacobiPreconditioner(context->M); nlMatrixCompress(&context->M); @@ -5179,11 +4421,9 @@ static bool nlSolve(NLContext *context) } // namespace opennl namespace raster { -class ClippedTriangle -{ +class ClippedTriangle { public: - ClippedTriangle(const Vector2 &a, const Vector2 &b, const Vector2 &c) - { + ClippedTriangle(const Vector2 &a, const Vector2 &b, const Vector2 &c) { m_numVertices = 3; m_activeVertexBuffer = 0; m_verticesA[0] = a; @@ -5194,20 +4434,20 @@ public: m_area = 0; } - void clipHorizontalPlane(float offset, float clipdirection) - { - Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; + void clipHorizontalPlane(float offset, float clipdirection) { + Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; m_activeVertexBuffer ^= 1; Vector2 *v2 = m_vertexBuffers[m_activeVertexBuffer]; v[m_numVertices] = v[0]; - float dy2, dy1 = offset - v[0].y; - int dy2in, dy1in = clipdirection * dy1 >= 0; - uint32_t p = 0; + float dy2, dy1 = offset - v[0].y; + int dy2in, dy1in = clipdirection * dy1 >= 0; + uint32_t p = 0; for (uint32_t k = 0; k < m_numVertices; k++) { - dy2 = offset - v[k + 1].y; + dy2 = offset - v[k + 1].y; dy2in = clipdirection * dy2 >= 0; - if (dy1in) v2[p++] = v[k]; - if ( dy1in + dy2in == 1 ) { // not both in/out + if (dy1in) + v2[p++] = v[k]; + if (dy1in + dy2in == 1) { // not both in/out float dx = v[k + 1].x - v[k].x; float dy = v[k + 1].y - v[k].y; v2[p++] = Vector2(v[k].x + dy1 * (dx / dy), offset); @@ -5218,20 +4458,20 @@ public: m_numVertices = p; } - void clipVerticalPlane(float offset, float clipdirection) - { - Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; + void clipVerticalPlane(float offset, float clipdirection) { + Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; m_activeVertexBuffer ^= 1; Vector2 *v2 = m_vertexBuffers[m_activeVertexBuffer]; v[m_numVertices] = v[0]; - float dx2, dx1 = offset - v[0].x; - int dx2in, dx1in = clipdirection * dx1 >= 0; - uint32_t p = 0; + float dx2, dx1 = offset - v[0].x; + int dx2in, dx1in = clipdirection * dx1 >= 0; + uint32_t p = 0; for (uint32_t k = 0; k < m_numVertices; k++) { dx2 = offset - v[k + 1].x; dx2in = clipdirection * dx2 >= 0; - if (dx1in) v2[p++] = v[k]; - if ( dx1in + dx2in == 1 ) { // not both in/out + if (dx1in) + v2[p++] = v[k]; + if (dx1in + dx2in == 1) { // not both in/out float dx = v[k + 1].x - v[k].x; float dy = v[k + 1].y - v[k].y; v2[p++] = Vector2(offset, v[k].y + dx1 * (dy / dx)); @@ -5242,9 +4482,8 @@ public: m_numVertices = p; } - void computeArea() - { - Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; + void computeArea() { + Vector2 *v = m_vertexBuffers[m_activeVertexBuffer]; v[m_numVertices] = v[0]; m_area = 0; float centroidx = 0, centroidy = 0; @@ -5258,8 +4497,7 @@ public: m_area = 0.5f * fabsf(m_area); } - void clipAABox(float x0, float y0, float x1, float y1) - { + void clipAABox(float x0, float y0, float x1, float y1) { clipVerticalPlane(x0, -1); clipHorizontalPlane(y0, -1); clipVerticalPlane(x1, 1); @@ -5267,8 +4505,7 @@ public: computeArea(); } - float area() const - { + float area() const { return m_area; } @@ -5285,10 +4522,9 @@ private: typedef bool (*SamplingCallback)(void *param, int x, int y); /// A triangle for rasterization. -struct Triangle -{ - Triangle(const Vector2 &_v0, const Vector2 &_v1, const Vector2 &_v2) : v1(_v0), v2(_v2), v3(_v1) - { +struct Triangle { + Triangle(const Vector2 &_v0, const Vector2 &_v1, const Vector2 &_v2) : + v1(_v0), v2(_v2), v3(_v1), n1(0.0f), n2(0.0f), n3(0.0f) { // make sure every triangle is front facing. flipBackface(); // Compute deltas. @@ -5296,8 +4532,7 @@ struct Triangle computeUnitInwardNormals(); } - bool isValid() - { + bool isValid() { const Vector2 e0 = v3 - v1; const Vector2 e1 = v2 - v1; const float area = e0.y * e1.x - e1.y * e0.x; @@ -5305,18 +4540,17 @@ struct Triangle } // extents has to be multiple of BK_SIZE!! - bool drawAA(const Vector2 &extents, SamplingCallback cb, void *param) - { - const float PX_INSIDE = 1.0f/sqrtf(2.0f); - const float PX_OUTSIDE = -1.0f/sqrtf(2.0f); + bool drawAA(const Vector2 &extents, SamplingCallback cb, void *param) { + const float PX_INSIDE = 1.0f / sqrtf(2.0f); + const float PX_OUTSIDE = -1.0f / sqrtf(2.0f); const float BK_SIZE = 8; - const float BK_INSIDE = sqrtf(BK_SIZE*BK_SIZE/2.0f); - const float BK_OUTSIDE = -sqrtf(BK_SIZE*BK_SIZE/2.0f); + const float BK_INSIDE = sqrtf(BK_SIZE * BK_SIZE / 2.0f); + const float BK_OUTSIDE = -sqrtf(BK_SIZE * BK_SIZE / 2.0f); // Bounding rectangle float minx = floorf(max(min3(v1.x, v2.x, v3.x), 0.0f)); float miny = floorf(max(min3(v1.y, v2.y, v3.y), 0.0f)); - float maxx = ceilf( min(max3(v1.x, v2.x, v3.x), extents.x - 1.0f)); - float maxy = ceilf( min(max3(v1.y, v2.y, v3.y), extents.y - 1.0f)); + float maxx = ceilf(min(max3(v1.x, v2.x, v3.x), extents.x - 1.0f)); + float maxy = ceilf(min(max3(v1.y, v2.y, v3.y), extents.y - 1.0f)); // There's no reason to align the blocks to the viewport, instead we align them to the origin of the triangle bounds. minx = floorf(minx); miny = floorf(miny); @@ -5341,9 +4575,10 @@ struct Triangle float bC = C2 + n2.x * xc + n2.y * yc; float cC = C3 + n3.x * xc + n3.y * yc; // Skip block when outside an edge - if ( (aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE) ) continue; + if ((aC <= BK_OUTSIDE) || (bC <= BK_OUTSIDE) || (cC <= BK_OUTSIDE)) + continue; // Accept whole block when totally covered - if ( (aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE) ) { + if ((aC >= BK_INSIDE) && (bC >= BK_INSIDE) && (cC >= BK_INSIDE)) { for (float y = y0; y < y0 + BK_SIZE; y++) { for (float x = x0; x < x0 + BK_SIZE; x++) { if (!cb(param, (int)x, (int)y)) @@ -5386,10 +4621,9 @@ struct Triangle } private: - void flipBackface() - { + void flipBackface() { // check if triangle is backfacing, if so, swap two vertices - if ( ((v3.x - v1.x) * (v2.y - v1.y) - (v3.y - v1.y) * (v2.x - v1.x)) < 0 ) { + if (((v3.x - v1.x) * (v2.y - v1.y) - (v3.y - v1.y) * (v2.x - v1.x)) < 0) { Vector2 hv = v1; v1 = v2; v2 = hv; // swap pos @@ -5397,8 +4631,7 @@ private: } // compute unit inward normals for each edge. - void computeUnitInwardNormals() - { + void computeUnitInwardNormals() { n1 = v1 - v2; n1 = Vector2(-n1.y, n1.x); n1 = n1 * (1.0f / sqrtf(dot(n1, n1))); @@ -5416,8 +4649,7 @@ private: }; // Process the given triangle. Returns false if rasterization was interrupted by the callback. -static bool drawTriangle(const Vector2 &extents, const Vector2 v[3], SamplingCallback cb, void *param) -{ +static bool drawTriangle(const Vector2 &extents, const Vector2 v[3], SamplingCallback cb, void *param) { Triangle tri(v[0], v[1], v[2]); // @@ It would be nice to have a conservative drawing mode that enlarges the triangle extents by one texel and is able to handle degenerate triangles. // @@ Maybe the simplest thing to do would be raster triangle edges. @@ -5432,22 +4664,19 @@ namespace segment { // - Insertion is o(n) // - Smallest element goes at the end, so that popping it is o(1). -struct CostQueue -{ - CostQueue(uint32_t size = UINT32_MAX) : m_maxSize(size), m_pairs(MemTag::SegmentAtlasChartCandidates) {} +struct CostQueue { + CostQueue(uint32_t size = UINT32_MAX) : + m_maxSize(size), m_pairs(MemTag::SegmentAtlasChartCandidates) {} - float peekCost() const - { + float peekCost() const { return m_pairs.back().cost; } - uint32_t peekFace() const - { + uint32_t peekFace() const { return m_pairs.back().face; } - void push(float cost, uint32_t face) - { + void push(float cost, uint32_t face) { const Pair p = { cost, face }; if (m_pairs.isEmpty() || cost < peekCost()) m_pairs.push_back(p); @@ -5464,29 +4693,25 @@ struct CostQueue } } - uint32_t pop() - { + uint32_t pop() { XA_DEBUG_ASSERT(!m_pairs.isEmpty()); uint32_t f = m_pairs.back().face; m_pairs.pop_back(); return f; } - XA_INLINE void clear() - { + XA_INLINE void clear() { m_pairs.clear(); } - XA_INLINE uint32_t count() const - { + XA_INLINE uint32_t count() const { return m_pairs.size(); } private: const uint32_t m_maxSize; - struct Pair - { + struct Pair { float cost; uint32_t face; }; @@ -5494,25 +4719,27 @@ private: Array<Pair> m_pairs; }; -struct AtlasData -{ +struct AtlasData { ChartOptions options; const Mesh *mesh = nullptr; Array<float> edgeDihedralAngles; Array<float> edgeLengths; Array<float> faceAreas; + Array<float> faceUvAreas; // Can be negative. Array<Vector3> faceNormals; BitArray isFaceInChart; - AtlasData() : edgeDihedralAngles(MemTag::SegmentAtlasMeshData), edgeLengths(MemTag::SegmentAtlasMeshData), faceAreas(MemTag::SegmentAtlasMeshData), faceNormals(MemTag::SegmentAtlasMeshData) {} + AtlasData() : + edgeDihedralAngles(MemTag::SegmentAtlasMeshData), edgeLengths(MemTag::SegmentAtlasMeshData), faceAreas(MemTag::SegmentAtlasMeshData), faceNormals(MemTag::SegmentAtlasMeshData) {} - void compute() - { + void compute() { const uint32_t faceCount = mesh->faceCount(); const uint32_t edgeCount = mesh->edgeCount(); edgeDihedralAngles.resize(edgeCount); edgeLengths.resize(edgeCount); faceAreas.resize(faceCount); + if (options.useInputMeshUvs) + faceUvAreas.resize(faceCount); faceNormals.resize(faceCount); isFaceInChart.resize(faceCount); isFaceInChart.zeroOutMemory(); @@ -5526,6 +4753,8 @@ struct AtlasData } faceAreas[f] = mesh->computeFaceArea(f); XA_DEBUG_ASSERT(faceAreas[f] > 0.0f); + if (options.useInputMeshUvs) + faceUvAreas[f] = mesh->computeFaceParametricArea(f); faceNormals[f] = mesh->computeFaceNormal(f); } for (uint32_t face = 0; face < faceCount; face++) { @@ -5543,19 +4772,109 @@ struct AtlasData } }; +// If MeshDecl::vertexUvData is set on input meshes, find charts by floodfilling faces in world/model space without crossing UV seams. +struct OriginalUvCharts { + OriginalUvCharts(AtlasData &data) : + m_data(data) {} + uint32_t chartCount() const { return m_charts.size(); } + const Basis &chartBasis(uint32_t chartIndex) const { return m_chartBasis[chartIndex]; } + + ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { + const Chart &chart = m_charts[chartIndex]; + return ConstArrayView<uint32_t>(&m_chartFaces[chart.firstFace], chart.faceCount); + } + + void compute() { + m_charts.clear(); + m_chartFaces.clear(); + const Mesh *mesh = m_data.mesh; + const uint32_t faceCount = mesh->faceCount(); + for (uint32_t f = 0; f < faceCount; f++) { + if (m_data.isFaceInChart.get(f)) + continue; + if (isZero(m_data.faceUvAreas[f], kAreaEpsilon)) + continue; // Face must have valid UVs. + // Found an unassigned face, create a new chart. + Chart chart; + chart.firstFace = m_chartFaces.size(); + chart.faceCount = 1; + m_chartFaces.push_back(f); + m_data.isFaceInChart.set(f); + floodfillFaces(chart); + m_charts.push_back(chart); + } + // Compute basis for each chart. + m_chartBasis.resize(m_charts.size()); + for (uint32_t c = 0; c < m_charts.size(); c++) { + const Chart &chart = m_charts[c]; + m_tempPoints.resize(chart.faceCount * 3); + for (uint32_t f = 0; f < chart.faceCount; f++) { + const uint32_t face = m_chartFaces[chart.firstFace + f]; + for (uint32_t i = 0; i < 3; i++) + m_tempPoints[f * 3 + i] = m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + i)); + } + Fit::computeBasis(m_tempPoints, &m_chartBasis[c]); + } + } + +private: + struct Chart { + uint32_t firstFace, faceCount; + }; + + void floodfillFaces(Chart &chart) { + const bool isFaceAreaNegative = m_data.faceUvAreas[m_chartFaces[chart.firstFace]] < 0.0f; + for (;;) { + bool newFaceAdded = false; + const uint32_t faceCount = chart.faceCount; + for (uint32_t f = 0; f < faceCount; f++) { + const uint32_t sourceFace = m_chartFaces[chart.firstFace + f]; + for (Mesh::FaceEdgeIterator edgeIt(m_data.mesh, sourceFace); !edgeIt.isDone(); edgeIt.advance()) { + const uint32_t face = edgeIt.oppositeFace(); + if (face == UINT32_MAX) + continue; // Boundary edge. + if (m_data.isFaceInChart.get(face)) + continue; // Already assigned to a chart. + if (isZero(m_data.faceUvAreas[face], kAreaEpsilon)) + continue; // Face must have valid UVs. + if ((m_data.faceUvAreas[face] < 0.0f) != isFaceAreaNegative) + continue; // Face winding is opposite of the first chart face. + const Vector2 &uv0 = m_data.mesh->texcoord(edgeIt.vertex0()); + const Vector2 &uv1 = m_data.mesh->texcoord(edgeIt.vertex1()); + const Vector2 &ouv0 = m_data.mesh->texcoord(m_data.mesh->vertexAt(meshEdgeIndex0(edgeIt.oppositeEdge()))); + const Vector2 &ouv1 = m_data.mesh->texcoord(m_data.mesh->vertexAt(meshEdgeIndex1(edgeIt.oppositeEdge()))); + if (!equal(uv0, ouv1, m_data.mesh->epsilon()) || !equal(uv1, ouv0, m_data.mesh->epsilon())) + continue; // UVs must match exactly. + m_chartFaces.push_back(face); + chart.faceCount++; + m_data.isFaceInChart.set(face); + newFaceAdded = true; + } + } + if (!newFaceAdded) + break; + } + } + + AtlasData &m_data; + Array<Chart> m_charts; + Array<Basis> m_chartBasis; + Array<uint32_t> m_chartFaces; + Array<Vector3> m_tempPoints; +}; + #if XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS static uint32_t s_planarRegionsCurrentRegion; static uint32_t s_planarRegionsCurrentVertex; #endif -struct PlanarCharts -{ - PlanarCharts(AtlasData &data) : m_data(data), m_nextRegionFace(MemTag::SegmentAtlasPlanarRegions), m_faceToRegionId(MemTag::SegmentAtlasPlanarRegions) {} +struct PlanarCharts { + PlanarCharts(AtlasData &data) : + m_data(data), m_nextRegionFace(MemTag::SegmentAtlasPlanarRegions), m_faceToRegionId(MemTag::SegmentAtlasPlanarRegions) {} const Basis &chartBasis(uint32_t chartIndex) const { return m_chartBasis[chartIndex]; } uint32_t chartCount() const { return m_charts.size(); } - - ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const - { + + ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { const Chart &chart = m_charts[chartIndex]; return ConstArrayView<uint32_t>(&m_chartFaces[chart.firstFace], chart.faceCount); } @@ -5564,8 +4883,7 @@ struct PlanarCharts uint32_t nextRegionFace(uint32_t face) const { return m_nextRegionFace[face]; } float regionArea(uint32_t region) const { return m_regionAreas[region]; } - void compute() - { + void compute() { const uint32_t faceCount = m_data.mesh->faceCount(); // Precompute regions of coplanar incident faces. m_regionFirstFace.clear(); @@ -5581,6 +4899,8 @@ struct PlanarCharts for (uint32_t f = 0; f < faceCount; f++) { if (m_nextRegionFace[f] != f) continue; // Already assigned. + if (m_data.isFaceInChart.get(f)) + continue; // Already in a chart. faceStack.clear(); faceStack.push_back(f); for (;;) { @@ -5595,6 +4915,8 @@ struct PlanarCharts continue; if (m_nextRegionFace[oface] != oface) continue; // Already assigned. + if (m_data.isFaceInChart.get(oface)) + continue; // Already in a chart. if (!equal(dot(m_data.faceNormals[face], m_data.faceNormals[oface]), 1.0f, kEpsilon)) continue; // Not coplanar. const uint32_t next = m_nextRegionFace[face]; @@ -5632,8 +4954,11 @@ struct PlanarCharts // Precompute planar region areas. m_regionAreas.resize(regionCount); m_regionAreas.zeroOutMemory(); - for (uint32_t f = 0; f < faceCount; f++) + for (uint32_t f = 0; f < faceCount; f++) { + if (m_faceToRegionId[f] == UINT32_MAX) + continue; m_regionAreas[m_faceToRegionId[f]] += m_data.faceAreas[f]; + } // Create charts from suitable planar regions. // The dihedral angle of all boundary edges must be >= 90 degrees. m_charts.clear(); @@ -5658,8 +4983,7 @@ struct PlanarCharts if (!createChart) break; face = m_nextRegionFace[face]; - } - while (face != firstRegionFace); + } while (face != firstRegionFace); // Create a chart. if (createChart) { Chart chart; @@ -5671,15 +4995,13 @@ struct PlanarCharts m_chartFaces.push_back(face); chart.faceCount++; face = m_nextRegionFace[face]; - } - while (face != firstRegionFace); + } while (face != firstRegionFace); m_charts.push_back(chart); } } // Compute basis for each chart using the first face normal (all faces have the same normal). m_chartBasis.resize(m_charts.size()); - for (uint32_t c = 0; c < m_charts.size(); c++) - { + for (uint32_t c = 0; c < m_charts.size(); c++) { const uint32_t face = m_chartFaces[m_charts[c].firstFace]; Basis &basis = m_chartBasis[c]; basis.normal = m_data.faceNormals[face]; @@ -5689,8 +5011,7 @@ struct PlanarCharts } private: - struct Chart - { + struct Chart { uint32_t firstFace, faceCount; }; @@ -5704,12 +5025,11 @@ private: Array<Basis> m_chartBasis; }; -struct ClusteredCharts -{ - ClusteredCharts(AtlasData &data, const PlanarCharts &planarCharts) : m_data(data), m_planarCharts(planarCharts), m_texcoords(MemTag::SegmentAtlasMeshData), m_bestTriangles(10), m_placingSeeds(false) {} +struct ClusteredCharts { + ClusteredCharts(AtlasData &data, const PlanarCharts &planarCharts) : + m_data(data), m_planarCharts(planarCharts), m_texcoords(MemTag::SegmentAtlasMeshData), m_bestTriangles(10), m_placingSeeds(false) {} - ~ClusteredCharts() - { + ~ClusteredCharts() { const uint32_t chartCount = m_charts.size(); for (uint32_t i = 0; i < chartCount; i++) { m_charts[i]->~Chart(); @@ -5721,8 +5041,7 @@ struct ClusteredCharts ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { return m_charts[chartIndex]->faces; } const Basis &chartBasis(uint32_t chartIndex) const { return m_charts[chartIndex]->basis; } - void compute() - { + void compute() { const uint32_t faceCount = m_data.mesh->faceCount(); m_facesLeft = 0; for (uint32_t i = 0; i < faceCount; i++) { @@ -5768,9 +5087,9 @@ struct ClusteredCharts } private: - struct Chart - { - Chart() : faces(MemTag::SegmentAtlasChartFaces) {} + struct Chart { + Chart() : + faces(MemTag::SegmentAtlasChartFaces) {} int id = -1; Basis basis; // Best fit normal. @@ -5784,8 +5103,7 @@ private: uint32_t seed; }; - void placeSeeds(float threshold) - { + void placeSeeds(float threshold) { XA_PROFILE_START(clusteredChartsPlaceSeeds) m_placingSeeds = true; // Instead of using a predefiened number of seeds: @@ -5801,8 +5119,7 @@ private: } // Returns true if any of the charts can grow more. - void growCharts(float threshold) - { + void growCharts(float threshold) { XA_PROFILE_START(clusteredChartsGrow) for (;;) { if (m_facesLeft == 0) @@ -5848,8 +5165,7 @@ private: XA_PROFILE_END(clusteredChartsGrow) } - void resetCharts() - { + void resetCharts() { XA_PROFILE_START(clusteredChartsReset) const uint32_t faceCount = m_data.mesh->faceCount(); for (uint32_t i = 0; i < faceCount; i++) { @@ -5880,8 +5196,7 @@ private: XA_PROFILE_END(clusteredChartsReset) } - bool relocateSeeds() - { + bool relocateSeeds() { XA_PROFILE_START(clusteredChartsRelocateSeeds) bool anySeedChanged = false; const uint32_t chartCount = m_charts.size(); @@ -5894,8 +5209,7 @@ private: return anySeedChanged; } - void fillHoles(float threshold) - { + void fillHoles(float threshold) { XA_PROFILE_START(clusteredChartsFillHoles) while (m_facesLeft > 0) createChart(threshold); @@ -5903,8 +5217,7 @@ private: } #if XA_MERGE_CHARTS - void mergeCharts() - { + void mergeCharts() { XA_PROFILE_START(clusteredChartsMerge) const uint32_t chartCount = m_charts.size(); // Merge charts progressively until there's none left to merge. @@ -5964,7 +5277,7 @@ private: // Merge if chart2 has a single face. // chart1 must have more than 1 face. // chart2 area must be <= 10% of chart1 area. - if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && chart->faces.size() > 1 && chart2->faces.size() == 1 && chart2->area <= chart->area * 0.1f) + if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && chart->faces.size() > 1 && chart2->faces.size() == 1 && chart2->area <= chart->area * 0.1f) goto merge; // Merge if chart2 has two faces (probably a quad), and chart1 bounds at least 2 of its edges. if (chart2->faces.size() == 2 && m_sharedBoundaryEdgeCountNoSeams[cc] >= 2) @@ -5972,8 +5285,8 @@ private: // Merge if chart2 is wholely inside chart1, ignoring seams. if (m_sharedBoundaryLengthsNoSeams[cc] > 0.0f && equal(m_sharedBoundaryLengthsNoSeams[cc], chart2->boundaryLength, kEpsilon)) goto merge; - if (m_sharedBoundaryLengths[cc] > 0.2f * max(0.0f, chart->boundaryLength - externalBoundaryLength) || - m_sharedBoundaryLengths[cc] > 0.75f * chart2->boundaryLength) + if (m_sharedBoundaryLengths[cc] > 0.2f * max(0.0f, chart->boundaryLength - externalBoundaryLength) || + m_sharedBoundaryLengths[cc] > 0.75f * chart2->boundaryLength) goto merge; continue; merge: @@ -6011,8 +5324,7 @@ private: #endif private: - void createChart(float threshold) - { + void createChart(float threshold) { Chart *chart = XA_NEW(MemTag::Default, Chart); chart->id = (int)m_charts.size(); m_charts.push_back(chart); @@ -6043,15 +5355,13 @@ private: } } - bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const - { + bool isChartBoundaryEdge(const Chart *chart, uint32_t edge) const { const uint32_t oppositeEdge = m_data.mesh->oppositeEdge(edge); const uint32_t oppositeFace = meshEdgeFace(oppositeEdge); return oppositeEdge == UINT32_MAX || m_faceCharts[oppositeFace] != chart->id; } - bool computeChartBasis(Chart *chart, Basis *basis) - { + bool computeChartBasis(Chart *chart, Basis *basis) { const uint32_t faceCount = chart->faces.size(); m_tempPoints.resize(chart->faces.size() * 3); for (uint32_t i = 0; i < faceCount; i++) { @@ -6059,11 +5369,10 @@ private: for (uint32_t j = 0; j < 3; j++) m_tempPoints[i * 3 + j] = m_data.mesh->position(m_data.mesh->vertexAt(f * 3 + j)); } - return Fit::computeBasis(m_tempPoints.data(), m_tempPoints.size(), basis); + return Fit::computeBasis(m_tempPoints, basis); } - bool isFaceFlipped(uint32_t face) const - { + bool isFaceFlipped(uint32_t face) const { const Vector2 &v1 = m_texcoords[face * 3 + 0]; const Vector2 &v2 = m_texcoords[face * 3 + 1]; const Vector2 &v3 = m_texcoords[face * 3 + 2]; @@ -6071,8 +5380,7 @@ private: return parametricArea < 0.0f; } - void parameterizeChart(const Chart *chart) - { + void parameterizeChart(const Chart *chart) { const uint32_t faceCount = chart->faces.size(); for (uint32_t i = 0; i < faceCount; i++) { const uint32_t face = chart->faces[i]; @@ -6085,8 +5393,7 @@ private: } // m_faceCharts for the chart faces must be set to the chart ID. Needed to compute boundary edges. - bool isChartParameterizationValid(const Chart *chart) - { + bool isChartParameterizationValid(const Chart *chart) { const uint32_t faceCount = chart->faces.size(); // Check for flipped faces in the parameterization. OK if all are flipped. uint32_t flippedFaceCount = 0; @@ -6099,7 +5406,7 @@ private: // Check for boundary intersection in the parameterization. XA_PROFILE_START(clusteredChartsPlaceSeedsBoundaryIntersection) XA_PROFILE_START(clusteredChartsGrowBoundaryIntersection) - m_boundaryGrid.reset(m_texcoords.data()); + m_boundaryGrid.reset(m_texcoords); for (uint32_t i = 0; i < faceCount; i++) { const uint32_t f = chart->faces[i]; for (uint32_t j = 0; j < 3; j++) { @@ -6120,15 +5427,14 @@ private: return true; } - bool addFaceToChart(Chart *chart, uint32_t face) - { + bool addFaceToChart(Chart *chart, uint32_t face) { XA_DEBUG_ASSERT(!m_data.isFaceInChart.get(face)); const uint32_t oldFaceCount = chart->faces.size(); const bool firstFace = oldFaceCount == 0; // Append the face and any coplanar connected faces to the chart faces array. chart->faces.push_back(face); uint32_t coplanarFace = m_planarCharts.nextRegionFace(face); - while (coplanarFace != face) { + while (coplanarFace != face) { XA_DEBUG_ASSERT(!m_data.isFaceInChart.get(coplanarFace)); chart->faces.push_back(coplanarFace); coplanarFace = m_planarCharts.nextRegionFace(coplanarFace); @@ -6140,7 +5446,7 @@ private: // Use the first face normal. // Use any edge as the tangent vector. basis.normal = m_data.faceNormals[face]; - basis.tangent = normalize(m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 0)) - m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 1)), kEpsilon); + basis.tangent = normalize(m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 0)) - m_data.mesh->position(m_data.mesh->vertexAt(face * 3 + 1))); basis.bitangent = cross(basis.normal, basis.tangent); } else { // Use best fit normal. @@ -6199,8 +5505,7 @@ private: } // Returns true if the seed has changed. - bool relocateSeed(Chart *chart) - { + bool relocateSeed(Chart *chart) { // Find the first N triangles that fit the proxy best. const uint32_t faceCount = chart->faces.size(); m_bestTriangles.clear(); @@ -6230,8 +5535,7 @@ private: } // Cost is combined metrics * weights. - float computeCost(Chart *chart, uint32_t face) const - { + float computeCost(Chart *chart, uint32_t face) const { // Estimate boundary length and area: const float newChartArea = computeArea(chart, face); const float newBoundaryLength = computeBoundaryLength(chart, face); @@ -6267,28 +5571,25 @@ private: // Returns a value in [0-1]. // 0 if face normal is coplanar to the chart's best fit normal. // 1 if face normal is perpendicular. - float computeNormalDeviationMetric(Chart *chart, uint32_t face) const - { + float computeNormalDeviationMetric(Chart *chart, uint32_t face) const { // All faces in coplanar regions have the same normal, can use any face. const Vector3 faceNormal = m_data.faceNormals[face]; // Use plane fitting metric for now: return min(1.0f - dot(faceNormal, chart->basis.normal), 1.0f); // @@ normal deviations should be weighted by face area } - float computeRoundnessMetric(Chart *chart, float newBoundaryLength, float newChartArea) const - { + float computeRoundnessMetric(Chart *chart, float newBoundaryLength, float newChartArea) const { const float oldRoundness = square(chart->boundaryLength) / chart->area; const float newRoundness = square(newBoundaryLength) / newChartArea; return 1.0f - oldRoundness / newRoundness; } - float computeStraightnessMetric(Chart *chart, uint32_t firstFace) const - { + float computeStraightnessMetric(Chart *chart, uint32_t firstFace) const { float l_out = 0.0f; // Length of firstFace planar region boundary that doesn't border the chart. float l_in = 0.0f; // Length that does border the chart. const uint32_t planarRegionId = m_planarCharts.regionIdFromFace(firstFace); uint32_t face = firstFace; - for (;;) { + for (;;) { for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) { const float l = m_data.edgeLengths[it.edge()]; if (it.isBoundary()) { @@ -6305,7 +5606,6 @@ private: break; } #if 1 - XA_DEBUG_ASSERT(l_in != 0.0f); // Candidate face must be adjacent to chart. @@ This is not true if the input mesh has zero-length edges. float ratio = (l_out - l_in) / (l_out + l_in); return min(ratio, 0.0f); // Only use the straightness metric to close gaps. #else @@ -6313,8 +5613,7 @@ private: #endif } - bool isNormalSeam(uint32_t edge) const - { + bool isNormalSeam(uint32_t edge) const { const uint32_t oppositeEdge = m_data.mesh->oppositeEdge(edge); if (oppositeEdge == UINT32_MAX) return false; // boundary edge @@ -6334,11 +5633,10 @@ private: return !equal(m_data.faceNormals[f0], m_data.faceNormals[f1], kNormalEpsilon); } - float computeNormalSeamMetric(Chart *chart, uint32_t firstFace) const - { + float computeNormalSeamMetric(Chart *chart, uint32_t firstFace) const { float seamFactor = 0.0f, totalLength = 0.0f; uint32_t face = firstFace; - for (;;) { + for (;;) { for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) { if (it.isBoundary()) continue; @@ -6375,11 +5673,10 @@ private: return seamFactor / totalLength; } - float computeTextureSeamMetric(Chart *chart, uint32_t firstFace) const - { + float computeTextureSeamMetric(Chart *chart, uint32_t firstFace) const { float seamLength = 0.0f, totalLength = 0.0f; uint32_t face = firstFace; - for (;;) { + for (;;) { for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) { if (it.isBoundary()) continue; @@ -6402,11 +5699,10 @@ private: return seamLength / totalLength; } - float computeArea(Chart *chart, uint32_t firstFace) const - { + float computeArea(Chart *chart, uint32_t firstFace) const { float area = chart->area; uint32_t face = firstFace; - for (;;) { + for (;;) { area += m_data.faceAreas[face]; face = m_planarCharts.nextRegionFace(face); if (face == firstFace) @@ -6415,13 +5711,12 @@ private: return area; } - float computeBoundaryLength(Chart *chart, uint32_t firstFace) const - { + float computeBoundaryLength(Chart *chart, uint32_t firstFace) const { float boundaryLength = chart->boundaryLength; // Add new edges, subtract edges shared with the chart. const uint32_t planarRegionId = m_planarCharts.regionIdFromFace(firstFace); uint32_t face = firstFace; - for (;;) { + for (;;) { for (Mesh::FaceEdgeIterator it(m_data.mesh, face); !it.isDone(); it.advance()) { const float edgeLength = m_data.edgeLengths[it.edge()]; if (it.isBoundary()) { @@ -6437,11 +5732,10 @@ private: if (face == firstFace) break; } - return max(0.0f, boundaryLength); // @@ Hack! + return max(0.0f, boundaryLength); // @@ Hack! } - bool mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength) - { + bool mergeChart(Chart *owner, Chart *chart, float sharedBoundaryLength) { const uint32_t oldOwnerFaceCount = owner->faces.size(); const uint32_t chartFaceCount = chart->faces.size(); owner->faces.push_back(chart->faces); @@ -6499,33 +5793,53 @@ private: bool m_placingSeeds; }; -struct Atlas -{ - Atlas() : m_planarCharts(m_data), m_clusteredCharts(m_data, m_planarCharts) {} +struct ChartGeneratorType { + enum Enum { + OriginalUv, + Planar, + Clustered, + Piecewise + }; +}; - uint32_t chartCount() const - { - return m_planarCharts.chartCount() + m_clusteredCharts.chartCount(); +struct Atlas { + Atlas() : + m_originalUvCharts(m_data), m_planarCharts(m_data), m_clusteredCharts(m_data, m_planarCharts) {} + + uint32_t chartCount() const { + return m_originalUvCharts.chartCount() + m_planarCharts.chartCount() + m_clusteredCharts.chartCount(); } - ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const - { + ConstArrayView<uint32_t> chartFaces(uint32_t chartIndex) const { + if (chartIndex < m_originalUvCharts.chartCount()) + return m_originalUvCharts.chartFaces(chartIndex); + chartIndex -= m_originalUvCharts.chartCount(); if (chartIndex < m_planarCharts.chartCount()) return m_planarCharts.chartFaces(chartIndex); chartIndex -= m_planarCharts.chartCount(); return m_clusteredCharts.chartFaces(chartIndex); } - const Basis &chartBasis(uint32_t chartIndex) const - { + const Basis &chartBasis(uint32_t chartIndex) const { + if (chartIndex < m_originalUvCharts.chartCount()) + return m_originalUvCharts.chartBasis(chartIndex); + chartIndex -= m_originalUvCharts.chartCount(); if (chartIndex < m_planarCharts.chartCount()) return m_planarCharts.chartBasis(chartIndex); chartIndex -= m_planarCharts.chartCount(); return m_clusteredCharts.chartBasis(chartIndex); } - void reset(const Mesh *mesh, const ChartOptions &options) - { + ChartGeneratorType::Enum chartGeneratorType(uint32_t chartIndex) const { + if (chartIndex < m_originalUvCharts.chartCount()) + return ChartGeneratorType::OriginalUv; + chartIndex -= m_originalUvCharts.chartCount(); + if (chartIndex < m_planarCharts.chartCount()) + return ChartGeneratorType::Planar; + return ChartGeneratorType::Clustered; + } + + void reset(const Mesh *mesh, const ChartOptions &options) { XA_PROFILE_START(buildAtlasInit) m_data.options = options; m_data.mesh = mesh; @@ -6533,8 +5847,12 @@ struct Atlas XA_PROFILE_END(buildAtlasInit) } - void compute() - { + void compute() { + if (m_data.options.useInputMeshUvs) { + XA_PROFILE_START(originalUvCharts) + m_originalUvCharts.compute(); + XA_PROFILE_END(originalUvCharts) + } XA_PROFILE_START(planarCharts) m_planarCharts.compute(); XA_PROFILE_END(planarCharts) @@ -6545,17 +5863,143 @@ struct Atlas private: AtlasData m_data; + OriginalUvCharts m_originalUvCharts; PlanarCharts m_planarCharts; ClusteredCharts m_clusteredCharts; }; +struct ComputeUvMeshChartsTaskArgs { + UvMesh *mesh; + Progress *progress; +}; + +// Charts are found by floodfilling faces without crossing UV seams. +struct ComputeUvMeshChartsTask { + ComputeUvMeshChartsTask(ComputeUvMeshChartsTaskArgs *args) : + m_mesh(args->mesh), m_progress(args->progress), m_uvToEdgeMap(MemTag::Default, m_mesh->indices.size()), m_faceAssigned(m_mesh->indices.size() / 3) {} + + void run() { + const uint32_t vertexCount = m_mesh->texcoords.size(); + const uint32_t indexCount = m_mesh->indices.size(); + const uint32_t faceCount = indexCount / 3; + // A vertex can only be assigned to one chart. + m_mesh->vertexToChartMap.resize(vertexCount); + m_mesh->vertexToChartMap.fill(UINT32_MAX); + // Map vertex UV to edge. Face is then edge / 3. + for (uint32_t i = 0; i < indexCount; i++) + m_uvToEdgeMap.add(m_mesh->texcoords[m_mesh->indices[i]]); + // Find charts. + m_faceAssigned.zeroOutMemory(); + for (uint32_t f = 0; f < faceCount; f++) { + if (m_progress->cancel) + return; + m_progress->increment(1); + // Found an unassigned face, see if it can be added. + const uint32_t chartIndex = m_mesh->charts.size(); + if (!canAddFaceToChart(chartIndex, f)) + continue; + // Face is OK, create a new chart with the face. + UvMeshChart *chart = XA_NEW(MemTag::Default, UvMeshChart); + m_mesh->charts.push_back(chart); + chart->material = m_mesh->faceMaterials.isEmpty() ? 0 : m_mesh->faceMaterials[f]; + addFaceToChart(chartIndex, f); + // Walk incident faces and assign them to the chart. + uint32_t f2 = 0; + for (;;) { + bool newFaceAssigned = false; + const uint32_t faceCount2 = chart->faces.size(); + for (; f2 < faceCount2; f2++) { + const uint32_t face = chart->faces[f2]; + for (uint32_t i = 0; i < 3; i++) { + // Add any valid faces with colocal UVs to the chart. + const Vector2 &uv = m_mesh->texcoords[m_mesh->indices[face * 3 + i]]; + uint32_t edge = m_uvToEdgeMap.get(uv); + while (edge != UINT32_MAX) { + const uint32_t newFace = edge / 3; + if (canAddFaceToChart(chartIndex, newFace)) { + addFaceToChart(chartIndex, newFace); + newFaceAssigned = true; + } + edge = m_uvToEdgeMap.getNext(uv, edge); + } + } + } + if (!newFaceAssigned) + break; + } + } + } + +private: + // The chart at chartIndex doesn't have to exist yet. + bool canAddFaceToChart(uint32_t chartIndex, uint32_t face) const { + if (m_faceAssigned.get(face)) + return false; // Already assigned to a chart. + if (m_mesh->faceIgnore.get(face)) + return false; // Face is ignored (zero area or nan UVs). + if (!m_mesh->faceMaterials.isEmpty() && chartIndex < m_mesh->charts.size()) { + if (m_mesh->faceMaterials[face] != m_mesh->charts[chartIndex]->material) + return false; // Materials don't match. + } + for (uint32_t i = 0; i < 3; i++) { + const uint32_t vertex = m_mesh->indices[face * 3 + i]; + if (m_mesh->vertexToChartMap[vertex] != UINT32_MAX && m_mesh->vertexToChartMap[vertex] != chartIndex) + return false; // Vertex already assigned to another chart. + } + return true; + } + + void addFaceToChart(uint32_t chartIndex, uint32_t face) { + UvMeshChart *chart = m_mesh->charts[chartIndex]; + m_faceAssigned.set(face); + chart->faces.push_back(face); + for (uint32_t i = 0; i < 3; i++) { + const uint32_t vertex = m_mesh->indices[face * 3 + i]; + m_mesh->vertexToChartMap[vertex] = chartIndex; + chart->indices.push_back(vertex); + } + } + + UvMesh *const m_mesh; + Progress *const m_progress; + HashMap<Vector2> m_uvToEdgeMap; // Face is edge / 3. + BitArray m_faceAssigned; +}; + +static void runComputeUvMeshChartsTask(void * /*groupUserData*/, void *taskUserData) { + XA_PROFILE_START(computeChartsThread) + ComputeUvMeshChartsTask task((ComputeUvMeshChartsTaskArgs *)taskUserData); + task.run(); + XA_PROFILE_END(computeChartsThread) +} + +static bool computeUvMeshCharts(TaskScheduler *taskScheduler, ArrayView<UvMesh *> meshes, ProgressFunc progressFunc, void *progressUserData) { + uint32_t totalFaceCount = 0; + for (uint32_t i = 0; i < meshes.length; i++) + totalFaceCount += meshes[i]->indices.size() / 3; + Progress progress(ProgressCategory::ComputeCharts, progressFunc, progressUserData, totalFaceCount); + TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(nullptr, meshes.length); + Array<ComputeUvMeshChartsTaskArgs> taskArgs; + taskArgs.resize(meshes.length); + for (uint32_t i = 0; i < meshes.length; i++) { + ComputeUvMeshChartsTaskArgs &args = taskArgs[i]; + args.mesh = meshes[i]; + args.progress = &progress; + Task task; + task.userData = &args; + task.func = runComputeUvMeshChartsTask; + taskScheduler->run(taskGroup, task); + } + taskScheduler->wait(&taskGroup); + return !progress.cancel; +} + } // namespace segment namespace param { // Fast sweep in 3 directions -static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b) -{ +static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b) { XA_DEBUG_ASSERT(a != nullptr); XA_DEBUG_ASSERT(b != nullptr); const uint32_t vertexCount = mesh->vertexCount(); @@ -6612,10 +6056,9 @@ static bool findApproximateDiameterVertices(Mesh *mesh, uint32_t *a, uint32_t *b // From OpenNL LSCM example. // Computes the coordinates of the vertices of a triangle in a local 2D orthonormal basis of the triangle's plane. -static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vector2 *z1, Vector2 *z2, float epsilon) -{ - Vector3 X = normalize(p1 - p0, epsilon); - Vector3 Z = normalize(cross(X, p2 - p0), epsilon); +static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vector2 *z1, Vector2 *z2) { + Vector3 X = normalize(p1 - p0); + Vector3 Z = normalize(cross(X, p2 - p0)); Vector3 Y = cross(Z, X); Vector3 &O = p0; *z0 = Vector2(0, 0); @@ -6623,8 +6066,83 @@ static void projectTriangle(Vector3 p0, Vector3 p1, Vector3 p2, Vector2 *z0, Vec *z2 = Vector2(dot(p2 - O, X), dot(p2 - O, Y)); } -static bool computeLeastSquaresConformalMap(Mesh *mesh) -{ +// Conformal relations from Brecht Van Lommel (based on ABF): + +static float vec_angle_cos(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) { + Vector3 d1 = v1 - v2; + Vector3 d2 = v3 - v2; + return clamp(dot(d1, d2) / (length(d1) * length(d2)), -1.0f, 1.0f); +} + +static float vec_angle(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3) { + float dot = vec_angle_cos(v1, v2, v3); + return acosf(dot); +} + +static void triangle_angles(const Vector3 &v1, const Vector3 &v2, const Vector3 &v3, float *a1, float *a2, float *a3) { + *a1 = vec_angle(v3, v1, v2); + *a2 = vec_angle(v1, v2, v3); + *a3 = kPi - *a2 - *a1; +} + +static bool setup_abf_relations(opennl::NLContext *context, int id0, int id1, int id2, const Vector3 &p0, const Vector3 &p1, const Vector3 &p2) { + // @@ IC: Wouldn't it be more accurate to return cos and compute 1-cos^2? + // It does indeed seem to be a little bit more robust. + // @@ Need to revisit this more carefully! + float a0, a1, a2; + triangle_angles(p0, p1, p2, &a0, &a1, &a2); + if (a0 == 0.0f || a1 == 0.0f || a2 == 0.0f) + return false; + float s0 = sinf(a0); + float s1 = sinf(a1); + float s2 = sinf(a2); + if (s1 > s0 && s1 > s2) { + swap(s1, s2); + swap(s0, s1); + swap(a1, a2); + swap(a0, a1); + swap(id1, id2); + swap(id0, id1); + } else if (s0 > s1 && s0 > s2) { + swap(s0, s2); + swap(s0, s1); + swap(a0, a2); + swap(a0, a1); + swap(id0, id2); + swap(id0, id1); + } + float c0 = cosf(a0); + float ratio = (s2 == 0.0f) ? 1.0f : s1 / s2; + float cosine = c0 * ratio; + float sine = s0 * ratio; + // Note : 2*id + 0 --> u + // 2*id + 1 --> v + int u0_id = 2 * id0 + 0; + int v0_id = 2 * id0 + 1; + int u1_id = 2 * id1 + 0; + int v1_id = 2 * id1 + 1; + int u2_id = 2 * id2 + 0; + int v2_id = 2 * id2 + 1; + // Real part + opennl::nlBegin(context, NL_ROW); + opennl::nlCoefficient(context, u0_id, cosine - 1.0f); + opennl::nlCoefficient(context, v0_id, -sine); + opennl::nlCoefficient(context, u1_id, -cosine); + opennl::nlCoefficient(context, v1_id, sine); + opennl::nlCoefficient(context, u2_id, 1); + opennl::nlEnd(context, NL_ROW); + // Imaginary part + opennl::nlBegin(context, NL_ROW); + opennl::nlCoefficient(context, u0_id, sine); + opennl::nlCoefficient(context, v0_id, cosine - 1.0f); + opennl::nlCoefficient(context, u1_id, -sine); + opennl::nlCoefficient(context, v1_id, -cosine); + opennl::nlCoefficient(context, v2_id, 1); + opennl::nlEnd(context, NL_ROW); + return true; +} + +static bool computeLeastSquaresConformalMap(Mesh *mesh) { uint32_t lockedVertex0, lockedVertex1; if (!findApproximateDiameterVertices(mesh, &lockedVertex0, &lockedVertex1)) { // Mesh has no boundaries. @@ -6635,55 +6153,57 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh) opennl::nlSolverParameteri(context, NL_NB_VARIABLES, int(2 * vertexCount)); opennl::nlSolverParameteri(context, NL_MAX_ITERATIONS, int(5 * vertexCount)); opennl::nlBegin(context, NL_SYSTEM); - const Vector2 *texcoords = mesh->texcoords(); + ArrayView<Vector2> texcoords = mesh->texcoords(); for (uint32_t i = 0; i < vertexCount; i++) { opennl::nlSetVariable(context, 2 * i, texcoords[i].x); opennl::nlSetVariable(context, 2 * i + 1, texcoords[i].y); if (i == lockedVertex0 || i == lockedVertex1) { opennl::nlLockVariable(context, 2 * i); opennl::nlLockVariable(context, 2 * i + 1); - } + } } opennl::nlBegin(context, NL_MATRIX); const uint32_t faceCount = mesh->faceCount(); - const Vector3 *positions = mesh->positions(); - const uint32_t *indices = mesh->indices(); + ConstArrayView<Vector3> positions = mesh->positions(); + ConstArrayView<uint32_t> indices = mesh->indices(); for (uint32_t f = 0; f < faceCount; f++) { const uint32_t v0 = indices[f * 3 + 0]; const uint32_t v1 = indices[f * 3 + 1]; const uint32_t v2 = indices[f * 3 + 2]; - Vector2 z0, z1, z2; - projectTriangle(positions[v0], positions[v1], positions[v2], &z0, &z1, &z2, mesh->epsilon()); - double a = z1.x - z0.x; - double b = z1.y - z0.y; - double c = z2.x - z0.x; - double d = z2.y - z0.y; - XA_DEBUG_ASSERT(b == 0.0); - // Note : 2*id + 0 --> u - // 2*id + 1 --> v - uint32_t u0_id = 2 * v0; - uint32_t v0_id = 2 * v0 + 1; - uint32_t u1_id = 2 * v1; - uint32_t v1_id = 2 * v1 + 1; - uint32_t u2_id = 2 * v2; - uint32_t v2_id = 2 * v2 + 1; - // Note : b = 0 - // Real part - opennl::nlBegin(context, NL_ROW); - opennl::nlCoefficient(context, u0_id, -a+c) ; - opennl::nlCoefficient(context, v0_id, b-d) ; - opennl::nlCoefficient(context, u1_id, -c) ; - opennl::nlCoefficient(context, v1_id, d) ; - opennl::nlCoefficient(context, u2_id, a); - opennl::nlEnd(context, NL_ROW); - // Imaginary part - opennl::nlBegin(context, NL_ROW); - opennl::nlCoefficient(context, u0_id, -b+d); - opennl::nlCoefficient(context, v0_id, -a+c); - opennl::nlCoefficient(context, u1_id, -d); - opennl::nlCoefficient(context, v1_id, -c); - opennl::nlCoefficient(context, v2_id, a); - opennl::nlEnd(context, NL_ROW); + if (!setup_abf_relations(context, v0, v1, v2, positions[v0], positions[v1], positions[v2])) { + Vector2 z0, z1, z2; + projectTriangle(positions[v0], positions[v1], positions[v2], &z0, &z1, &z2); + double a = z1.x - z0.x; + double b = z1.y - z0.y; + double c = z2.x - z0.x; + double d = z2.y - z0.y; + XA_DEBUG_ASSERT(b == 0.0); + // Note : 2*id + 0 --> u + // 2*id + 1 --> v + uint32_t u0_id = 2 * v0; + uint32_t v0_id = 2 * v0 + 1; + uint32_t u1_id = 2 * v1; + uint32_t v1_id = 2 * v1 + 1; + uint32_t u2_id = 2 * v2; + uint32_t v2_id = 2 * v2 + 1; + // Note : b = 0 + // Real part + opennl::nlBegin(context, NL_ROW); + opennl::nlCoefficient(context, u0_id, -a + c); + opennl::nlCoefficient(context, v0_id, b - d); + opennl::nlCoefficient(context, u1_id, -c); + opennl::nlCoefficient(context, v1_id, d); + opennl::nlCoefficient(context, u2_id, a); + opennl::nlEnd(context, NL_ROW); + // Imaginary part + opennl::nlBegin(context, NL_ROW); + opennl::nlCoefficient(context, u0_id, -b + d); + opennl::nlCoefficient(context, v0_id, -a + c); + opennl::nlCoefficient(context, u1_id, -d); + opennl::nlCoefficient(context, v1_id, -c); + opennl::nlCoefficient(context, v2_id, a); + opennl::nlEnd(context, NL_ROW); + } } opennl::nlEnd(context, NL_MATRIX); opennl::nlEnd(context, NL_SYSTEM); @@ -6694,7 +6214,7 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh) for (uint32_t i = 0; i < vertexCount; i++) { const double u = opennl::nlGetVariable(context, 2 * i); const double v = opennl::nlGetVariable(context, 2 * i + 1); - mesh->texcoord(i) = Vector2((float)u, (float)v); + texcoords[i] = Vector2((float)u, (float)v); XA_DEBUG_ASSERT(!isNan(mesh->texcoord(i).x)); XA_DEBUG_ASSERT(!isNan(mesh->texcoord(i).y)); } @@ -6702,30 +6222,26 @@ static bool computeLeastSquaresConformalMap(Mesh *mesh) return true; } -#if XA_RECOMPUTE_CHARTS -struct PiecewiseParam -{ - void reset(const Mesh *mesh, uint32_t faceCount) - { +struct PiecewiseParam { + void reset(const Mesh *mesh) { m_mesh = mesh; - m_faceCount = faceCount; + const uint32_t faceCount = m_mesh->faceCount(); const uint32_t vertexCount = m_mesh->vertexCount(); m_texcoords.resize(vertexCount); - m_patch.reserve(m_faceCount); - m_candidates.reserve(m_faceCount); - m_faceInAnyPatch.resize(m_faceCount); + m_patch.reserve(faceCount); + m_candidates.reserve(faceCount); + m_faceInAnyPatch.resize(faceCount); m_faceInAnyPatch.zeroOutMemory(); - m_faceInvalid.resize(m_faceCount); - m_faceInPatch.resize(m_faceCount); + m_faceInvalid.resize(faceCount); + m_faceInPatch.resize(faceCount); m_vertexInPatch.resize(vertexCount); - m_faceToCandidate.resize(m_faceCount); + m_faceToCandidate.resize(faceCount); } ConstArrayView<uint32_t> chartFaces() const { return m_patch; } - const Vector2 *texcoords() const { return m_texcoords.data(); } + ConstArrayView<Vector2> texcoords() const { return m_texcoords; } - bool computeChart() - { + bool computeChart() { // Clear per-patch state. m_patch.clear(); m_candidates.clear(); @@ -6734,8 +6250,9 @@ struct PiecewiseParam m_faceInPatch.zeroOutMemory(); m_vertexInPatch.zeroOutMemory(); // Add the seed face (first unassigned face) to the patch. + const uint32_t faceCount = m_mesh->faceCount(); uint32_t seed = UINT32_MAX; - for (uint32_t f = 0; f < m_faceCount; f++) { + for (uint32_t f = 0; f < faceCount; f++) { if (m_faceInAnyPatch.get(f)) continue; seed = f; @@ -6749,7 +6266,7 @@ struct PiecewiseParam } addFaceToPatch(seed); // Initialize the boundary grid. - m_boundaryGrid.reset(m_texcoords.data(), m_mesh->indices()); + m_boundaryGrid.reset(m_texcoords, m_mesh->indices()); for (Mesh::FaceEdgeIterator it(m_mesh, seed); !it.isDone(); it.advance()) m_boundaryGrid.append(it.edge()); break; @@ -6793,22 +6310,34 @@ struct PiecewiseParam break; } } + // Check for zero area and flipped faces (using area). + for (CandidateIterator it(bestCandidate); !it.isDone(); it.advance()) { + const Vector2 a = m_texcoords[m_mesh->vertexAt(it.current()->face * 3 + 0)]; + const Vector2 b = m_texcoords[m_mesh->vertexAt(it.current()->face * 3 + 1)]; + const Vector2 c = m_texcoords[m_mesh->vertexAt(it.current()->face * 3 + 2)]; + const float area = triangleArea(a, b, c); + if (area <= 0.0f) { + invalid = true; + break; + } + } // Check for boundary intersection. if (!invalid) { XA_PROFILE_START(parameterizeChartsPiecewiseBoundaryIntersection) // Test candidate edges that would form part of the new patch boundary. // Ignore boundary edges that would become internal if the candidate faces were added to the patch. - Array<uint32_t> newBoundaryEdges, ignoreEdges; + m_newBoundaryEdges.clear(); + m_ignoreBoundaryEdges.clear(); for (CandidateIterator candidateIt(bestCandidate); !candidateIt.isDone(); candidateIt.advance()) { for (Mesh::FaceEdgeIterator it(m_mesh, candidateIt.current()->face); !it.isDone(); it.advance()) { const uint32_t oface = it.oppositeFace(); - if (oface == UINT32_MAX || oface >= m_faceCount || !m_faceInPatch.get(oface)) - newBoundaryEdges.push_back(it.edge()); - if (oface != UINT32_MAX && oface < m_faceCount && m_faceInPatch.get(oface)) - ignoreEdges.push_back(it.oppositeEdge()); + if (oface == UINT32_MAX || !m_faceInPatch.get(oface)) + m_newBoundaryEdges.push_back(it.edge()); + if (oface != UINT32_MAX && m_faceInPatch.get(oface)) + m_ignoreBoundaryEdges.push_back(it.oppositeEdge()); } } - invalid = m_boundaryGrid.intersect(m_mesh->epsilon(), newBoundaryEdges, ignoreEdges); + invalid = m_boundaryGrid.intersect(m_mesh->epsilon(), m_newBoundaryEdges, m_ignoreBoundaryEdges); XA_PROFILE_END(parameterizeChartsPiecewiseBoundaryIntersection) } if (invalid) { @@ -6826,11 +6355,11 @@ struct PiecewiseParam removeLinkedCandidates(bestCandidate); // Reset the grid with all edges on the patch boundary. XA_PROFILE_START(parameterizeChartsPiecewiseBoundaryIntersection) - m_boundaryGrid.reset(m_texcoords.data(), m_mesh->indices()); + m_boundaryGrid.reset(m_texcoords, m_mesh->indices()); for (uint32_t i = 0; i < m_patch.size(); i++) { for (Mesh::FaceEdgeIterator it(m_mesh, m_patch[i]); !it.isDone(); it.advance()) { const uint32_t oface = it.oppositeFace(); - if (oface == UINT32_MAX || oface >= m_faceCount || !m_faceInPatch.get(oface)) + if (oface == UINT32_MAX || !m_faceInPatch.get(oface)) m_boundaryGrid.append(it.edge()); } } @@ -6841,8 +6370,7 @@ struct PiecewiseParam } private: - struct Candidate - { + struct Candidate { uint32_t face, vertex; Candidate *prev, *next; // The previous/next candidate with the same vertex. Vector2 position; @@ -6852,10 +6380,14 @@ private: float patchVertexOrient; }; - struct CandidateIterator - { - CandidateIterator(Candidate *head) : m_current(head) { XA_DEBUG_ASSERT(!head->prev); } - void advance() { if (m_current != nullptr) { m_current = m_current->next; } } + struct CandidateIterator { + CandidateIterator(Candidate *head) : + m_current(head) { XA_DEBUG_ASSERT(!head->prev); } + void advance() { + if (m_current != nullptr) { + m_current = m_current->next; + } + } bool isDone() const { return !m_current; } Candidate *current() { return m_current; } @@ -6864,7 +6396,6 @@ private: }; const Mesh *m_mesh; - uint32_t m_faceCount; Array<Vector2> m_texcoords; BitArray m_faceInAnyPatch; // Face is in a previous chart patch or the current patch. Array<Candidate *> m_candidates; // Incident faces to the patch. @@ -6873,9 +6404,9 @@ private: BitArray m_faceInPatch, m_vertexInPatch; // Face/vertex is in the current patch. BitArray m_faceInvalid; // Face cannot be added to the patch - flipped, cost too high or causes boundary intersection. UniformGrid2 m_boundaryGrid; + Array<uint32_t> m_newBoundaryEdges, m_ignoreBoundaryEdges; // Temp arrays used when testing for boundary intersection. - void addFaceToPatch(uint32_t face) - { + void addFaceToPatch(uint32_t face) { XA_DEBUG_ASSERT(!m_faceInPatch.get(face)); XA_DEBUG_ASSERT(!m_faceInAnyPatch.get(face)); m_patch.push_back(face); @@ -6884,7 +6415,7 @@ private: // Find new candidate faces on the patch incident to the newly added face. for (Mesh::FaceEdgeIterator it(m_mesh, face); !it.isDone(); it.advance()) { const uint32_t oface = it.oppositeFace(); - if (oface == UINT32_MAX || oface >= m_faceCount || m_faceInAnyPatch.get(oface) || m_faceToCandidate[oface]) + if (oface == UINT32_MAX || m_faceInAnyPatch.get(oface) || m_faceToCandidate[oface]) continue; // Found an active edge on the patch front. // Find the free vertex (the vertex that isn't on the active edge). @@ -6900,12 +6431,14 @@ private: } } XA_DEBUG_ASSERT(freeVertex != UINT32_MAX); - // If the free vertex is already in the patch, the face is enclosed by the patch. Add the face to the patch - don't need to assign texcoords. - /*if (m_vertexInPatch.get(freeVertex)) { + if (m_vertexInPatch.get(freeVertex)) { +#if 0 + // If the free vertex is already in the patch, the face is enclosed by the patch. Add the face to the patch - don't need to assign texcoords. freeVertex = UINT32_MAX; - addFaceToPatch(oface, false); + addFaceToPatch(oface); +#endif continue; - }*/ + } // Check this here rather than above so faces enclosed by the patch are always added. if (m_faceInvalid.get(oface)) continue; @@ -6913,8 +6446,7 @@ private: } } - void addCandidateFace(uint32_t patchEdge, float patchVertexOrient, uint32_t face, uint32_t edge, uint32_t freeVertex) - { + void addCandidateFace(uint32_t patchEdge, float patchVertexOrient, uint32_t face, uint32_t edge, uint32_t freeVertex) { XA_DEBUG_ASSERT(!m_faceToCandidate[face]); Vector2 texcoords[3]; orthoProjectFace(face, texcoords); @@ -6960,8 +6492,10 @@ private: uv.x = x + texcoords[localVertex0].x; uv.y = y + texcoords[localVertex0].y; } - if (isNan(texcoords[localFreeVertex].x) || isNan(texcoords[localFreeVertex].y)) + if (isNan(texcoords[localFreeVertex].x) || isNan(texcoords[localFreeVertex].y)) { + m_faceInvalid.set(face); return; + } // Check for local overlap (flipped triangle). // The patch face vertex that isn't on the active edge and the free vertex should be oriented on opposite sides to the active edge. const float freeVertexOrient = orientToEdge(m_texcoords[vertex0], m_texcoords[vertex1], texcoords[localFreeVertex]); @@ -6975,12 +6509,10 @@ private: return; } const float cost = fabsf(stretch - 1.0f); -#if 0 - if (cost > 0.25f) { + if (cost > 0.5f) { m_faceInvalid.set(face); return; } -#endif // Add the candidate. Candidate *candidate = XA_ALLOC(MemTag::Default, Candidate); candidate->face = face; @@ -7017,8 +6549,7 @@ private: it.current()->maxCost = maxCost; } - Candidate *linkedCandidateHead(Candidate *candidate) - { + Candidate *linkedCandidateHead(Candidate *candidate) { Candidate *current = candidate; for (;;) { if (!current->prev) @@ -7028,8 +6559,7 @@ private: return current; } - void removeLinkedCandidates(Candidate *head) - { + void removeLinkedCandidates(Candidate *head) { XA_DEBUG_ASSERT(!head->prev); Candidate *current = head; while (current) { @@ -7046,10 +6576,9 @@ private: } } - void orthoProjectFace(uint32_t face, Vector2 *texcoords) const - { - const Vector3 normal = m_mesh->computeFaceNormal(face); - const Vector3 tangent = normalize(m_mesh->position(m_mesh->vertexAt(face * 3 + 1)) - m_mesh->position(m_mesh->vertexAt(face * 3 + 0)), kEpsilon); + void orthoProjectFace(uint32_t face, Vector2 *texcoords) const { + const Vector3 normal = -m_mesh->computeFaceNormal(face); + const Vector3 tangent = normalize(m_mesh->position(m_mesh->vertexAt(face * 3 + 1)) - m_mesh->position(m_mesh->vertexAt(face * 3 + 0))); const Vector3 bitangent = cross(normal, tangent); for (uint32_t i = 0; i < 3; i++) { const Vector3 &pos = m_mesh->position(m_mesh->vertexAt(face * 3 + i)); @@ -7057,16 +6586,14 @@ private: } } - float parametricArea(const Vector2 *texcoords) const - { + float parametricArea(const Vector2 *texcoords) const { const Vector2 &v1 = texcoords[0]; const Vector2 &v2 = texcoords[1]; const Vector2 &v3 = texcoords[2]; return ((v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y)) * 0.5f; } - float computeStretch(Vector3 p1, Vector3 p2, Vector3 p3, Vector2 t1, Vector2 t2, Vector2 t3) const - { + float computeStretch(Vector3 p1, Vector3 p2, Vector3 p3, Vector2 t1, Vector2 t2, Vector2 t3) const { float parametricArea = ((t2.y - t1.y) * (t3.x - t1.x) - (t3.y - t1.y) * (t2.x - t1.x)) * 0.5f; if (isZero(parametricArea, kAreaEpsilon)) return FLT_MAX; @@ -7080,16 +6607,13 @@ private: } // Return value is positive if the point is one side of the edge, negative if on the other side. - float orientToEdge(Vector2 edgeVertex0, Vector2 edgeVertex1, Vector2 point) const - { + float orientToEdge(Vector2 edgeVertex0, Vector2 edgeVertex1, Vector2 point) const { return (edgeVertex0.x - point.x) * (edgeVertex1.y - point.y) - (edgeVertex0.y - point.y) * (edgeVertex1.x - point.x); } }; -#endif // Estimate quality of existing parameterization. -struct Quality -{ +struct Quality { // computeBoundaryIntersection bool boundaryIntersection = false; @@ -7106,8 +6630,7 @@ struct Quality float conformalMetric = 0.0f; float authalicMetric = 0.0f; - void computeBoundaryIntersection(const Mesh *mesh, UniformGrid2 &boundaryGrid) - { + void computeBoundaryIntersection(const Mesh *mesh, UniformGrid2 &boundaryGrid) { const Array<uint32_t> &boundaryEdges = mesh->boundaryEdges(); const uint32_t boundaryEdgeCount = boundaryEdges.size(); boundaryGrid.reset(mesh->texcoords(), mesh->indices(), boundaryEdgeCount); @@ -7123,11 +6646,11 @@ struct Quality #endif } - void computeFlippedFaces(const Mesh *mesh, uint32_t faceCount, Array<uint32_t> *flippedFaces) - { + void computeFlippedFaces(const Mesh *mesh, Array<uint32_t> *flippedFaces) { totalTriangleCount = flippedTriangleCount = zeroAreaTriangleCount = 0; if (flippedFaces) flippedFaces->clear(); + const uint32_t faceCount = mesh->faceCount(); for (uint32_t f = 0; f < faceCount; f++) { Vector2 texcoord[3]; for (int i = 0; i < 3; i++) { @@ -7159,8 +6682,7 @@ struct Quality flippedFaces->clear(); flippedTriangleCount = 0; } - if (flippedTriangleCount > totalTriangleCount / 2) - { + if (flippedTriangleCount > totalTriangleCount / 2) { // If more than half the triangles are flipped, reverse the flipped / not flipped classification. flippedTriangleCount = totalTriangleCount - flippedTriangleCount; if (flippedFaces) { @@ -7182,10 +6704,10 @@ struct Quality } } - void computeMetrics(const Mesh *mesh, uint32_t faceCount) - { + void computeMetrics(const Mesh *mesh) { totalGeometricArea = totalParametricArea = 0.0f; stretchMetric = maxStretchMetric = conformalMetric = authalicMetric = 0.0f; + const uint32_t faceCount = mesh->faceCount(); for (uint32_t f = 0; f < faceCount; f++) { Vector3 pos[3]; Vector2 texcoord[3]; @@ -7214,7 +6736,7 @@ struct Quality const float a = dot(Ss, Ss); // E const float b = dot(Ss, St); // F const float c = dot(St, St); // G - // Compute eigen-values of the first fundamental form: + // Compute eigen-values of the first fundamental form: const float sigma1 = sqrtf(0.5f * max(0.0f, a + c - sqrtf(square(a - c) + 4 * square(b)))); // gamma uppercase, min eigenvalue. const float sigma2 = sqrtf(0.5f * max(0.0f, a + c + sqrtf(square(a - c) + 4 * square(b)))); // gamma lowercase, max eigenvalue. XA_ASSERT(sigma2 > sigma1 || equal(sigma1, sigma2, kEpsilon)); @@ -7245,347 +6767,261 @@ struct Quality if (totalGeometricArea > 0.0f) { const float normFactor = sqrtf(totalParametricArea / totalGeometricArea); stretchMetric = sqrtf(stretchMetric / totalGeometricArea) * normFactor; - maxStretchMetric *= normFactor; + maxStretchMetric *= normFactor; conformalMetric = sqrtf(conformalMetric / totalGeometricArea); authalicMetric = sqrtf(authalicMetric / totalGeometricArea); } } }; -struct ChartWarningFlags -{ - enum Enum - { - CloseHolesFailed = 1<<1, - FixTJunctionsDuplicatedEdge = 1<<2, - FixTJunctionsFailed = 1<<3, - TriangulateDuplicatedEdge = 1<<4, - }; -}; - -struct ChartCtorBuffers -{ +struct ChartCtorBuffers { Array<uint32_t> chartMeshIndices; Array<uint32_t> unifiedMeshIndices; - Array<uint32_t> boundaryLoops; }; -class Chart -{ +class Chart { public: - Chart(ChartCtorBuffers &buffers, const ParameterizeOptions &options, const Basis &basis, ConstArrayView<uint32_t> faces, const Mesh *sourceMesh, uint32_t chartGroupId, uint32_t chartId) : m_basis(basis), m_mesh(nullptr), m_unifiedMesh(nullptr), m_unmodifiedUnifiedMesh(nullptr), m_type(ChartType::LSCM), m_warningFlags(0), m_closedHolesCount(0), m_fixedTJunctionsCount(0), m_isInvalid(false) - { + Chart(const Basis &basis, segment::ChartGeneratorType::Enum generatorType, ConstArrayView<uint32_t> faces, const Mesh *sourceMesh, uint32_t chartGroupId, uint32_t chartId) : + m_basis(basis), m_unifiedMesh(nullptr), m_type(ChartType::LSCM), m_generatorType(generatorType), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) { XA_UNUSED(chartGroupId); XA_UNUSED(chartId); m_faceToSourceFaceMap.copyFrom(faces.data, faces.length); const uint32_t approxVertexCount = min(faces.length * 3, sourceMesh->vertexCount()); - m_mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), approxVertexCount, faces.length); m_unifiedMesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), approxVertexCount, faces.length); HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToUnifiedVertexMap(MemTag::Mesh, approxVertexCount), sourceVertexToChartVertexMap(MemTag::Mesh, approxVertexCount); - // Add vertices. - const uint32_t faceCount = m_initialFaceCount = faces.length; + m_originalIndices.resize(faces.length * 3); + // Add geometry. + const uint32_t faceCount = faces.length; for (uint32_t f = 0; f < faceCount; f++) { + uint32_t unifiedIndices[3]; for (uint32_t i = 0; i < 3; i++) { const uint32_t sourceVertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i); - const uint32_t sourceUnifiedVertex = sourceMesh->firstColocal(sourceVertex); + uint32_t sourceUnifiedVertex = sourceMesh->firstColocalVertex(sourceVertex); + if (m_generatorType == segment::ChartGeneratorType::OriginalUv && sourceVertex != sourceUnifiedVertex) { + // Original UVs: don't unify vertices with different UVs; we want to preserve UVs. + if (!equal(sourceMesh->texcoord(sourceVertex), sourceMesh->texcoord(sourceUnifiedVertex), sourceMesh->epsilon())) + sourceUnifiedVertex = sourceVertex; + } uint32_t unifiedVertex = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex); if (unifiedVertex == UINT32_MAX) { unifiedVertex = sourceVertexToUnifiedVertexMap.add(sourceUnifiedVertex); - m_unifiedMesh->addVertex(sourceMesh->position(sourceVertex)); + m_unifiedMesh->addVertex(sourceMesh->position(sourceVertex), Vector3(0.0f), sourceMesh->texcoord(sourceVertex)); } if (sourceVertexToChartVertexMap.get(sourceVertex) == UINT32_MAX) { sourceVertexToChartVertexMap.add(sourceVertex); m_vertexToSourceVertexMap.push_back(sourceVertex); m_chartVertexToUnifiedVertexMap.push_back(unifiedVertex); - m_mesh->addVertex(sourceMesh->position(sourceVertex), Vector3(0.0f), sourceMesh->texcoord(sourceVertex)); + m_originalVertexCount++; } - } - } - // Add faces. - for (uint32_t f = 0; f < faceCount; f++) { - uint32_t indices[3], unifiedIndices[3]; - for (uint32_t i = 0; i < 3; i++) { - const uint32_t sourceVertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i); - const uint32_t sourceUnifiedVertex = sourceMesh->firstColocal(sourceVertex); - indices[i] = sourceVertexToChartVertexMap.get(sourceVertex); - XA_DEBUG_ASSERT(indices[i] != UINT32_MAX); + m_originalIndices[f * 3 + i] = sourceVertexToChartVertexMap.get(sourceVertex); + ; + XA_DEBUG_ASSERT(m_originalIndices[f * 3 + i] != UINT32_MAX); unifiedIndices[i] = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex); XA_DEBUG_ASSERT(unifiedIndices[i] != UINT32_MAX); } - Mesh::AddFaceResult::Enum result = m_mesh->addFace(indices); - XA_UNUSED(result); - XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK); -#if XA_DEBUG - // Unifying colocals may create degenerate edges. e.g. if two triangle vertices are colocal. - for (int i = 0; i < 3; i++) { - const uint32_t index1 = unifiedIndices[i]; - const uint32_t index2 = unifiedIndices[(i + 1) % 3]; - XA_DEBUG_ASSERT(index1 != index2); - } -#endif - result = m_unifiedMesh->addFace(unifiedIndices); - XA_UNUSED(result); - XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK); + m_unifiedMesh->addFace(unifiedIndices); } - m_mesh->createBoundaries(); // For AtlasPacker::computeBoundingBox - m_mesh->destroyEdgeMap(); // Only needed it for createBoundaries. m_unifiedMesh->createBoundaries(); - if (meshIsPlanar(*m_unifiedMesh)) { + if (m_generatorType == segment::ChartGeneratorType::Planar) { m_type = ChartType::Planar; return; } - m_unifiedMesh->linkBoundaries(); -#if XA_DEBUG_EXPORT_OBJ_BEFORE_FIX_TJUNCTION - m_unifiedMesh->writeObjFile("debug_before_fix_tjunction.obj"); -#endif - bool duplicatedEdge = false, failed = false; - if (options.fixTJunctions) { - XA_PROFILE_START(fixChartMeshTJunctions) - Mesh *fixedUnifiedMesh = meshFixTJunctions(*m_unifiedMesh, &duplicatedEdge, &failed, &m_fixedTJunctionsCount); - XA_PROFILE_END(fixChartMeshTJunctions) - if (fixedUnifiedMesh) { - if (duplicatedEdge) - m_warningFlags |= ChartWarningFlags::FixTJunctionsDuplicatedEdge; - if (failed) - m_warningFlags |= ChartWarningFlags::FixTJunctionsFailed; - m_unmodifiedUnifiedMesh = m_unifiedMesh; - m_unifiedMesh = fixedUnifiedMesh; - m_unifiedMesh->createBoundaries(); - m_unifiedMesh->linkBoundaries(); - m_initialFaceCount = m_unifiedMesh->faceCount(); // Fixing t-junctions rewrites faces. - } - } - if (options.closeHoles) { - // See if there are any holes that need closing. - Array<uint32_t> &boundaryLoops = buffers.boundaryLoops; - meshGetBoundaryLoops(*m_unifiedMesh, boundaryLoops); - if (boundaryLoops.size() > 1) { -#if XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR - const uint32_t faceCountBeforeHolesClosed = m_unifiedMesh->faceCount(); +#if XA_CHECK_T_JUNCTIONS + m_tjunctionCount = meshCheckTJunctions(*m_unifiedMesh); +#if XA_DEBUG_EXPORT_OBJ_TJUNCTION + if (m_tjunctionCount > 0) { + char filename[256]; + XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_chart_%03u_tjunction.obj", sourceMesh->id(), chartGroupId, chartId); + m_unifiedMesh->writeObjFile(filename); + } #endif - // Closing the holes is not always the best solution and does not fix all the problems. - // We need to do some analysis of the holes and the genus to: - // - Find cuts that reduce genus. - // - Find cuts to connect holes. - // - Use minimal spanning trees or seamster. - XA_PROFILE_START(closeChartMeshHoles) - uint32_t holeCount = 0; -#if XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR - Array<uint32_t> holeFaceCounts; - failed = !meshCloseHoles(m_unifiedMesh, boundaryLoops, m_basis.normal, &holeFaceCounts); -#else - failed = !meshCloseHoles(m_unifiedMesh, boundaryLoops, m_basis.normal, &holeCount, nullptr); #endif - XA_PROFILE_END(closeChartMeshHoles) - m_unifiedMesh->createBoundaries(); - m_unifiedMesh->linkBoundaries(); - meshGetBoundaryLoops(*m_unifiedMesh, boundaryLoops); - if (failed || boundaryLoops.size() > 1) - m_warningFlags |= ChartWarningFlags::CloseHolesFailed; - m_closedHolesCount = holeCount; -#if XA_DEBUG_EXPORT_OBJ_CLOSE_HOLES_ERROR - if (m_warningFlags & ChartWarningFlags::CloseHolesFailed) { - char filename[256]; - XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_chart_%03u_close_holes_error.obj", sourceMesh->id(), chartGroupId, chartId); - FILE *file; - XA_FOPEN(file, filename, "w"); - if (file) { - m_unifiedMesh->writeObjVertices(file); - fprintf(file, "s off\n"); - fprintf(file, "o object\n"); - for (uint32_t i = 0; i < faceCountBeforeHolesClosed; i++) - m_unifiedMesh->writeObjFace(file, i); - uint32_t face = faceCountBeforeHolesClosed; - for (uint32_t i = 0; i < holeFaceCounts.size(); i++) { - fprintf(file, "s off\n"); - fprintf(file, "o hole%u\n", i); - for (uint32_t j = 0; j < holeFaceCounts[i]; j++) { - m_unifiedMesh->writeObjFace(file, face); - face++; - } - } - m_unifiedMesh->writeObjBoundaryEges(file); - m_unifiedMesh->writeObjLinkedBoundaries(file); - fclose(file); - } - } -#endif - } - } } -#if XA_RECOMPUTE_CHARTS - Chart(ChartCtorBuffers &buffers, const Chart *parent, const Mesh *parentMesh, ConstArrayView<uint32_t> faces, const Vector2 *texcoords, const Mesh *sourceMesh) : m_mesh(nullptr), m_unifiedMesh(nullptr), m_unmodifiedUnifiedMesh(nullptr), m_type(ChartType::Piecewise), m_warningFlags(0), m_closedHolesCount(0), m_fixedTJunctionsCount(0), m_isInvalid(false) - { - const uint32_t faceCount = m_initialFaceCount = faces.length; + Chart(ChartCtorBuffers &buffers, const Chart *parent, const Mesh *parentMesh, ConstArrayView<uint32_t> faces, ConstArrayView<Vector2> texcoords, const Mesh *sourceMesh) : + m_unifiedMesh(nullptr), m_type(ChartType::Piecewise), m_generatorType(segment::ChartGeneratorType::Piecewise), m_tjunctionCount(0), m_originalVertexCount(0), m_isInvalid(false) { + const uint32_t faceCount = faces.length; m_faceToSourceFaceMap.resize(faceCount); for (uint32_t i = 0; i < faceCount; i++) m_faceToSourceFaceMap[i] = parent->m_faceToSourceFaceMap[faces[i]]; // Map faces to parent chart source mesh. // Copy face indices. - m_mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), m_faceToSourceFaceMap.size() * 3, m_faceToSourceFaceMap.size()); Array<uint32_t> &chartMeshIndices = buffers.chartMeshIndices; chartMeshIndices.resize(sourceMesh->vertexCount()); chartMeshIndices.fillBytes(0xff); + m_unifiedMesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, sourceMesh->epsilon(), m_faceToSourceFaceMap.size() * 3, m_faceToSourceFaceMap.size()); + HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToUnifiedVertexMap(MemTag::Mesh, m_faceToSourceFaceMap.size() * 3); // Add vertices. for (uint32_t f = 0; f < faceCount; f++) { for (uint32_t i = 0; i < 3; i++) { const uint32_t vertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i); + const uint32_t sourceUnifiedVertex = sourceMesh->firstColocalVertex(vertex); const uint32_t parentVertex = parentMesh->vertexAt(faces[f] * 3 + i); - if (chartMeshIndices[vertex] == (uint32_t)~0) { - chartMeshIndices[vertex] = m_mesh->vertexCount(); + uint32_t unifiedVertex = sourceVertexToUnifiedVertexMap.get(sourceUnifiedVertex); + if (unifiedVertex == UINT32_MAX) { + unifiedVertex = sourceVertexToUnifiedVertexMap.add(sourceUnifiedVertex); + m_unifiedMesh->addVertex(sourceMesh->position(vertex), Vector3(0.0f), texcoords[parentVertex]); + } + if (chartMeshIndices[vertex] == UINT32_MAX) { + chartMeshIndices[vertex] = m_originalVertexCount; + m_originalVertexCount++; m_vertexToSourceVertexMap.push_back(vertex); - m_mesh->addVertex(sourceMesh->position(vertex), Vector3(0.0f), texcoords[parentVertex]); + m_chartVertexToUnifiedVertexMap.push_back(unifiedVertex); } } } // Add faces. + m_originalIndices.resize(faceCount * 3); for (uint32_t f = 0; f < faceCount; f++) { - uint32_t indices[3]; + uint32_t unifiedIndices[3]; for (uint32_t i = 0; i < 3; i++) { const uint32_t vertex = sourceMesh->vertexAt(m_faceToSourceFaceMap[f] * 3 + i); - indices[i] = chartMeshIndices[vertex]; + m_originalIndices[f * 3 + i] = chartMeshIndices[vertex]; + const uint32_t unifiedVertex = sourceMesh->firstColocalVertex(vertex); + unifiedIndices[i] = sourceVertexToUnifiedVertexMap.get(unifiedVertex); } - Mesh::AddFaceResult::Enum result = m_mesh->addFace(indices); - XA_UNUSED(result); - XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK); + m_unifiedMesh->addFace(unifiedIndices); } - m_mesh->createBoundaries(); // For AtlasPacker::computeBoundingBox - m_mesh->destroyEdgeMap(); // Only needed it for createBoundaries. + m_unifiedMesh->createBoundaries(); // Need to store texcoords for backup/restore so packing can be run multiple times. backupTexcoords(); } -#endif - ~Chart() - { - if (m_mesh) { - m_mesh->~Mesh(); - XA_FREE(m_mesh); + ~Chart() { + if (m_unifiedMesh) { + m_unifiedMesh->~Mesh(); + XA_FREE(m_unifiedMesh); + m_unifiedMesh = nullptr; } - destroyUnifiedMesh(); } bool isInvalid() const { return m_isInvalid; } - ChartType::Enum type() const { return m_type; } - uint32_t warningFlags() const { return m_warningFlags; } - uint32_t closedHolesCount() const { return m_closedHolesCount; } - uint32_t fixedTJunctionsCount() const { return m_fixedTJunctionsCount; } + ChartType type() const { return m_type; } + segment::ChartGeneratorType::Enum generatorType() const { return m_generatorType; } + uint32_t tjunctionCount() const { return m_tjunctionCount; } const Quality &quality() const { return m_quality; } - uint32_t initialFaceCount() const { return m_initialFaceCount; } #if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION const Array<uint32_t> ¶mFlippedFaces() const { return m_paramFlippedFaces; } #endif uint32_t mapFaceToSourceFace(uint32_t i) const { return m_faceToSourceFaceMap[i]; } uint32_t mapChartVertexToSourceVertex(uint32_t i) const { return m_vertexToSourceVertexMap[i]; } - const Mesh *mesh() const { return m_mesh; } - Mesh *mesh() { return m_mesh; } const Mesh *unifiedMesh() const { return m_unifiedMesh; } - const Mesh *unmodifiedUnifiedMesh() const { return m_unmodifiedUnifiedMesh; } + Mesh *unifiedMesh() { return m_unifiedMesh; } - void parameterize(const ParameterizeOptions &options, UniformGrid2 &boundaryGrid) - { - XA_PROFILE_START(parameterizeChartsOrthogonal) - { + // Vertex count of the chart mesh before unifying vertices. + uint32_t originalVertexCount() const { return m_originalVertexCount; } + + uint32_t originalVertexToUnifiedVertex(uint32_t v) const { return m_chartVertexToUnifiedVertexMap[v]; } + + ConstArrayView<uint32_t> originalVertices() const { return m_originalIndices; } + + void parameterize(const ChartOptions &options, UniformGrid2 &boundaryGrid) { + const uint32_t unifiedVertexCount = m_unifiedMesh->vertexCount(); + if (m_generatorType == segment::ChartGeneratorType::OriginalUv) { + } else { // Project vertices to plane. - const uint32_t vertexCount = m_unifiedMesh->vertexCount(); - for (uint32_t i = 0; i < vertexCount; i++) + XA_PROFILE_START(parameterizeChartsOrthogonal) + for (uint32_t i = 0; i < unifiedVertexCount; i++) m_unifiedMesh->texcoord(i) = Vector2(dot(m_basis.tangent, m_unifiedMesh->position(i)), dot(m_basis.bitangent, m_unifiedMesh->position(i))); - } - XA_PROFILE_END(parameterizeChartsOrthogonal) - // Computing charts checks for flipped triangles and boundary intersection. Don't need to do that again here if chart is planar. - if (m_type != ChartType::Planar) { - XA_PROFILE_START(parameterizeChartsEvaluateQuality) - m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid); - m_quality.computeFlippedFaces(m_unifiedMesh, m_initialFaceCount, nullptr); - m_quality.computeMetrics(m_unifiedMesh, m_initialFaceCount); - XA_PROFILE_END(parameterizeChartsEvaluateQuality) - // Use orthogonal parameterization if quality is acceptable. - if (!m_quality.boundaryIntersection && m_quality.flippedTriangleCount == 0 && m_quality.totalGeometricArea > 0.0f && m_quality.stretchMetric <= 1.1f && m_quality.maxStretchMetric <= 1.25f) - m_type = ChartType::Ortho; - } - if (m_type == ChartType::LSCM) { - XA_PROFILE_START(parameterizeChartsLSCM) - if (options.func) { - options.func(&m_unifiedMesh->position(0).x, &m_unifiedMesh->texcoord(0).x, m_unifiedMesh->vertexCount(), m_unifiedMesh->indices(), m_unifiedMesh->indexCount()); - } - else - computeLeastSquaresConformalMap(m_unifiedMesh); - XA_PROFILE_END(parameterizeChartsLSCM) - XA_PROFILE_START(parameterizeChartsEvaluateQuality) - m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid); + XA_PROFILE_END(parameterizeChartsOrthogonal) + // Computing charts checks for flipped triangles and boundary intersection. Don't need to do that again here if chart is planar. + if (m_type != ChartType::Planar && m_generatorType != segment::ChartGeneratorType::OriginalUv) { + XA_PROFILE_START(parameterizeChartsEvaluateQuality) + m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid); + m_quality.computeFlippedFaces(m_unifiedMesh, nullptr); + m_quality.computeMetrics(m_unifiedMesh); + XA_PROFILE_END(parameterizeChartsEvaluateQuality) + // Use orthogonal parameterization if quality is acceptable. + if (!m_quality.boundaryIntersection && m_quality.flippedTriangleCount == 0 && m_quality.zeroAreaTriangleCount == 0 && m_quality.totalGeometricArea > 0.0f && m_quality.stretchMetric <= 1.1f && m_quality.maxStretchMetric <= 1.25f) + m_type = ChartType::Ortho; + } + if (m_type == ChartType::LSCM) { + XA_PROFILE_START(parameterizeChartsLSCM) + if (options.paramFunc) { + options.paramFunc(&m_unifiedMesh->position(0).x, &m_unifiedMesh->texcoord(0).x, m_unifiedMesh->vertexCount(), m_unifiedMesh->indices().data, m_unifiedMesh->indexCount()); + } else + computeLeastSquaresConformalMap(m_unifiedMesh); + XA_PROFILE_END(parameterizeChartsLSCM) + XA_PROFILE_START(parameterizeChartsEvaluateQuality) + m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid); #if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION - m_quality.computeFlippedFaces(m_unifiedMesh, m_initialFaceCount, &m_paramFlippedFaces); + m_quality.computeFlippedFaces(m_unifiedMesh, &m_paramFlippedFaces); #else - m_quality.computeFlippedFaces(m_unifiedMesh, m_initialFaceCount, nullptr); + m_quality.computeFlippedFaces(m_unifiedMesh, nullptr); #endif - // Don't need to call computeMetrics here, that's only used in evaluateOrthoQuality to determine if quality is acceptable enough to use ortho projection. - if (m_quality.boundaryIntersection || m_quality.flippedTriangleCount > 0) - m_isInvalid = true; - XA_PROFILE_END(parameterizeChartsEvaluateQuality) + // Don't need to call computeMetrics here, that's only used in evaluateOrthoQuality to determine if quality is acceptable enough to use ortho projection. + if (m_quality.boundaryIntersection || m_quality.flippedTriangleCount > 0 || m_quality.zeroAreaTriangleCount > 0) + m_isInvalid = true; + XA_PROFILE_END(parameterizeChartsEvaluateQuality) + } } + if (options.fixWinding && m_unifiedMesh->computeFaceParametricArea(0) < 0.0f) { + for (uint32_t i = 0; i < unifiedVertexCount; i++) + m_unifiedMesh->texcoord(i).x *= -1.0f; + } +#if XA_CHECK_PARAM_WINDING + const uint32_t faceCount = m_unifiedMesh->faceCount(); + uint32_t flippedCount = 0; + for (uint32_t i = 0; i < faceCount; i++) { + const float area = m_unifiedMesh->computeFaceParametricArea(i); + if (area < 0.0f) + flippedCount++; + } + if (flippedCount == faceCount) { + XA_PRINT_WARNING("param: all faces flipped\n"); + } else if (flippedCount > 0) { + XA_PRINT_WARNING("param: %u / %u faces flipped\n", flippedCount, faceCount); + } +#endif + #if XA_DEBUG_ALL_CHARTS_INVALID m_isInvalid = true; #endif - // Transfer parameterization from unified mesh to chart mesh. - const uint32_t vertexCount = m_mesh->vertexCount(); - for (uint32_t v = 0; v < vertexCount; v++) - m_mesh->texcoord(v) = m_unifiedMesh->texcoord(m_chartVertexToUnifiedVertexMap[v]); - // Can destroy unified mesh now. - // But not if the parameterization is invalid, the unified mesh will be needed for PiecewiseParameterization. - if (!m_isInvalid) - destroyUnifiedMesh(); // Need to store texcoords for backup/restore so packing can be run multiple times. backupTexcoords(); } - Vector2 computeParametricBounds() const - { + Vector2 computeParametricBounds() const { Vector2 minCorner(FLT_MAX, FLT_MAX); Vector2 maxCorner(-FLT_MAX, -FLT_MAX); - const uint32_t vertexCount = m_mesh->vertexCount(); + const uint32_t vertexCount = m_unifiedMesh->vertexCount(); for (uint32_t v = 0; v < vertexCount; v++) { - minCorner = min(minCorner, m_mesh->texcoord(v)); - maxCorner = max(maxCorner, m_mesh->texcoord(v)); + minCorner = min(minCorner, m_unifiedMesh->texcoord(v)); + maxCorner = max(maxCorner, m_unifiedMesh->texcoord(v)); } return (maxCorner - minCorner) * 0.5f; } - void restoreTexcoords() - { - memcpy(m_mesh->texcoords(), m_backupTexcoords.data(), m_mesh->vertexCount() * sizeof(Vector2)); +#if XA_CHECK_PIECEWISE_CHART_QUALITY + void evaluateQuality(UniformGrid2 &boundaryGrid) { + m_quality.computeBoundaryIntersection(m_unifiedMesh, boundaryGrid); +#if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION + m_quality.computeFlippedFaces(m_unifiedMesh, &m_paramFlippedFaces); +#else + m_quality.computeFlippedFaces(m_unifiedMesh, nullptr); +#endif + if (m_quality.boundaryIntersection || m_quality.flippedTriangleCount > 0 || m_quality.zeroAreaTriangleCount > 0) + m_isInvalid = true; } +#endif -private: - void backupTexcoords() - { - m_backupTexcoords.resize(m_mesh->vertexCount()); - memcpy(m_backupTexcoords.data(), m_mesh->texcoords(), m_mesh->vertexCount() * sizeof(Vector2)); + void restoreTexcoords() { + memcpy(m_unifiedMesh->texcoords().data, m_backupTexcoords.data(), m_unifiedMesh->vertexCount() * sizeof(Vector2)); } - void destroyUnifiedMesh() - { - if (m_unifiedMesh) { - m_unifiedMesh->~Mesh(); - XA_FREE(m_unifiedMesh); - m_unifiedMesh = nullptr; - } - if (m_unmodifiedUnifiedMesh) { - m_unmodifiedUnifiedMesh->~Mesh(); - XA_FREE(m_unmodifiedUnifiedMesh); - m_unmodifiedUnifiedMesh = nullptr; - } - // Don't need this when unified meshes are destroyed. - m_chartVertexToUnifiedVertexMap.destroy(); +private: + void backupTexcoords() { + m_backupTexcoords.resize(m_unifiedMesh->vertexCount()); + memcpy(m_backupTexcoords.data(), m_unifiedMesh->texcoords().data, m_unifiedMesh->vertexCount() * sizeof(Vector2)); } Basis m_basis; - Mesh *m_mesh; Mesh *m_unifiedMesh; - Mesh *m_unmodifiedUnifiedMesh; // Unified mesh before fixing t-junctions. Null if no t-junctions were fixed - ChartType::Enum m_type; - uint32_t m_warningFlags; - uint32_t m_initialFaceCount; // Before fixing T-junctions and/or closing holes. - uint32_t m_closedHolesCount, m_fixedTJunctionsCount; + ChartType m_type; + segment::ChartGeneratorType::Enum m_generatorType; + uint32_t m_tjunctionCount; + + uint32_t m_originalVertexCount; + Array<uint32_t> m_originalIndices; // List of faces of the source mesh that belong to this chart. Array<uint32_t> m_faceToSourceFaceMap; @@ -7604,47 +7040,46 @@ private: bool m_isInvalid; }; -struct CreateAndParameterizeChartTaskArgs -{ - const Basis *basis; +struct CreateAndParameterizeChartTaskGroupArgs { + Progress *progress; ThreadLocal<UniformGrid2> *boundaryGrid; + ThreadLocal<ChartCtorBuffers> *chartBuffers; + const ChartOptions *options; + ThreadLocal<PiecewiseParam> *pp; +}; + +struct CreateAndParameterizeChartTaskArgs { + const Basis *basis; Chart *chart; // output Array<Chart *> charts; // output (if more than one chart) - ThreadLocal<ChartCtorBuffers> *chartBuffers; + segment::ChartGeneratorType::Enum chartGeneratorType; const Mesh *mesh; - const ParameterizeOptions *options; -#if XA_RECOMPUTE_CHARTS - ThreadLocal<PiecewiseParam> *pp; -#endif ConstArrayView<uint32_t> faces; uint32_t chartGroupId; uint32_t chartId; }; -static void runCreateAndParameterizeChartTask(void *userData) -{ - auto args = (CreateAndParameterizeChartTaskArgs *)userData; +static void runCreateAndParameterizeChartTask(void *groupUserData, void *taskUserData) { + XA_PROFILE_START(createChartMeshAndParameterizeThread) + auto groupArgs = (CreateAndParameterizeChartTaskGroupArgs *)groupUserData; + auto args = (CreateAndParameterizeChartTaskArgs *)taskUserData; XA_PROFILE_START(createChartMesh) - args->chart = XA_NEW_ARGS(MemTag::Default, Chart, args->chartBuffers->get(), *args->options, *args->basis, args->faces, args->mesh, args->chartGroupId, args->chartId); + args->chart = XA_NEW_ARGS(MemTag::Default, Chart, *args->basis, args->chartGeneratorType, args->faces, args->mesh, args->chartGroupId, args->chartId); XA_PROFILE_END(createChartMesh) - args->chart->parameterize(*args->options, args->boundaryGrid->get()); + XA_PROFILE_START(parameterizeCharts) + args->chart->parameterize(*groupArgs->options, groupArgs->boundaryGrid->get()); + XA_PROFILE_END(parameterizeCharts) #if XA_RECOMPUTE_CHARTS - if (!args->chart->isInvalid()) + if (!args->chart->isInvalid()) { + XA_PROFILE_END(createChartMeshAndParameterizeThread) return; + } // Recompute charts with invalid parameterizations. XA_PROFILE_START(parameterizeChartsRecompute) Chart *invalidChart = args->chart; - // Fixing t-junctions rewrites unified mesh faces, and we need to map faces back to input mesh. So use the unmodified unified mesh. - const Mesh *invalidMesh = invalidChart->unmodifiedUnifiedMesh(); - uint32_t faceCount = 0; - if (invalidMesh) { - faceCount = invalidMesh->faceCount(); - } else { - invalidMesh = invalidChart->unifiedMesh(); - faceCount = invalidChart->initialFaceCount(); // Not invalidMesh->faceCount(). Don't want faces added by hole closing. - } - PiecewiseParam &pp = args->pp->get(); - pp.reset(invalidMesh, faceCount); + const Mesh *invalidMesh = invalidChart->unifiedMesh(); + PiecewiseParam &pp = groupArgs->pp->get(); + pp.reset(invalidMesh); #if XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS char filename[256]; XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_chart_%03u_recomputed.obj", args->mesh->id(), args->chartGroupId, args->chartId); @@ -7658,7 +7093,10 @@ static void runCreateAndParameterizeChartTask(void *userData) XA_PROFILE_END(parameterizeChartsPiecewise) if (!facesRemaining) break; - Chart *chart = XA_NEW_ARGS(MemTag::Default, Chart, args->chartBuffers->get(), invalidChart, invalidMesh, pp.chartFaces(), pp.texcoords(), args->mesh); + Chart *chart = XA_NEW_ARGS(MemTag::Default, Chart, groupArgs->chartBuffers->get(), invalidChart, invalidMesh, pp.chartFaces(), pp.texcoords(), args->mesh); +#if XA_CHECK_PIECEWISE_CHART_QUALITY + chart->evaluateQuality(args->boundaryGrid->get()); +#endif args->charts.push_back(chart); #if XA_DEBUG_EXPORT_OBJ_RECOMPUTED_CHARTS if (file) { @@ -7686,50 +7124,63 @@ static void runCreateAndParameterizeChartTask(void *userData) #endif XA_PROFILE_END(parameterizeChartsRecompute) #endif // XA_RECOMPUTE_CHARTS + XA_PROFILE_END(createChartMeshAndParameterizeThread) + // Update progress. + groupArgs->progress->increment(args->faces.length); } // Set of charts corresponding to mesh faces in the same face group. -class ChartGroup -{ +class ChartGroup { public: - ChartGroup(uint32_t id, const Mesh *sourceMesh, const MeshFaceGroups *sourceMeshFaceGroups, MeshFaceGroups::Handle faceGroup) : m_id(id), m_sourceMesh(sourceMesh), m_sourceMeshFaceGroups(sourceMeshFaceGroups), m_faceGroup(faceGroup), m_faceCount(0), m_paramAddedChartsCount(0), m_paramDeletedChartsCount(0) - { + ChartGroup(uint32_t id, const Mesh *sourceMesh, const MeshFaceGroups *sourceMeshFaceGroups, MeshFaceGroups::Handle faceGroup) : + m_id(id), m_sourceMesh(sourceMesh), m_sourceMeshFaceGroups(sourceMeshFaceGroups), m_faceGroup(faceGroup) { } - ~ChartGroup() - { + ~ChartGroup() { for (uint32_t i = 0; i < m_charts.size(); i++) { m_charts[i]->~Chart(); XA_FREE(m_charts[i]); } } - uint32_t segmentChartCount() const { return m_chartBasis.size(); } uint32_t chartCount() const { return m_charts.size(); } Chart *chartAt(uint32_t i) const { return m_charts[i]; } - uint32_t faceCount() const { return m_faceCount; } - uint32_t paramAddedChartsCount() const { return m_paramAddedChartsCount; } - uint32_t paramDeletedChartsCount() const { return m_paramDeletedChartsCount; } + uint32_t faceCount() const { return m_sourceMeshFaceGroups->faceCount(m_faceGroup); } - void computeChartFaces(const ChartOptions &options, segment::Atlas &atlas) - { + void computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, Progress *progress, segment::Atlas &atlas, ThreadLocal<UniformGrid2> *boundaryGrid, ThreadLocal<ChartCtorBuffers> *chartBuffers, ThreadLocal<PiecewiseParam> *piecewiseParam) { + // This function may be called multiple times, so destroy existing charts. + for (uint32_t i = 0; i < m_charts.size(); i++) { + m_charts[i]->~Chart(); + XA_FREE(m_charts[i]); + } // Create mesh from source mesh, using only the faces in this face group. XA_PROFILE_START(createChartGroupMesh) Mesh *mesh = createMesh(); XA_PROFILE_END(createChartGroupMesh) // Segment mesh into charts (arrays of faces). #if XA_DEBUG_SINGLE_CHART - m_chartBasis.resize(1); - Fit::computeBasis(&mesh->position(0), mesh->vertexCount(), &m_chartBasis[0]); - m_chartFaces.resize(1 + mesh->faceCount()); - m_chartFaces[0] = mesh->faceCount(); - for (uint32_t i = 0; i < m_chartFaces.size(); i++) - m_chartFaces[i + 1] = i; + XA_UNUSED(options); + XA_UNUSED(atlas); + const uint32_t chartCount = 1; + uint32_t offset; + Basis chartBasis; + Fit::computeBasis(&mesh->position(0), mesh->vertexCount(), &chartBasis); + Array<uint32_t> chartFaces; + chartFaces.resize(1 + mesh->faceCount()); + chartFaces[0] = mesh->faceCount(); + for (uint32_t i = 0; i < chartFaces.size() - 1; i++) + chartFaces[i + 1] = m_faceToSourceFaceMap[i]; + // Destroy mesh. + const uint32_t faceCount = mesh->faceCount(); + mesh->~Mesh(); + XA_FREE(mesh); #else XA_PROFILE_START(buildAtlas) atlas.reset(mesh, options); atlas.compute(); XA_PROFILE_END(buildAtlas) + // Update progress. + progress->increment(faceCount()); #if XA_DEBUG_EXPORT_OBJ_CHARTS char filename[256]; XA_SPRINTF(filename, sizeof(filename), "debug_mesh_%03u_chartgroup_%03u_charts.obj", m_sourceMesh->id(), m_id); @@ -7745,7 +7196,6 @@ public: mesh->writeObjFace(file, faces[f]); } mesh->writeObjBoundaryEges(file); - mesh->writeObjLinkedBoundaries(file); fclose(file); } #endif @@ -7754,65 +7204,57 @@ public: mesh->~Mesh(); XA_FREE(mesh); XA_PROFILE_START(copyChartFaces) - // Copy basis. - const uint32_t chartCount = atlas.chartCount(); - m_chartBasis.resize(chartCount); - for (uint32_t i = 0; i < chartCount; i++) - m_chartBasis[i] = atlas.chartBasis(i); + if (progress->cancel) + return; // Copy faces from segment::Atlas to m_chartFaces array with <chart 0 face count> <face 0> <face n> <chart 1 face count> etc. encoding. // segment::Atlas faces refer to the chart group mesh. Map them to the input mesh instead. - m_chartFaces.resize(chartCount + faceCount); + const uint32_t chartCount = atlas.chartCount(); + Array<uint32_t> chartFaces; + chartFaces.resize(chartCount + faceCount); uint32_t offset = 0; for (uint32_t i = 0; i < chartCount; i++) { ConstArrayView<uint32_t> faces = atlas.chartFaces(i); - m_chartFaces[offset++] = faces.length; + chartFaces[offset++] = faces.length; for (uint32_t j = 0; j < faces.length; j++) - m_chartFaces[offset++] = m_faceToSourceFaceMap[faces[j]]; + chartFaces[offset++] = m_faceToSourceFaceMap[faces[j]]; } XA_PROFILE_END(copyChartFaces) #endif - } - -#if XA_RECOMPUTE_CHARTS - void parameterizeCharts(TaskScheduler *taskScheduler, const ParameterizeOptions &options, ThreadLocal<UniformGrid2> *boundaryGrid, ThreadLocal<ChartCtorBuffers> *chartBuffers, ThreadLocal<PiecewiseParam> *piecewiseParam) -#else - void parameterizeCharts(TaskScheduler* taskScheduler, const ParameterizeOptions &options, ThreadLocal<UniformGrid2>* boundaryGrid, ThreadLocal<ChartCtorBuffers>* chartBuffers) -#endif - { - // This function may be called multiple times, so destroy existing charts. - for (uint32_t i = 0; i < m_charts.size(); i++) { - m_charts[i]->~Chart(); - XA_FREE(m_charts[i]); - } - m_paramAddedChartsCount = 0; - const uint32_t chartCount = m_chartBasis.size(); + XA_PROFILE_START(createChartMeshAndParameterizeReal) + CreateAndParameterizeChartTaskGroupArgs groupArgs; + groupArgs.progress = progress; + groupArgs.boundaryGrid = boundaryGrid; + groupArgs.chartBuffers = chartBuffers; + groupArgs.options = &options; + groupArgs.pp = piecewiseParam; + TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(&groupArgs, chartCount); Array<CreateAndParameterizeChartTaskArgs> taskArgs; taskArgs.resize(chartCount); taskArgs.runCtors(); // Has Array member. - TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartCount); - uint32_t offset = 0; + offset = 0; for (uint32_t i = 0; i < chartCount; i++) { CreateAndParameterizeChartTaskArgs &args = taskArgs[i]; - args.basis = &m_chartBasis[i]; - args.boundaryGrid = boundaryGrid; +#if XA_DEBUG_SINGLE_CHART + args.basis = &chartBasis; + args.isPlanar = false; +#else + args.basis = &atlas.chartBasis(i); + args.chartGeneratorType = atlas.chartGeneratorType(i); +#endif args.chart = nullptr; args.chartGroupId = m_id; args.chartId = i; - args.chartBuffers = chartBuffers; - const uint32_t faceCount = m_chartFaces[offset++]; - args.faces = ConstArrayView<uint32_t>(&m_chartFaces[offset], faceCount); - offset += faceCount; + const uint32_t chartFaceCount = chartFaces[offset++]; + args.faces = ConstArrayView<uint32_t>(&chartFaces[offset], chartFaceCount); + offset += chartFaceCount; args.mesh = m_sourceMesh; - args.options = &options; -#if XA_RECOMPUTE_CHARTS - args.pp = piecewiseParam; -#endif Task task; task.userData = &args; task.func = runCreateAndParameterizeChartTask; taskScheduler->run(taskGroup, task); } taskScheduler->wait(&taskGroup); + XA_PROFILE_END(createChartMeshAndParameterizeReal) #if XA_RECOMPUTE_CHARTS // Count charts. Skip invalid ones and include new ones added by recomputing. uint32_t newChartCount = 0; @@ -7830,7 +7272,6 @@ public: if (chart->isInvalid()) { chart->~Chart(); XA_FREE(chart); - m_paramDeletedChartsCount++; continue; } m_charts[current++] = chart; @@ -7838,10 +7279,8 @@ public: // Now add new charts. for (uint32_t i = 0; i < chartCount; i++) { CreateAndParameterizeChartTaskArgs &args = taskArgs[i]; - for (uint32_t j = 0; j < args.charts.size(); j++) { + for (uint32_t j = 0; j < args.charts.size(); j++) m_charts[current++] = args.charts[j]; - m_paramAddedChartsCount++; - } } #else // XA_RECOMPUTE_CHARTS m_charts.resize(chartCount); @@ -7852,15 +7291,14 @@ public: } private: - Mesh *createMesh() - { + Mesh *createMesh() { XA_DEBUG_ASSERT(m_faceGroup != MeshFaceGroups::kInvalid); // Create new mesh from the source mesh, using faces that belong to this group. m_faceToSourceFaceMap.reserve(m_sourceMeshFaceGroups->faceCount(m_faceGroup)); for (MeshFaceGroups::Iterator it(m_sourceMeshFaceGroups, m_faceGroup); !it.isDone(); it.advance()) m_faceToSourceFaceMap.push_back(it.face()); // Only initial meshes has ignored faces. The only flag we care about is HasNormals. - const uint32_t faceCount = m_faceCount = m_faceToSourceFaceMap.size(); + const uint32_t faceCount = m_faceToSourceFaceMap.size(); XA_DEBUG_ASSERT(faceCount > 0); const uint32_t approxVertexCount = min(faceCount * 3, m_sourceMesh->vertexCount()); Mesh *mesh = XA_NEW_ARGS(MemTag::Mesh, Mesh, m_sourceMesh->epsilon(), approxVertexCount, faceCount, m_sourceMesh->flags() & MeshFlags::HasNormals); @@ -7889,9 +7327,7 @@ private: XA_DEBUG_ASSERT(indices[i] != UINT32_MAX); } // Don't copy flags - ignored faces aren't used by chart groups, they are handled by InvalidMeshGeometry. - Mesh::AddFaceResult::Enum result = mesh->addFace(indices); - XA_UNUSED(result); - XA_DEBUG_ASSERT(result == Mesh::AddFaceResult::OK); + mesh->addFace(indices); } XA_PROFILE_START(createChartGroupMeshColocals) mesh->createColocals(); @@ -7909,98 +7345,57 @@ private: } const uint32_t m_id; - const Mesh * const m_sourceMesh; - const MeshFaceGroups * const m_sourceMeshFaceGroups; + const Mesh *const m_sourceMesh; + const MeshFaceGroups *const m_sourceMeshFaceGroups; const MeshFaceGroups::Handle m_faceGroup; Array<uint32_t> m_faceToSourceFaceMap; // List of faces of the source mesh that belong to this chart group. - Array<Basis> m_chartBasis; // Copied from segment::Atlas. - Array<uint32_t> m_chartFaces; // Copied from segment::Atlas. Encoding: <chart 0 face count> <face 0> <face n> <chart 1 face count> etc. Array<Chart *> m_charts; - uint32_t m_faceCount; // Set by createMesh(). Used for sorting. - uint32_t m_paramAddedChartsCount; // Number of new charts added by recomputing charts with invalid parameterizations. - uint32_t m_paramDeletedChartsCount; // Number of charts with invalid parameterizations that were deleted, after charts were recomputed. -}; - -// References invalid faces and vertices in a mesh. -struct InvalidMeshGeometry -{ - // Invalid faces have the face groups MeshFaceGroups::kInvalid. - void extract(const Mesh *mesh, const MeshFaceGroups *meshFaceGroups) - { - // Copy invalid faces. - m_faces.clear(); - const uint32_t meshFaceCount = mesh->faceCount(); - for (uint32_t f = 0; f < meshFaceCount; f++) { - if (meshFaceGroups->groupAt(f) == MeshFaceGroups::kInvalid) - m_faces.push_back(f); - } - // Create *unique* list of vertices of invalid faces. - const uint32_t faceCount = m_faces.size(); - m_indices.resize(faceCount * 3); - const uint32_t approxVertexCount = min(faceCount * 3, mesh->vertexCount()); - m_vertexToSourceVertexMap.clear(); - m_vertexToSourceVertexMap.reserve(approxVertexCount); - HashMap<uint32_t, PassthroughHash<uint32_t>> sourceVertexToVertexMap(MemTag::Mesh, approxVertexCount); - for (uint32_t f = 0; f < faceCount; f++) { - const uint32_t face = m_faces[f]; - for (uint32_t i = 0; i < 3; i++) { - const uint32_t vertex = mesh->vertexAt(face * 3 + i); - uint32_t newVertex = sourceVertexToVertexMap.get(vertex); - if (newVertex == UINT32_MAX) { - newVertex = sourceVertexToVertexMap.add(vertex); - m_vertexToSourceVertexMap.push_back(vertex); - } - m_indices[f * 3 + i] = newVertex; - } - } - } - - ConstArrayView<uint32_t> faces() const { return m_faces; } - ConstArrayView<uint32_t> indices() const { return m_indices; } - ConstArrayView<uint32_t> vertices() const { return m_vertexToSourceVertexMap; } - -private: - Array<uint32_t> m_faces, m_indices; - Array<uint32_t> m_vertexToSourceVertexMap; // Map face vertices to vertices of the source mesh. }; -struct ChartGroupComputeChartFacesTaskArgs -{ +struct ChartGroupComputeChartsTaskGroupArgs { ThreadLocal<segment::Atlas> *atlas; - ChartGroup *chartGroup; const ChartOptions *options; Progress *progress; + TaskScheduler *taskScheduler; + ThreadLocal<UniformGrid2> *boundaryGrid; + ThreadLocal<ChartCtorBuffers> *chartBuffers; + ThreadLocal<PiecewiseParam> *piecewiseParam; }; -static void runChartGroupComputeChartFacesJob(void *userData) -{ - auto args = (ChartGroupComputeChartFacesTaskArgs *)userData; +static void runChartGroupComputeChartsTask(void *groupUserData, void *taskUserData) { + auto args = (ChartGroupComputeChartsTaskGroupArgs *)groupUserData; + auto chartGroup = (ChartGroup *)taskUserData; if (args->progress->cancel) return; XA_PROFILE_START(chartGroupComputeChartsThread) - args->chartGroup->computeChartFaces(*args->options, args->atlas->get()); + chartGroup->computeCharts(args->taskScheduler, *args->options, args->progress, args->atlas->get(), args->boundaryGrid, args->chartBuffers, args->piecewiseParam); XA_PROFILE_END(chartGroupComputeChartsThread) } -struct MeshComputeChartFacesTaskArgs -{ - Array<ChartGroup *> *chartGroups; // output - InvalidMeshGeometry *invalidMeshGeometry; // output +struct MeshComputeChartsTaskGroupArgs { ThreadLocal<segment::Atlas> *atlas; const ChartOptions *options; Progress *progress; - const Mesh *sourceMesh; TaskScheduler *taskScheduler; + ThreadLocal<UniformGrid2> *boundaryGrid; + ThreadLocal<ChartCtorBuffers> *chartBuffers; + ThreadLocal<PiecewiseParam> *piecewiseParam; +}; + +struct MeshComputeChartsTaskArgs { + const Mesh *sourceMesh; + Array<ChartGroup *> *chartGroups; // output + InvalidMeshGeometry *invalidMeshGeometry; // output }; #if XA_DEBUG_EXPORT_OBJ_FACE_GROUPS static uint32_t s_faceGroupsCurrentVertex = 0; #endif -static void runMeshComputeChartFacesJob(void *userData) -{ - auto args = (MeshComputeChartFacesTaskArgs *)userData; - if (args->progress->cancel) +static void runMeshComputeChartsTask(void *groupUserData, void *taskUserData) { + auto groupArgs = (MeshComputeChartsTaskGroupArgs *)groupUserData; + auto args = (MeshComputeChartsTaskArgs *)taskUserData; + if (groupArgs->progress->cancel) return; XA_PROFILE_START(computeChartsThread) // Create face groups. @@ -8009,7 +7404,7 @@ static void runMeshComputeChartFacesJob(void *userData) meshFaceGroups->compute(); const uint32_t chartGroupCount = meshFaceGroups->groupCount(); XA_PROFILE_END(createFaceGroups) - if (args->progress->cancel) + if (groupArgs->progress->cancel) goto cleanup; #if XA_DEBUG_EXPORT_OBJ_FACE_GROUPS { @@ -8053,33 +7448,41 @@ static void runMeshComputeChartFacesJob(void *userData) for (uint32_t i = 0; i < chartGroupCount; i++) (*args->chartGroups)[i] = XA_NEW_ARGS(MemTag::Default, ChartGroup, i, args->sourceMesh, meshFaceGroups, MeshFaceGroups::Handle(i)); // Extract invalid geometry via the invalid face group (MeshFaceGroups::kInvalid). - XA_PROFILE_START(extractInvalidMeshGeometry) - args->invalidMeshGeometry->extract(args->sourceMesh, meshFaceGroups); - XA_PROFILE_END(extractInvalidMeshGeometry) - // One task for each chart group - compute chart faces. + { + XA_PROFILE_START(extractInvalidMeshGeometry) + args->invalidMeshGeometry->extract(args->sourceMesh, meshFaceGroups); + XA_PROFILE_END(extractInvalidMeshGeometry) + } + // One task for each chart group - compute charts. { XA_PROFILE_START(chartGroupComputeChartsReal) - Array<ChartGroupComputeChartFacesTaskArgs> taskArgs; - taskArgs.resize(chartGroupCount); - for (uint32_t i = 0; i < chartGroupCount; i++) { - taskArgs[i].atlas = args->atlas; - taskArgs[i].chartGroup = (*args->chartGroups)[i]; - taskArgs[i].options = args->options; - taskArgs[i].progress = args->progress; - } - TaskGroupHandle taskGroup = args->taskScheduler->createTaskGroup(chartGroupCount); + // Sort chart groups by face count. + Array<float> chartGroupSortData; + chartGroupSortData.resize(chartGroupCount); + for (uint32_t i = 0; i < chartGroupCount; i++) + chartGroupSortData[i] = (float)(*args->chartGroups)[i]->faceCount(); + RadixSort chartGroupSort; + chartGroupSort.sort(chartGroupSortData); + // Larger chart groups are added first to reduce the chance of thread starvation. + ChartGroupComputeChartsTaskGroupArgs taskGroupArgs; + taskGroupArgs.atlas = groupArgs->atlas; + taskGroupArgs.options = groupArgs->options; + taskGroupArgs.progress = groupArgs->progress; + taskGroupArgs.taskScheduler = groupArgs->taskScheduler; + taskGroupArgs.boundaryGrid = groupArgs->boundaryGrid; + taskGroupArgs.chartBuffers = groupArgs->chartBuffers; + taskGroupArgs.piecewiseParam = groupArgs->piecewiseParam; + TaskGroupHandle taskGroup = groupArgs->taskScheduler->createTaskGroup(&taskGroupArgs, chartGroupCount); for (uint32_t i = 0; i < chartGroupCount; i++) { Task task; - task.userData = &taskArgs[i]; - task.func = runChartGroupComputeChartFacesJob; - args->taskScheduler->run(taskGroup, task); + task.userData = (*args->chartGroups)[chartGroupCount - i - 1]; + task.func = runChartGroupComputeChartsTask; + groupArgs->taskScheduler->run(taskGroup, task); } - args->taskScheduler->wait(&taskGroup); + groupArgs->taskScheduler->wait(&taskGroup); XA_PROFILE_END(chartGroupComputeChartsReal) } XA_PROFILE_END(computeChartsThread) - args->progress->value++; - args->progress->update(); cleanup: if (meshFaceGroups) { meshFaceGroups->~MeshFaceGroups(); @@ -8087,43 +7490,13 @@ cleanup: } } -struct ParameterizeChartsTaskArgs -{ - TaskScheduler *taskScheduler; - ChartGroup *chartGroup; - const ParameterizeOptions *options; - ThreadLocal<UniformGrid2> *boundaryGrid; - ThreadLocal<ChartCtorBuffers> *chartBuffers; -#if XA_RECOMPUTE_CHARTS - ThreadLocal<PiecewiseParam> *piecewiseParam; -#endif - Progress *progress; -}; - -static void runParameterizeChartsJob(void *userData) -{ - auto args = (ParameterizeChartsTaskArgs *)userData; - if (args->progress->cancel) - return; - XA_PROFILE_START(parameterizeChartsThread) -#if XA_RECOMPUTE_CHARTS - args->chartGroup->parameterizeCharts(args->taskScheduler, *args->options, args->boundaryGrid, args->chartBuffers, args->piecewiseParam); -#else - args->chartGroup->parameterizeCharts(args->taskScheduler, *args->options, args->boundaryGrid, args->chartBuffers); -#endif - XA_PROFILE_END(parameterizeChartsThread) - args->progress->value++; - args->progress->update(); -} - /// An atlas is a set of chart groups. -class Atlas -{ +class Atlas { public: - Atlas() : m_chartsComputed(false), m_chartsParameterized(false) {} + Atlas() : + m_chartsComputed(false) {} - ~Atlas() - { + ~Atlas() { for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) { for (uint32_t j = 0; j < m_meshChartGroups[i].size(); j++) { m_meshChartGroups[i][j]->~ChartGroup(); @@ -8137,22 +7510,25 @@ public: uint32_t meshCount() const { return m_meshes.size(); } const InvalidMeshGeometry &invalidMeshGeometry(uint32_t meshIndex) const { return m_invalidMeshGeometry[meshIndex]; } bool chartsComputed() const { return m_chartsComputed; } - bool chartsParameterized() const { return m_chartsParameterized; } uint32_t chartGroupCount(uint32_t mesh) const { return m_meshChartGroups[mesh].size(); } const ChartGroup *chartGroupAt(uint32_t mesh, uint32_t group) const { return m_meshChartGroups[mesh][group]; } - void addMesh(const Mesh *mesh) - { + void addMesh(const Mesh *mesh) { m_meshes.push_back(mesh); } - bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData) - { + bool computeCharts(TaskScheduler *taskScheduler, const ChartOptions &options, ProgressFunc progressFunc, void *progressUserData) { + XA_PROFILE_START(computeChartsReal) #if XA_DEBUG_EXPORT_OBJ_PLANAR_REGIONS segment::s_planarRegionsCurrentRegion = segment::s_planarRegionsCurrentVertex = 0; #endif + // Progress is per-face x 2 (1 for chart faces, 1 for parameterized chart faces). + const uint32_t meshCount = m_meshes.size(); + uint32_t totalFaceCount = 0; + for (uint32_t i = 0; i < meshCount; i++) + totalFaceCount += m_meshes[i]->faceCount(); + Progress progress(ProgressCategory::ComputeCharts, progressFunc, progressUserData, totalFaceCount * 2); m_chartsComputed = false; - m_chartsParameterized = false; // Clear chart groups, since this function may be called multiple times. if (!m_meshChartGroups.isEmpty()) { for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) { @@ -8162,27 +7538,20 @@ public: } m_meshChartGroups[i].clear(); } - XA_ASSERT(m_meshChartGroups.size() == m_meshes.size()); // The number of meshes shouldn't have changed. + XA_ASSERT(m_meshChartGroups.size() == meshCount); // The number of meshes shouldn't have changed. } - m_meshChartGroups.resize(m_meshes.size()); + m_meshChartGroups.resize(meshCount); m_meshChartGroups.runCtors(); - m_invalidMeshGeometry.resize(m_meshes.size()); + m_invalidMeshGeometry.resize(meshCount); m_invalidMeshGeometry.runCtors(); // One task per mesh. - const uint32_t meshCount = m_meshes.size(); - Progress progress(ProgressCategory::ComputeCharts, progressFunc, progressUserData, meshCount); - ThreadLocal<segment::Atlas> atlas; - Array<MeshComputeChartFacesTaskArgs> taskArgs; + Array<MeshComputeChartsTaskArgs> taskArgs; taskArgs.resize(meshCount); for (uint32_t i = 0; i < meshCount; i++) { - MeshComputeChartFacesTaskArgs &args = taskArgs[i]; - args.atlas = &atlas; + MeshComputeChartsTaskArgs &args = taskArgs[i]; + args.sourceMesh = m_meshes[i]; args.chartGroups = &m_meshChartGroups[i]; args.invalidMeshGeometry = &m_invalidMeshGeometry[i]; - args.options = &options; - args.progress = &progress; - args.sourceMesh = m_meshes[i]; - args.taskScheduler = taskScheduler; } // Sort meshes by indexCount. Array<float> meshSortData; @@ -8192,105 +7561,53 @@ public: RadixSort meshSort; meshSort.sort(meshSortData); // Larger meshes are added first to reduce the chance of thread starvation. - TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(meshCount); - for (uint32_t i = 0; i < meshCount; i++) { - Task task; - task.userData = &taskArgs[meshSort.ranks()[meshCount - i - 1]]; - task.func = runMeshComputeChartFacesJob; - taskScheduler->run(taskGroup, task); - } - taskScheduler->wait(&taskGroup); - if (progress.cancel) - return false; - m_chartsComputed = true; - return true; - } - - bool parameterizeCharts(TaskScheduler *taskScheduler, const ParameterizeOptions &options, ProgressFunc progressFunc, void *progressUserData) - { - m_chartsParameterized = false; - uint32_t chartGroupCount = 0; - for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) - chartGroupCount += m_meshChartGroups[i].size(); - Progress progress(ProgressCategory::ParameterizeCharts, progressFunc, progressUserData, chartGroupCount); + ThreadLocal<segment::Atlas> atlas; ThreadLocal<UniformGrid2> boundaryGrid; // For Quality boundary intersection. ThreadLocal<ChartCtorBuffers> chartBuffers; -#if XA_RECOMPUTE_CHARTS ThreadLocal<PiecewiseParam> piecewiseParam; -#endif - Array<ParameterizeChartsTaskArgs> taskArgs; - taskArgs.resize(chartGroupCount); - { - uint32_t k = 0; - for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) { - const uint32_t count = m_meshChartGroups[i].size(); - for (uint32_t j = 0; j < count; j++) { - ParameterizeChartsTaskArgs &args = taskArgs[k]; - args.taskScheduler = taskScheduler; - args.chartGroup = m_meshChartGroups[i][j]; - args.options = &options; - args.boundaryGrid = &boundaryGrid; - args.chartBuffers = &chartBuffers; -#if XA_RECOMPUTE_CHARTS - args.piecewiseParam = &piecewiseParam; -#endif - args.progress = &progress; - k++; - } - } - } - // Sort chart groups by face count. - Array<float> chartGroupSortData; - chartGroupSortData.resize(chartGroupCount); - { - uint32_t k = 0; - for (uint32_t i = 0; i < m_meshChartGroups.size(); i++) { - const uint32_t count = m_meshChartGroups[i].size(); - for (uint32_t j = 0; j < count; j++) { - chartGroupSortData[k++] = (float)m_meshChartGroups[i][j]->faceCount(); - } - } - } - RadixSort chartGroupSort; - chartGroupSort.sort(chartGroupSortData); - // Larger chart groups are added first to reduce the chance of thread starvation. - TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartGroupCount); - for (uint32_t i = 0; i < chartGroupCount; i++) { + MeshComputeChartsTaskGroupArgs taskGroupArgs; + taskGroupArgs.atlas = &atlas; + taskGroupArgs.options = &options; + taskGroupArgs.progress = &progress; + taskGroupArgs.taskScheduler = taskScheduler; + taskGroupArgs.boundaryGrid = &boundaryGrid; + taskGroupArgs.chartBuffers = &chartBuffers; + taskGroupArgs.piecewiseParam = &piecewiseParam; + TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(&taskGroupArgs, meshCount); + for (uint32_t i = 0; i < meshCount; i++) { Task task; - task.userData = &taskArgs[chartGroupSort.ranks()[chartGroupCount - i - 1]]; - task.func = runParameterizeChartsJob; + task.userData = &taskArgs[meshSort.ranks()[meshCount - i - 1]]; + task.func = runMeshComputeChartsTask; taskScheduler->run(taskGroup, task); } taskScheduler->wait(&taskGroup); + XA_PROFILE_END(computeChartsReal) if (progress.cancel) return false; - m_chartsParameterized = true; + m_chartsComputed = true; return true; } private: Array<const Mesh *> m_meshes; Array<InvalidMeshGeometry> m_invalidMeshGeometry; // 1 per mesh. - Array<Array<ChartGroup *> > m_meshChartGroups; + Array<Array<ChartGroup *>> m_meshChartGroups; bool m_chartsComputed; - bool m_chartsParameterized; }; } // namespace param namespace pack { -class AtlasImage -{ +class AtlasImage { public: - AtlasImage(uint32_t width, uint32_t height) : m_width(width), m_height(height) - { + AtlasImage(uint32_t width, uint32_t height) : + m_width(width), m_height(height) { m_data.resize(m_width * m_height); memset(m_data.data(), 0, sizeof(uint32_t) * m_data.size()); } - void resize(uint32_t width, uint32_t height) - { + void resize(uint32_t width, uint32_t height) { Array<uint32_t> data; data.resize(width * height); memset(data.data(), 0, sizeof(uint32_t) * data.size()); @@ -8301,8 +7618,7 @@ public: data.moveTo(m_data); } - void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y) - { + void addChart(uint32_t chartIndex, const BitImage *image, const BitImage *imageBilinear, const BitImage *imagePadding, int atlas_w, int atlas_h, int offset_x, int offset_y) { const int w = image->width(); const int h = image->height(); for (int y = 0; y < h; y++) { @@ -8328,15 +7644,13 @@ public: } } - void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const - { + void copyTo(uint32_t *dest, uint32_t destWidth, uint32_t destHeight, int padding) const { for (uint32_t y = 0; y < destHeight; y++) memcpy(&dest[y * destWidth], &m_data[padding + (y + padding) * m_width], destWidth * sizeof(uint32_t)); } #if XA_DEBUG_EXPORT_ATLAS_IMAGES - void writeTga(const char *filename, uint32_t width, uint32_t height) const - { + void writeTga(const char *filename, uint32_t width, uint32_t height) const { Array<uint8_t> image; image.resize(width * height * 3); for (uint32_t y = 0; y < height; y++) { @@ -8378,18 +7692,14 @@ private: Array<uint32_t> m_data; }; -struct Chart -{ +struct Chart { int32_t atlasIndex; uint32_t material; - uint32_t indexCount; - const uint32_t *indices; + ConstArrayView<uint32_t> indices; float parametricArea; float surfaceArea; - Vector2 *vertices; - uint32_t vertexCount; + ArrayView<Vector2> vertices; Array<uint32_t> uniqueVertices; - bool allowRotate; // bounding box Vector2 majorAxis, minorAxis, minCorner, maxCorner; // Mesh only @@ -8398,29 +7708,26 @@ struct Chart Array<uint32_t> faces; Vector2 &uniqueVertexAt(uint32_t v) { return uniqueVertices.isEmpty() ? vertices[v] : vertices[uniqueVertices[v]]; } - uint32_t uniqueVertexCount() const { return uniqueVertices.isEmpty() ? vertexCount : uniqueVertices.size(); } + uint32_t uniqueVertexCount() const { return uniqueVertices.isEmpty() ? vertices.length : uniqueVertices.size(); } }; -struct AddChartTaskArgs -{ - ThreadLocal<BoundingBox2D> *boundingBox; +struct AddChartTaskArgs { param::Chart *paramChart; Chart *chart; // out }; -static void runAddChartTask(void *userData) -{ +static void runAddChartTask(void *groupUserData, void *taskUserData) { XA_PROFILE_START(packChartsAddChartsThread) - auto args = (AddChartTaskArgs *)userData; + auto boundingBox = (ThreadLocal<BoundingBox2D> *)groupUserData; + auto args = (AddChartTaskArgs *)taskUserData; param::Chart *paramChart = args->paramChart; XA_PROFILE_START(packChartsAddChartsRestoreTexcoords) paramChart->restoreTexcoords(); XA_PROFILE_END(packChartsAddChartsRestoreTexcoords) - Mesh *mesh = paramChart->mesh(); + Mesh *mesh = paramChart->unifiedMesh(); Chart *chart = args->chart = XA_NEW(MemTag::Default, Chart); chart->atlasIndex = -1; chart->material = 0; - chart->indexCount = mesh->indexCount(); chart->indices = mesh->indices(); chart->parametricArea = mesh->computeParametricArea(); if (chart->parametricArea < kAreaEpsilon) { @@ -8430,17 +7737,15 @@ static void runAddChartTask(void *userData) } chart->surfaceArea = mesh->computeSurfaceArea(); chart->vertices = mesh->texcoords(); - chart->vertexCount = mesh->vertexCount(); - chart->allowRotate = true; chart->boundaryEdges = &mesh->boundaryEdges(); // Compute bounding box of chart. - BoundingBox2D &bb = args->boundingBox->get(); + BoundingBox2D &bb = boundingBox->get(); bb.clear(); - for (uint32_t v = 0; v < chart->vertexCount; v++) { + for (uint32_t v = 0; v < chart->vertices.length; v++) { if (mesh->isBoundaryVertex(v)) bb.appendBoundaryVertex(mesh->texcoord(v)); } - bb.compute(mesh->texcoords(), mesh->vertexCount()); + bb.compute(mesh->texcoords()); chart->majorAxis = bb.majorAxis; chart->minorAxis = bb.minorAxis; chart->minCorner = bb.minCorner; @@ -8448,10 +7753,8 @@ static void runAddChartTask(void *userData) XA_PROFILE_END(packChartsAddChartsThread) } -struct Atlas -{ - ~Atlas() - { +struct Atlas { + ~Atlas() { for (uint32_t i = 0; i < m_atlasImages.size(); i++) { m_atlasImages[i]->~AtlasImage(); XA_FREE(m_atlasImages[i]); @@ -8475,8 +7778,7 @@ struct Atlas const Array<AtlasImage *> &getImages() const { return m_atlasImages; } float getUtilization(uint32_t atlas) const { return m_utilization[atlas]; } - void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas) - { + void addCharts(TaskScheduler *taskScheduler, param::Atlas *paramAtlas) { // Count charts. uint32_t chartCount = 0; for (uint32_t i = 0; i < paramAtlas->meshCount(); i++) { @@ -8489,11 +7791,11 @@ struct Atlas if (chartCount == 0) return; // Run one task per chart. + ThreadLocal<BoundingBox2D> boundingBox; + TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(&boundingBox, chartCount); Array<AddChartTaskArgs> taskArgs; taskArgs.resize(chartCount); - TaskGroupHandle taskGroup = taskScheduler->createTaskGroup(chartCount); uint32_t chartIndex = 0; - ThreadLocal<BoundingBox2D> boundingBox; for (uint32_t i = 0; i < paramAtlas->meshCount(); i++) { const uint32_t chartGroupsCount = paramAtlas->chartGroupCount(i); for (uint32_t j = 0; j < chartGroupsCount; j++) { @@ -8501,7 +7803,6 @@ struct Atlas const uint32_t count = chartGroup->chartCount(); for (uint32_t k = 0; k < count; k++) { AddChartTaskArgs &args = taskArgs[chartIndex]; - args.boundingBox = &boundingBox; args.paramChart = chartGroup->chartAt(k); Task task; task.userData = &taskArgs[chartIndex]; @@ -8518,8 +7819,10 @@ struct Atlas m_charts[i] = taskArgs[i].chart; } - void addUvMeshCharts(UvMeshInstance *mesh) - { + void addUvMeshCharts(UvMeshInstance *mesh) { + // Copy texcoords from mesh. + mesh->texcoords.resize(mesh->mesh->texcoords.size()); + memcpy(mesh->texcoords.data(), mesh->mesh->texcoords.data(), mesh->texcoords.size() * sizeof(Vector2)); BitArray vertexUsed(mesh->texcoords.size()); BoundingBox2D boundingBox; for (uint32_t c = 0; c < mesh->mesh->charts.size(); c++) { @@ -8527,17 +7830,14 @@ struct Atlas Chart *chart = XA_NEW(MemTag::Default, Chart); chart->atlasIndex = -1; chart->material = uvChart->material; - chart->indexCount = uvChart->indices.size(); - chart->indices = uvChart->indices.data(); - chart->vertices = mesh->texcoords.data(); - chart->vertexCount = mesh->texcoords.size(); - chart->allowRotate = mesh->rotateCharts; + chart->indices = uvChart->indices; + chart->vertices = mesh->texcoords; chart->boundaryEdges = nullptr; chart->faces.resize(uvChart->faces.size()); memcpy(chart->faces.data(), uvChart->faces.data(), sizeof(uint32_t) * uvChart->faces.size()); // Find unique vertices. vertexUsed.zeroOutMemory(); - for (uint32_t i = 0; i < chart->indexCount; i++) { + for (uint32_t i = 0; i < chart->indices.length; i++) { const uint32_t vertex = chart->indices[i]; if (!vertexUsed.get(vertex)) { vertexUsed.set(vertex); @@ -8546,14 +7846,13 @@ struct Atlas } // Compute parametric and surface areas. chart->parametricArea = 0.0f; - for (uint32_t f = 0; f < chart->indexCount / 3; f++) { + for (uint32_t f = 0; f < chart->indices.length / 3; f++) { const Vector2 &v1 = chart->vertices[chart->indices[f * 3 + 0]]; const Vector2 &v2 = chart->vertices[chart->indices[f * 3 + 1]]; const Vector2 &v3 = chart->vertices[chart->indices[f * 3 + 2]]; chart->parametricArea += fabsf(triangleArea(v1, v2, v3)); } chart->parametricArea *= 0.5f; - chart->surfaceArea = chart->parametricArea; // Identical for UV meshes. if (chart->parametricArea < kAreaEpsilon) { // When the parametric area is too small we use a rough approximation to prevent divisions by very small numbers. Vector2 minCorner(FLT_MAX, FLT_MAX); @@ -8565,6 +7864,9 @@ struct Atlas const Vector2 bounds = (maxCorner - minCorner) * 0.5f; chart->parametricArea = bounds.x * bounds.y; } + XA_DEBUG_ASSERT(isFinite(chart->parametricArea)); + XA_DEBUG_ASSERT(!isNan(chart->parametricArea)); + chart->surfaceArea = chart->parametricArea; // Identical for UV meshes. // Compute bounding box of chart. // Using all unique vertices for simplicity, can compute real boundaries if this is too slow. boundingBox.clear(); @@ -8580,8 +7882,7 @@ struct Atlas } // Pack charts in the smallest possible rectangle. - bool packCharts(const PackOptions &options, ProgressFunc progressFunc, void *progressUserData) - { + bool packCharts(const PackOptions &options, ProgressFunc progressFunc, void *progressUserData) { if (progressFunc) { if (!progressFunc(ProgressCategory::PackCharts, 0, progressUserData)) return false; @@ -8627,19 +7928,19 @@ struct Atlas // Compute chart scale float scale = 1.0f; if (chart->parametricArea != 0.0f) { - scale = (chart->surfaceArea / chart->parametricArea) * m_texelsPerUnit; + scale = sqrtf(chart->surfaceArea / chart->parametricArea) * m_texelsPerUnit; XA_ASSERT(isFinite(scale)); } // Translate, rotate and scale vertices. Compute extents. Vector2 minCorner(FLT_MAX, FLT_MAX); - if (!chart->allowRotate) { + if (!options.rotateChartsToAxis) { for (uint32_t i = 0; i < chart->uniqueVertexCount(); i++) minCorner = min(minCorner, chart->uniqueVertexAt(i)); } Vector2 extents(0.0f); for (uint32_t i = 0; i < chart->uniqueVertexCount(); i++) { Vector2 &texcoord = chart->uniqueVertexAt(i); - if (chart->allowRotate) { + if (options.rotateChartsToAxis) { const float x = dot(texcoord, chart->majorAxis); const float y = dot(texcoord, chart->minorAxis); texcoord.x = x; @@ -8750,27 +8051,27 @@ struct Atlas // Resize and clear (discard = true) chart images. // Leave room for padding at extents. chartImage.resize(ftoi_ceil(chartExtents[c].x) + options.padding, ftoi_ceil(chartExtents[c].y) + options.padding, true); - if (chart->allowRotate) + if (options.rotateCharts) chartImageRotated.resize(chartImage.height(), chartImage.width(), true); if (options.bilinear) { chartImageBilinear.resize(chartImage.width(), chartImage.height(), true); - if (chart->allowRotate) + if (options.rotateCharts) chartImageBilinearRotated.resize(chartImage.height(), chartImage.width(), true); } // Rasterize chart faces. - const uint32_t faceCount = chart->indexCount / 3; + const uint32_t faceCount = chart->indices.length / 3; for (uint32_t f = 0; f < faceCount; f++) { Vector2 vertices[3]; for (uint32_t v = 0; v < 3; v++) vertices[v] = chart->vertices[chart->indices[f * 3 + v]]; DrawTriangleCallbackArgs args; args.chartBitImage = &chartImage; - args.chartBitImageRotated = chart->allowRotate ? &chartImageRotated : nullptr; + args.chartBitImageRotated = options.rotateCharts ? &chartImageRotated : nullptr; raster::drawTriangle(Vector2((float)chartImage.width(), (float)chartImage.height()), vertices, drawTriangleCallback, &args); } // Expand chart by pixels sampled by bilinear interpolation. if (options.bilinear) - bilinearExpand(chart, &chartImage, &chartImageBilinear, chart->allowRotate ? &chartImageBilinearRotated : nullptr, boundaryEdgeGrid); + bilinearExpand(chart, &chartImage, &chartImageBilinear, options.rotateCharts ? &chartImageBilinearRotated : nullptr, boundaryEdgeGrid); // Expand chart by padding pixels (dilation). if (options.padding > 0) { // Copy into the same BitImage instances for every chart to avoid reallocating BitImage buffers (largest chart is packed first). @@ -8780,7 +8081,7 @@ struct Atlas else chartImage.copyTo(chartImagePadding); chartImagePadding.dilate(options.padding); - if (chart->allowRotate) { + if (options.rotateCharts) { if (options.bilinear) chartImageBilinearRotated.copyTo(chartImagePaddingRotated); else @@ -8815,23 +8116,25 @@ struct Atlas int best_x = 0, best_y = 0; int best_cw = 0, best_ch = 0; int best_r = 0; - for (;;) - { + for (;;) { +#if XA_DEBUG bool firstChartInBitImage = false; - XA_UNUSED(firstChartInBitImage); +#endif if (currentAtlas + 1 > m_bitImages.size()) { // Chart doesn't fit in the current bitImage, create a new one. BitImage *bi = XA_NEW_ARGS(MemTag::Default, BitImage, resolution, resolution); m_bitImages.push_back(bi); atlasSizes.push_back(Vector2i(0, 0)); +#if XA_DEBUG firstChartInBitImage = true; +#endif if (createImage) m_atlasImages.push_back(XA_NEW_ARGS(MemTag::Default, AtlasImage, resolution, resolution)); // Start positions are per-atlas, so create a new one of those too. chartStartPositions.push_back(Vector2i(0, 0)); } XA_PROFILE_START(packChartsFindLocation) - const bool foundLocation = findChartLocation(chartStartPositions[currentAtlas], options.bruteForce, m_bitImages[currentAtlas], chartImageToPack, chartImageToPackRotated, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, &best_x, &best_y, &best_cw, &best_ch, &best_r, options.blockAlign, maxResolution, chart->allowRotate); + const bool foundLocation = findChartLocation(options, chartStartPositions[currentAtlas], m_bitImages[currentAtlas], chartImageToPack, chartImageToPackRotated, atlasSizes[currentAtlas].x, atlasSizes[currentAtlas].y, &best_x, &best_y, &best_cw, &best_ch, &best_r, maxResolution); XA_PROFILE_END(packChartsFindLocation) XA_DEBUG_ASSERT(!(firstChartInBitImage && !foundLocation)); // Chart doesn't fit in an empty, newly allocated bitImage. Shouldn't happen, since charts are resized if they are too big to fit in the atlas. if (maxResolution == 0) { @@ -8849,8 +8152,7 @@ struct Atlas if (best_x + best_cw > atlasSizes[currentAtlas].x || best_y + best_ch > atlasSizes[currentAtlas].y) { for (uint32_t j = 0; j < chartStartPositions.size(); j++) chartStartPositions[j] = Vector2i(0, 0); - } - else { + } else { chartStartPositions[currentAtlas] = Vector2i(best_x, best_y); } } @@ -8897,7 +8199,7 @@ struct Atlas Vector2 &texcoord = chart->uniqueVertexAt(v); Vector2 t = texcoord; if (best_r) { - XA_DEBUG_ASSERT(chart->allowRotate); + XA_DEBUG_ASSERT(options.rotateCharts); swap(t.x, t.y); } texcoord.x = best_x + t.x; @@ -8938,8 +8240,7 @@ struct Atlas } if (m_utilization.size() > 1) { XA_PRINT(" %u: %f%% utilization\n", i, m_utilization[i] * 100.0f); - } - else { + } else { XA_PRINT(" %f%% utilization\n", m_utilization[i] * 100.0f); } } @@ -8958,28 +8259,22 @@ struct Atlas } private: - // IC: Brute force is slow, and random may take too much time to converge. We start inserting large charts in a small atlas. Using brute force is lame, because most of the space - // is occupied at this point. At the end we have many small charts and a large atlas with sparse holes. Finding those holes randomly is slow. A better approach would be to - // start stacking large charts as if they were tetris pieces. Once charts get small try to place them randomly. It may be interesting to try a intermediate strategy, first try - // along one axis and then try exhaustively along that axis. - bool findChartLocation(const Vector2i &startPosition, bool bruteForce, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, uint32_t maxResolution, bool allowRotate) - { + bool findChartLocation(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) { const int attempts = 4096; - if (bruteForce || attempts >= w * h) - return findChartLocation_bruteForce(startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, blockAligned, maxResolution, allowRotate); - return findChartLocation_random(atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, blockAligned, maxResolution, allowRotate); + if (options.bruteForce || attempts >= w * h) + return findChartLocation_bruteForce(options, startPosition, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, maxResolution); + return findChartLocation_random(options, atlasBitImage, chartBitImage, chartBitImageRotated, w, h, best_x, best_y, best_w, best_h, best_r, attempts, maxResolution); } - bool findChartLocation_bruteForce(const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, bool blockAligned, uint32_t maxResolution, bool allowRotate) - { - const int stepSize = blockAligned ? 4 : 1; + bool findChartLocation_bruteForce(const PackOptions &options, const Vector2i &startPosition, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, uint32_t maxResolution) { + const int stepSize = options.blockAlign ? 4 : 1; int best_metric = INT_MAX; // Try two different orientations. for (int r = 0; r < 2; r++) { int cw = chartBitImage->width(); int ch = chartBitImage->height(); if (r == 1) { - if (allowRotate) + if (options.rotateCharts) swap(cw, ch); else break; @@ -9016,15 +8311,14 @@ private: return best_metric != INT_MAX; } - bool findChartLocation_random(const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int minTrialCount, bool blockAligned, uint32_t maxResolution, bool allowRotate) - { + bool findChartLocation_random(const PackOptions &options, const BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int w, int h, int *best_x, int *best_y, int *best_w, int *best_h, int *best_r, int attempts, uint32_t maxResolution) { bool result = false; const int BLOCK_SIZE = 4; int best_metric = INT_MAX; - for (int i = 0; i < minTrialCount; i++) { + for (int i = 0; i < attempts; i++) { int cw = chartBitImage->width(); int ch = chartBitImage->height(); - int r = allowRotate ? m_rand.getRange(1) : 0; + int r = options.rotateCharts ? m_rand.getRange(1) : 0; if (r == 1) swap(cw, ch); // + 1 to extend atlas in case atlas full. We may want to use a higher number to increase probability of extending atlas. @@ -9037,7 +8331,7 @@ private: } int x = m_rand.getRange(xRange); int y = m_rand.getRange(yRange); - if (blockAligned) { + if (options.blockAlign) { x = align(x, BLOCK_SIZE); y = align(y, BLOCK_SIZE); if (maxResolution > 0 && (x > (int)maxResolution - cw || y > (int)maxResolution - ch)) @@ -9062,7 +8356,7 @@ private: *best_y = y; *best_w = cw; *best_h = ch; - *best_r = allowRotate ? r : 0; + *best_r = options.rotateCharts ? r : 0; if (area == w * h) { // Chart is completely inside, do not look at any other location. break; @@ -9072,8 +8366,7 @@ private: return result; } - void addChart(BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int atlas_w, int atlas_h, int offset_x, int offset_y, int r) - { + void addChart(BitImage *atlasBitImage, const BitImage *chartBitImage, const BitImage *chartBitImageRotated, int atlas_w, int atlas_h, int offset_x, int offset_y, int r) { XA_DEBUG_ASSERT(r == 0 || r == 1); const BitImage *image = r == 0 ? chartBitImage : chartBitImageRotated; const int w = image->width(); @@ -9096,15 +8389,14 @@ private: } } - void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated, UniformGrid2 &boundaryEdgeGrid) const - { + void bilinearExpand(const Chart *chart, BitImage *source, BitImage *dest, BitImage *destRotated, UniformGrid2 &boundaryEdgeGrid) const { boundaryEdgeGrid.reset(chart->vertices, chart->indices); if (chart->boundaryEdges) { const uint32_t edgeCount = chart->boundaryEdges->size(); for (uint32_t i = 0; i < edgeCount; i++) boundaryEdgeGrid.append((*chart->boundaryEdges)[i]); } else { - for (uint32_t i = 0; i < chart->indexCount; i++) + for (uint32_t i = 0; i < chart->indices.length; i++) boundaryEdgeGrid.append(i); } const int xOffsets[] = { -1, 0, 1, -1, 1, -1, 0, 1 }; @@ -9152,13 +8444,11 @@ private: } } - struct DrawTriangleCallbackArgs - { + struct DrawTriangleCallbackArgs { BitImage *chartBitImage, *chartBitImageRotated; }; - static bool drawTriangleCallback(void *param, int x, int y) - { + static bool drawTriangleCallback(void *param, int x, int y) { auto args = (DrawTriangleCallbackArgs *)param; args->chartBitImage->set(x, y); if (args->chartBitImageRotated) @@ -9180,8 +8470,14 @@ private: } // namespace pack } // namespace internal -struct Context -{ +// Used to map triangulated polygons back to polygons. +struct MeshPolygonMapping { + internal::Array<uint8_t> faceVertexCount; // Copied from MeshDecl::faceVertexCount. + internal::Array<uint32_t> triangleToPolygonMap; // Triangle index (mesh face index) to polygon index. + internal::Array<uint32_t> triangleToPolygonIndicesMap; // Triangle indices to polygon indices. +}; + +struct Context { Atlas atlas; internal::Progress *addMeshProgress = nullptr; internal::TaskGroupHandle addMeshTaskGroup; @@ -9190,20 +8486,20 @@ struct Context void *progressUserData = nullptr; internal::TaskScheduler *taskScheduler; internal::Array<internal::Mesh *> meshes; + internal::Array<MeshPolygonMapping *> meshPolygonMappings; internal::Array<internal::UvMesh *> uvMeshes; internal::Array<internal::UvMeshInstance *> uvMeshInstances; + bool uvMeshChartsComputed = false; }; -Atlas *Create() -{ +Atlas *Create() { Context *ctx = XA_NEW(internal::MemTag::Default, Context); memset(&ctx->atlas, 0, sizeof(Atlas)); ctx->taskScheduler = XA_NEW(internal::MemTag::Default, internal::TaskScheduler); return &ctx->atlas; } -static void DestroyOutputMeshes(Context *ctx) -{ +static void DestroyOutputMeshes(Context *ctx) { if (!ctx->atlas.meshes) return; for (int i = 0; i < (int)ctx->atlas.meshCount; i++) { @@ -9224,8 +8520,7 @@ static void DestroyOutputMeshes(Context *ctx) ctx->atlas.meshes = nullptr; } -void Destroy(Atlas *atlas) -{ +void Destroy(Atlas *atlas) { XA_DEBUG_ASSERT(atlas); Context *ctx = (Context *)atlas; if (atlas->utilization) @@ -9244,6 +8539,13 @@ void Destroy(Atlas *atlas) mesh->~Mesh(); XA_FREE(mesh); } + for (uint32_t i = 0; i < ctx->meshPolygonMappings.size(); i++) { + MeshPolygonMapping *mapping = ctx->meshPolygonMappings[i]; + if (mapping) { + mapping->~MeshPolygonMapping(); + XA_FREE(mapping); + } + } for (uint32_t i = 0; i < ctx->uvMeshes.size(); i++) { internal::UvMesh *mesh = ctx->uvMeshes[i]; for (uint32_t j = 0; j < mesh->charts.size(); j++) { @@ -9265,66 +8567,52 @@ void Destroy(Atlas *atlas) #endif } -struct AddMeshTaskArgs -{ - Context *ctx; - internal::Mesh *mesh; -}; - -static void runAddMeshTask(void *userData) -{ +static void runAddMeshTask(void *groupUserData, void *taskUserData) { XA_PROFILE_START(addMeshThread) - auto args = (AddMeshTaskArgs *)userData; // Responsible for freeing this. - internal::Mesh *mesh = args->mesh; - internal::Progress *progress = args->ctx->addMeshProgress; - if (progress->cancel) - goto cleanup; - { - XA_PROFILE_START(addMeshCreateColocals) - mesh->createColocals(); - XA_PROFILE_END(addMeshCreateColocals) + auto ctx = (Context *)groupUserData; + auto mesh = (internal::Mesh *)taskUserData; + internal::Progress *progress = ctx->addMeshProgress; + if (progress->cancel) { + XA_PROFILE_END(addMeshThread) + return; } - if (progress->cancel) - goto cleanup; - progress->value++; - progress->update(); -cleanup: - args->~AddMeshTaskArgs(); - XA_FREE(args); + XA_PROFILE_START(addMeshCreateColocals) + mesh->createColocals(); + XA_PROFILE_END(addMeshCreateColocals) + if (progress->cancel) { + XA_PROFILE_END(addMeshThread) + return; + } + progress->increment(1); XA_PROFILE_END(addMeshThread) } -static internal::Vector3 DecodePosition(const MeshDecl &meshDecl, uint32_t index) -{ +static internal::Vector3 DecodePosition(const MeshDecl &meshDecl, uint32_t index) { XA_DEBUG_ASSERT(meshDecl.vertexPositionData); XA_DEBUG_ASSERT(meshDecl.vertexPositionStride > 0); return *((const internal::Vector3 *)&((const uint8_t *)meshDecl.vertexPositionData)[meshDecl.vertexPositionStride * index]); } -static internal::Vector3 DecodeNormal(const MeshDecl &meshDecl, uint32_t index) -{ +static internal::Vector3 DecodeNormal(const MeshDecl &meshDecl, uint32_t index) { XA_DEBUG_ASSERT(meshDecl.vertexNormalData); XA_DEBUG_ASSERT(meshDecl.vertexNormalStride > 0); return *((const internal::Vector3 *)&((const uint8_t *)meshDecl.vertexNormalData)[meshDecl.vertexNormalStride * index]); } -static internal::Vector2 DecodeUv(const MeshDecl &meshDecl, uint32_t index) -{ +static internal::Vector2 DecodeUv(const MeshDecl &meshDecl, uint32_t index) { XA_DEBUG_ASSERT(meshDecl.vertexUvData); XA_DEBUG_ASSERT(meshDecl.vertexUvStride > 0); return *((const internal::Vector2 *)&((const uint8_t *)meshDecl.vertexUvData)[meshDecl.vertexUvStride * index]); } -static uint32_t DecodeIndex(IndexFormat::Enum format, const void *indexData, int32_t offset, uint32_t i) -{ +static uint32_t DecodeIndex(IndexFormat format, const void *indexData, int32_t offset, uint32_t i) { XA_DEBUG_ASSERT(indexData); if (format == IndexFormat::UInt16) return uint16_t((int32_t)((const uint16_t *)indexData)[i] + offset); return uint32_t((int32_t)((const uint32_t *)indexData)[i] + offset); } -AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint) -{ +AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint) { XA_DEBUG_ASSERT(atlas); if (!atlas) { XA_PRINT_WARNING("AddMesh: atlas is null.\n"); @@ -9337,33 +8625,36 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh } #if XA_PROFILE if (ctx->meshes.isEmpty()) - internal::s_profile.addMeshReal = clock(); + internal::s_profile.addMeshRealStart = std::chrono::high_resolution_clock::now(); #endif // Don't know how many times AddMesh will be called, so progress needs to adjusted each time. if (!ctx->addMeshProgress) { ctx->addMeshProgress = XA_NEW_ARGS(internal::MemTag::Default, internal::Progress, ProgressCategory::AddMesh, ctx->progressFunc, ctx->progressUserData, 1); - } - else { + } else { ctx->addMeshProgress->setMaxValue(internal::max(ctx->meshes.size() + 1, meshCountHint)); } XA_PROFILE_START(addMeshCopyData) const bool hasIndices = meshDecl.indexCount > 0; const uint32_t indexCount = hasIndices ? meshDecl.indexCount : meshDecl.vertexCount; - XA_PRINT("Adding mesh %d: %u vertices, %u triangles\n", ctx->meshes.size(), meshDecl.vertexCount, indexCount / 3); - // Expecting triangle faces. - if ((indexCount % 3) != 0) - return AddMeshError::InvalidIndexCount; - if (hasIndices) { - // Check if any index is out of range. - for (uint32_t i = 0; i < indexCount; i++) { - const uint32_t index = DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i); - if (index >= meshDecl.vertexCount) - return AddMeshError::IndexOutOfRange; + uint32_t faceCount = indexCount / 3; + if (meshDecl.faceVertexCount) { + faceCount = meshDecl.faceCount; + XA_PRINT("Adding mesh %d: %u vertices, %u polygons\n", ctx->meshes.size(), meshDecl.vertexCount, faceCount); + for (uint32_t f = 0; f < faceCount; f++) { + if (meshDecl.faceVertexCount[f] < 3) + return AddMeshError::InvalidFaceVertexCount; } + } else { + XA_PRINT("Adding mesh %d: %u vertices, %u triangles\n", ctx->meshes.size(), meshDecl.vertexCount, faceCount); + // Expecting triangle faces unless otherwise specified. + if ((indexCount % 3) != 0) + return AddMeshError::InvalidIndexCount; } uint32_t meshFlags = internal::MeshFlags::HasIgnoredFaces; if (meshDecl.vertexNormalData) meshFlags |= internal::MeshFlags::HasNormals; + if (meshDecl.faceMaterialData) + meshFlags |= internal::MeshFlags::HasMaterials; internal::Mesh *mesh = XA_NEW_ARGS(internal::MemTag::Mesh, internal::Mesh, meshDecl.epsilon, meshDecl.vertexCount, indexCount / 3, meshFlags, ctx->meshes.size()); for (uint32_t i = 0; i < meshDecl.vertexCount; i++) { internal::Vector3 normal(0.0f); @@ -9374,17 +8665,42 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh texcoord = DecodeUv(meshDecl, i); mesh->addVertex(DecodePosition(meshDecl, i), normal, texcoord); } + MeshPolygonMapping *meshPolygonMapping = nullptr; + if (meshDecl.faceVertexCount) { + meshPolygonMapping = XA_NEW(internal::MemTag::Default, MeshPolygonMapping); + // Copy MeshDecl::faceVertexCount so it can be used later when building output meshes. + meshPolygonMapping->faceVertexCount.copyFrom(meshDecl.faceVertexCount, meshDecl.faceCount); + // There should be at least as many triangles as polygons. + meshPolygonMapping->triangleToPolygonMap.reserve(meshDecl.faceCount); + meshPolygonMapping->triangleToPolygonIndicesMap.reserve(meshDecl.indexCount); + } const uint32_t kMaxWarnings = 50; uint32_t warningCount = 0; - for (uint32_t i = 0; i < indexCount / 3; i++) { - uint32_t tri[3]; - for (int j = 0; j < 3; j++) - tri[j] = hasIndices ? DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, i * 3 + j) : i * 3 + j; + internal::Array<uint32_t> triIndices; + uint32_t firstFaceIndex = 0; + internal::Triangulator triangulator; + for (uint32_t face = 0; face < faceCount; face++) { + // Decode face indices. + const uint32_t faceVertexCount = meshDecl.faceVertexCount ? (uint32_t)meshDecl.faceVertexCount[face] : 3; + uint32_t polygon[UINT8_MAX]; + for (uint32_t i = 0; i < faceVertexCount; i++) { + if (hasIndices) { + polygon[i] = DecodeIndex(meshDecl.indexFormat, meshDecl.indexData, meshDecl.indexOffset, face * faceVertexCount + i); + // Check if any index is out of range. + if (polygon[i] >= meshDecl.vertexCount) { + mesh->~Mesh(); + XA_FREE(mesh); + return AddMeshError::IndexOutOfRange; + } + } else { + polygon[i] = face * faceVertexCount + i; + } + } + // Ignore faces with degenerate or zero length edges. bool ignore = false; - // Check for degenerate or zero length edges. - for (int j = 0; j < 3; j++) { - const uint32_t index1 = tri[j]; - const uint32_t index2 = tri[(j + 1) % 3]; + for (uint32_t i = 0; i < faceVertexCount; i++) { + const uint32_t index1 = polygon[i]; + const uint32_t index2 = polygon[(i + 1) % 3]; if (index1 == index2) { ignore = true; if (++warningCount <= kMaxWarnings) @@ -9402,119 +8718,136 @@ AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t mesh } // Ignore faces with any nan vertex attributes. if (!ignore) { - for (int j = 0; j < 3; j++) { - const internal::Vector3 &pos = mesh->position(tri[j]); + for (uint32_t i = 0; i < faceVertexCount; i++) { + const internal::Vector3 &pos = mesh->position(polygon[i]); if (internal::isNan(pos.x) || internal::isNan(pos.y) || internal::isNan(pos.z)) { if (++warningCount <= kMaxWarnings) - XA_PRINT(" NAN position in face: %d\n", i); + XA_PRINT(" NAN position in face: %d\n", face); ignore = true; break; } if (meshDecl.vertexNormalData) { - const internal::Vector3 &normal = mesh->normal(tri[j]); + const internal::Vector3 &normal = mesh->normal(polygon[i]); if (internal::isNan(normal.x) || internal::isNan(normal.y) || internal::isNan(normal.z)) { if (++warningCount <= kMaxWarnings) - XA_PRINT(" NAN normal in face: %d\n", i); + XA_PRINT(" NAN normal in face: %d\n", face); ignore = true; break; } } if (meshDecl.vertexUvData) { - const internal::Vector2 &uv = mesh->texcoord(tri[j]); + const internal::Vector2 &uv = mesh->texcoord(polygon[i]); if (internal::isNan(uv.x) || internal::isNan(uv.y)) { if (++warningCount <= kMaxWarnings) - XA_PRINT(" NAN texture coordinate in face: %d\n", i); + XA_PRINT(" NAN texture coordinate in face: %d\n", face); ignore = true; break; } } } } - const internal::Vector3 &a = mesh->position(tri[0]); - const internal::Vector3 &b = mesh->position(tri[1]); - const internal::Vector3 &c = mesh->position(tri[2]); - // Check for zero area faces. - float area = 0.0f; - if (!ignore) { - area = internal::length(internal::cross(b - a, c - a)) * 0.5f; - if (area <= internal::kAreaEpsilon) { - ignore = true; - if (++warningCount <= kMaxWarnings) - XA_PRINT(" Zero area face: %d, indices (%d %d %d), area is %f\n", i, tri[0], tri[1], tri[2], area); - } + // Triangulate if necessary. + triIndices.clear(); + if (faceVertexCount == 3) { + triIndices.push_back(polygon[0]); + triIndices.push_back(polygon[1]); + triIndices.push_back(polygon[2]); + } else { + triangulator.triangulatePolygon(mesh->positions(), internal::ConstArrayView<uint32_t>(polygon, faceVertexCount), triIndices); } + // Check for zero area faces. if (!ignore) { - if (internal::equal(a, b, meshDecl.epsilon) || internal::equal(a, c, meshDecl.epsilon) || internal::equal(b, c, meshDecl.epsilon)) { - ignore = true; - if (++warningCount <= kMaxWarnings) - XA_PRINT(" Degenerate face: %d, area is %f\n", i, area); + for (uint32_t i = 0; i < triIndices.size(); i += 3) { + const internal::Vector3 &a = mesh->position(triIndices[i + 0]); + const internal::Vector3 &b = mesh->position(triIndices[i + 1]); + const internal::Vector3 &c = mesh->position(triIndices[i + 2]); + const float area = internal::length(internal::cross(b - a, c - a)) * 0.5f; + if (area <= internal::kAreaEpsilon) { + ignore = true; + if (++warningCount <= kMaxWarnings) + XA_PRINT(" Zero area face: %d, area is %f\n", face, area); + break; + } } } - if (meshDecl.faceIgnoreData && meshDecl.faceIgnoreData[i]) + // User face ignore. + if (meshDecl.faceIgnoreData && meshDecl.faceIgnoreData[face]) ignore = true; - mesh->addFace(tri[0], tri[1], tri[2], ignore); + // User material. + uint32_t material = UINT32_MAX; + if (meshDecl.faceMaterialData) + material = meshDecl.faceMaterialData[face]; + // Add the face(s). + for (uint32_t i = 0; i < triIndices.size(); i += 3) { + mesh->addFace(&triIndices[i], ignore, material); + if (meshPolygonMapping) + meshPolygonMapping->triangleToPolygonMap.push_back(face); + } + if (meshPolygonMapping) { + for (uint32_t i = 0; i < triIndices.size(); i++) + meshPolygonMapping->triangleToPolygonIndicesMap.push_back(triIndices[i]); + } + firstFaceIndex += faceVertexCount; } if (warningCount > kMaxWarnings) XA_PRINT(" %u additional warnings truncated\n", warningCount - kMaxWarnings); XA_PROFILE_END(addMeshCopyData) ctx->meshes.push_back(mesh); + ctx->meshPolygonMappings.push_back(meshPolygonMapping); ctx->paramAtlas.addMesh(mesh); if (ctx->addMeshTaskGroup.value == UINT32_MAX) - ctx->addMeshTaskGroup = ctx->taskScheduler->createTaskGroup(); - AddMeshTaskArgs *taskArgs = XA_NEW(internal::MemTag::Default, AddMeshTaskArgs); // The task frees this. - taskArgs->ctx = ctx; - taskArgs->mesh = mesh; + ctx->addMeshTaskGroup = ctx->taskScheduler->createTaskGroup(ctx); internal::Task task; - task.userData = taskArgs; + task.userData = mesh; task.func = runAddMeshTask; ctx->taskScheduler->run(ctx->addMeshTaskGroup, task); return AddMeshError::Success; } -void AddMeshJoin(Atlas *atlas) -{ +void AddMeshJoin(Atlas *atlas) { XA_DEBUG_ASSERT(atlas); if (!atlas) { XA_PRINT_WARNING("AddMeshJoin: atlas is null.\n"); return; } Context *ctx = (Context *)atlas; - if (!ctx->addMeshProgress) - return; - ctx->taskScheduler->wait(&ctx->addMeshTaskGroup); - ctx->addMeshProgress->~Progress(); - XA_FREE(ctx->addMeshProgress); - ctx->addMeshProgress = nullptr; + if (!ctx->uvMeshes.isEmpty()) { #if XA_PROFILE - XA_PRINT("Added %u meshes\n", ctx->meshes.size()); - internal::s_profile.addMeshReal = clock() - internal::s_profile.addMeshReal; + XA_PRINT("Added %u UV meshes\n", ctx->uvMeshes.size()); + internal::s_profile.addMeshReal = uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - internal::s_profile.addMeshRealStart).count()); #endif - XA_PROFILE_PRINT_AND_RESET(" Total (real): ", addMeshReal) - XA_PROFILE_PRINT_AND_RESET(" Copy data: ", addMeshCopyData) - XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", addMeshThread) - XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", addMeshCreateColocals) + XA_PROFILE_PRINT_AND_RESET(" Total: ", addMeshReal) + XA_PROFILE_PRINT_AND_RESET(" Copy data: ", addMeshCopyData) #if XA_PROFILE_ALLOC - XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc) + XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc) #endif - XA_PRINT_MEM_USAGE + XA_PRINT_MEM_USAGE + } else { + if (!ctx->addMeshProgress) + return; + ctx->taskScheduler->wait(&ctx->addMeshTaskGroup); + ctx->addMeshProgress->~Progress(); + XA_FREE(ctx->addMeshProgress); + ctx->addMeshProgress = nullptr; +#if XA_PROFILE + XA_PRINT("Added %u meshes\n", ctx->meshes.size()); + internal::s_profile.addMeshReal = uint64_t(std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - internal::s_profile.addMeshRealStart).count()); +#endif + XA_PROFILE_PRINT_AND_RESET(" Total (real): ", addMeshReal) + XA_PROFILE_PRINT_AND_RESET(" Copy data: ", addMeshCopyData) + XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", addMeshThread) + XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", addMeshCreateColocals) +#if XA_PROFILE_ALLOC + XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc) +#endif + XA_PRINT_MEM_USAGE #if XA_DEBUG_EXPORT_OBJ_FACE_GROUPS - internal::param::s_faceGroupsCurrentVertex = 0; + internal::param::s_faceGroupsCurrentVertex = 0; #endif + } } -struct EdgeKey -{ - EdgeKey() {} - EdgeKey(const EdgeKey &k) : v0(k.v0), v1(k.v1) {} - EdgeKey(uint32_t v0, uint32_t v1) : v0(v0), v1(v1) {} - bool operator==(const EdgeKey &k) const { return v0 == k.v0 && v1 == k.v1; } - - uint32_t v0; - uint32_t v1; -}; - -AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) -{ +AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) { XA_DEBUG_ASSERT(atlas); if (!atlas) { XA_PRINT_WARNING("AddUvMesh: atlas is null.\n"); @@ -9525,13 +8858,18 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) XA_PRINT_WARNING("AddUvMesh: Meshes and UV meshes cannot be added to the same atlas.\n"); return AddMeshError::Error; } - const bool decoded = (decl.indexCount <= 0); - const uint32_t indexCount = decoded ? decl.vertexCount : decl.indexCount; +#if XA_PROFILE + if (ctx->uvMeshInstances.isEmpty()) + internal::s_profile.addMeshRealStart = std::chrono::high_resolution_clock::now(); +#endif + XA_PROFILE_START(addMeshCopyData) + const bool hasIndices = decl.indexCount > 0; + const uint32_t indexCount = hasIndices ? decl.indexCount : decl.vertexCount; XA_PRINT("Adding UV mesh %d: %u vertices, %u triangles\n", ctx->uvMeshes.size(), decl.vertexCount, indexCount / 3); // Expecting triangle faces. if ((indexCount % 3) != 0) return AddMeshError::InvalidIndexCount; - if (!decoded) { + if (hasIndices) { // Check if any index is out of range. for (uint32_t i = 0; i < indexCount; i++) { const uint32_t index = DecodeIndex(decl.indexFormat, decl.indexData, decl.indexOffset, i); @@ -9539,319 +8877,266 @@ AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl) return AddMeshError::IndexOutOfRange; } } + // Create a mesh instance. internal::UvMeshInstance *meshInstance = XA_NEW(internal::MemTag::Default, internal::UvMeshInstance); - meshInstance->texcoords.resize(decl.vertexCount); - for (uint32_t i = 0; i < decl.vertexCount; i++) { - internal::Vector2 texcoord = *((const internal::Vector2 *)&((const uint8_t *)decl.vertexUvData)[decl.vertexStride * i]); - // Set nan values to 0. - if (internal::isNan(texcoord.x) || internal::isNan(texcoord.y)) - texcoord.x = texcoord.y = 0.0f; - meshInstance->texcoords[i] = texcoord; - } - meshInstance->rotateCharts = decl.rotateCharts; + meshInstance->mesh = nullptr; + ctx->uvMeshInstances.push_back(meshInstance); // See if this is an instance of an already existing mesh. internal::UvMesh *mesh = nullptr; for (uint32_t m = 0; m < ctx->uvMeshes.size(); m++) { if (memcmp(&ctx->uvMeshes[m]->decl, &decl, sizeof(UvMeshDecl)) == 0) { - meshInstance->mesh = mesh = ctx->uvMeshes[m]; + mesh = ctx->uvMeshes[m]; + XA_PRINT(" instance of a previous UV mesh\n"); break; } } if (!mesh) { // Copy geometry to mesh. - meshInstance->mesh = mesh = XA_NEW(internal::MemTag::Default, internal::UvMesh); + mesh = XA_NEW(internal::MemTag::Default, internal::UvMesh); + ctx->uvMeshes.push_back(mesh); mesh->decl = decl; + if (decl.faceMaterialData) { + mesh->faceMaterials.resize(decl.indexCount / 3); + memcpy(mesh->faceMaterials.data(), decl.faceMaterialData, mesh->faceMaterials.size() * sizeof(uint32_t)); + } mesh->indices.resize(decl.indexCount); for (uint32_t i = 0; i < indexCount; i++) - mesh->indices[i] = decoded ? i : DecodeIndex(decl.indexFormat, decl.indexData, decl.indexOffset, i); - mesh->vertexToChartMap.resize(decl.vertexCount); - for (uint32_t i = 0; i < mesh->vertexToChartMap.size(); i++) - mesh->vertexToChartMap[i] = UINT32_MAX; - // Calculate charts (incident faces). - internal::HashMap<internal::Vector2> vertexToFaceMap(internal::MemTag::Default, indexCount); // Face is index / 3 - const uint32_t faceCount = indexCount / 3; - for (uint32_t i = 0; i < indexCount; i++) - vertexToFaceMap.add(meshInstance->texcoords[mesh->indices[i]]); - internal::BitArray faceAssigned(faceCount); - faceAssigned.zeroOutMemory(); - for (uint32_t f = 0; f < faceCount; f++) { - if (faceAssigned.get(f)) - continue; - // Found an unassigned face, create a new chart. - internal::UvMeshChart *chart = XA_NEW(internal::MemTag::Default, internal::UvMeshChart); - chart->material = decl.faceMaterialData ? decl.faceMaterialData[f] : 0; - // Walk incident faces and assign them to the chart. - faceAssigned.set(f); - chart->faces.push_back(f); - for (;;) { - bool newFaceAssigned = false; - const uint32_t faceCount2 = chart->faces.size(); - for (uint32_t f2 = 0; f2 < faceCount2; f2++) { - const uint32_t face = chart->faces[f2]; - for (uint32_t i = 0; i < 3; i++) { - const internal::Vector2 &texcoord = meshInstance->texcoords[meshInstance->mesh->indices[face * 3 + i]]; - uint32_t mapIndex = vertexToFaceMap.get(texcoord); - while (mapIndex != UINT32_MAX) { - const uint32_t face2 = mapIndex / 3; // 3 vertices added per face. - // Materials must match. - if (!faceAssigned.get(face2) && (!decl.faceMaterialData || decl.faceMaterialData[face] == decl.faceMaterialData[face2])) { - faceAssigned.set(face2); - chart->faces.push_back(face2); - newFaceAssigned = true; - } - mapIndex = vertexToFaceMap.getNext(mapIndex); - } - } - } - if (!newFaceAssigned) + mesh->indices[i] = hasIndices ? DecodeIndex(decl.indexFormat, decl.indexData, decl.indexOffset, i) : i; + mesh->texcoords.resize(decl.vertexCount); + for (uint32_t i = 0; i < decl.vertexCount; i++) + mesh->texcoords[i] = *((const internal::Vector2 *)&((const uint8_t *)decl.vertexUvData)[decl.vertexStride * i]); + // Validate. + mesh->faceIgnore.resize(decl.indexCount / 3); + mesh->faceIgnore.zeroOutMemory(); + const uint32_t kMaxWarnings = 50; + uint32_t warningCount = 0; + for (uint32_t f = 0; f < indexCount / 3; f++) { + bool ignore = false; + uint32_t tri[3]; + for (uint32_t i = 0; i < 3; i++) + tri[i] = mesh->indices[f * 3 + i]; + // Check for nan UVs. + for (uint32_t i = 0; i < 3; i++) { + const uint32_t vertex = tri[i]; + if (internal::isNan(mesh->texcoords[vertex].x) || internal::isNan(mesh->texcoords[vertex].y)) { + ignore = true; + if (++warningCount <= kMaxWarnings) + XA_PRINT(" NAN texture coordinate in vertex %u\n", vertex); break; + } } - for (uint32_t i = 0; i < chart->faces.size(); i++) { - for (uint32_t j = 0; j < 3; j++) { - const uint32_t vertex = meshInstance->mesh->indices[chart->faces[i] * 3 + j]; - chart->indices.push_back(vertex); - mesh->vertexToChartMap[vertex] = mesh->charts.size(); + // Check for zero area faces. + if (!ignore) { + const internal::Vector2 &v1 = mesh->texcoords[tri[0]]; + const internal::Vector2 &v2 = mesh->texcoords[tri[1]]; + const internal::Vector2 &v3 = mesh->texcoords[tri[2]]; + const float area = fabsf(((v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y)) * 0.5f); + if (area <= internal::kAreaEpsilon) { + ignore = true; + if (++warningCount <= kMaxWarnings) + XA_PRINT(" Zero area face: %d, indices (%d %d %d), area is %f\n", f, tri[0], tri[1], tri[2], area); } } - mesh->charts.push_back(chart); + if (ignore) + mesh->faceIgnore.set(f); } - ctx->uvMeshes.push_back(mesh); - } else { - XA_PRINT(" instance of a previous UV mesh\n"); + if (warningCount > kMaxWarnings) + XA_PRINT(" %u additional warnings truncated\n", warningCount - kMaxWarnings); } - XA_PRINT(" %u charts\n", meshInstance->mesh->charts.size()); - ctx->uvMeshInstances.push_back(meshInstance); + meshInstance->mesh = mesh; + XA_PROFILE_END(addMeshCopyData) return AddMeshError::Success; } -void ComputeCharts(Atlas *atlas, ChartOptions options) -{ +void ComputeCharts(Atlas *atlas, ChartOptions options) { if (!atlas) { XA_PRINT_WARNING("ComputeCharts: atlas is null.\n"); return; } Context *ctx = (Context *)atlas; - if (!ctx->uvMeshInstances.isEmpty()) { - XA_PRINT_WARNING("ComputeCharts: This function should not be called with UV meshes.\n"); - return; - } AddMeshJoin(atlas); - if (ctx->meshes.isEmpty()) { - XA_PRINT_WARNING("ComputeCharts: No meshes. Call AddMesh first.\n"); - return; - } - XA_PRINT("Computing charts\n"); - XA_PROFILE_START(computeChartsReal) - if (!ctx->paramAtlas.computeCharts(ctx->taskScheduler, options, ctx->progressFunc, ctx->progressUserData)) { - XA_PRINT(" Cancelled by user\n"); - return; - } - XA_PROFILE_END(computeChartsReal) - // Count charts. - uint32_t chartCount = 0; - const uint32_t meshCount = ctx->meshes.size(); - for (uint32_t i = 0; i < meshCount; i++) { - for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) { - const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j); - chartCount += chartGroup->segmentChartCount(); - } - } - XA_PRINT(" %u charts\n", chartCount); -#if XA_PROFILE - XA_PRINT(" Chart groups\n"); - uint32_t chartGroupCount = 0; - for (uint32_t i = 0; i < meshCount; i++) { - XA_PRINT(" Mesh %u: %u chart groups\n", i, ctx->paramAtlas.chartGroupCount(i)); - chartGroupCount += ctx->paramAtlas.chartGroupCount(i); - } - XA_PRINT(" %u total\n", chartGroupCount); -#endif - XA_PROFILE_PRINT_AND_RESET(" Total (real): ", computeChartsReal) - XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", computeChartsThread) - XA_PROFILE_PRINT_AND_RESET(" Create face groups: ", createFaceGroups) - XA_PROFILE_PRINT_AND_RESET(" Extract invalid mesh geometry: ", extractInvalidMeshGeometry) - XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (real): ", chartGroupComputeChartsReal) - XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (thread): ", chartGroupComputeChartsThread) - XA_PROFILE_PRINT_AND_RESET(" Create chart group mesh: ", createChartGroupMesh) - XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", createChartGroupMeshColocals) - XA_PROFILE_PRINT_AND_RESET(" Create boundaries: ", createChartGroupMeshBoundaries) - XA_PROFILE_PRINT_AND_RESET(" Build atlas: ", buildAtlas) - XA_PROFILE_PRINT_AND_RESET(" Init: ", buildAtlasInit) - XA_PROFILE_PRINT_AND_RESET(" Planar charts: ", planarCharts) - XA_PROFILE_PRINT_AND_RESET(" Clustered charts: ", clusteredCharts) - XA_PROFILE_PRINT_AND_RESET(" Place seeds: ", clusteredChartsPlaceSeeds) - XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsPlaceSeedsBoundaryIntersection) - XA_PROFILE_PRINT_AND_RESET(" Relocate seeds: ", clusteredChartsRelocateSeeds) - XA_PROFILE_PRINT_AND_RESET(" Reset: ", clusteredChartsReset) - XA_PROFILE_PRINT_AND_RESET(" Grow: ", clusteredChartsGrow) - XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsGrowBoundaryIntersection) - XA_PROFILE_PRINT_AND_RESET(" Merge: ", clusteredChartsMerge) - XA_PROFILE_PRINT_AND_RESET(" Fill holes: ", clusteredChartsFillHoles) - XA_PROFILE_PRINT_AND_RESET(" Copy chart faces: ", copyChartFaces) -#if XA_PROFILE_ALLOC - XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc) -#endif - XA_PRINT_MEM_USAGE -} - -void ParameterizeCharts(Atlas *atlas, ParameterizeOptions options) -{ - if (!atlas) { - XA_PRINT_WARNING("ParameterizeCharts: atlas is null.\n"); - return; - } - Context *ctx = (Context *)atlas; - if (!ctx->uvMeshInstances.isEmpty()) { - XA_PRINT_WARNING("ParameterizeCharts: This function should not be called with UV meshes.\n"); - return; - } - if (!ctx->paramAtlas.chartsComputed()) { - XA_PRINT_WARNING("ParameterizeCharts: ComputeCharts must be called first.\n"); + if (ctx->meshes.isEmpty() && ctx->uvMeshInstances.isEmpty()) { + XA_PRINT_WARNING("ComputeCharts: No meshes. Call AddMesh or AddUvMesh first.\n"); return; } - atlas->atlasCount = 0; - atlas->height = 0; - atlas->texelsPerUnit = 0; - atlas->width = 0; - if (atlas->utilization) { + // Reset atlas state. This function may be called multiple times, or again after PackCharts. + if (atlas->utilization) XA_FREE(atlas->utilization); - atlas->utilization = nullptr; - } - if (atlas->image) { + if (atlas->image) XA_FREE(atlas->image); - atlas->image = nullptr; - } DestroyOutputMeshes(ctx); - XA_PRINT("Parameterizing charts\n"); - XA_PROFILE_START(parameterizeChartsReal) - if (!ctx->paramAtlas.parameterizeCharts(ctx->taskScheduler, options, ctx->progressFunc, ctx->progressUserData)) { - XA_PRINT(" Cancelled by user\n"); + memset(&ctx->atlas, 0, sizeof(Atlas)); + XA_PRINT("Computing charts\n"); + if (!ctx->meshes.isEmpty()) { + if (!ctx->paramAtlas.computeCharts(ctx->taskScheduler, options, ctx->progressFunc, ctx->progressUserData)) { + XA_PRINT(" Cancelled by user\n"); return; - } - XA_PROFILE_END(parameterizeChartsReal) - const uint32_t meshCount = ctx->meshes.size(); - uint32_t chartCount = 0, chartsWithHolesCount = 0, holesCount = 0, chartsWithTJunctionsCount = 0, tJunctionsCount = 0, orthoChartsCount = 0, planarChartsCount = 0, lscmChartsCount = 0, piecewiseChartsCount = 0, chartsAddedCount = 0, chartsDeletedCount = 0; - for (uint32_t i = 0; i < meshCount; i++) { - for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) { - const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j); - for (uint32_t k = 0; k < chartGroup->chartCount(); k++) { - const internal::param::Chart *chart = chartGroup->chartAt(k); -#if XA_PRINT_CHART_WARNINGS - if (chart->warningFlags() & internal::param::ChartWarningFlags::CloseHolesFailed) - XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): failed to close holes\n", chartCount, i, j, k); - if (chart->warningFlags() & internal::param::ChartWarningFlags::FixTJunctionsDuplicatedEdge) - XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): fixing t-junctions created non-manifold geometry\n", chartCount, i, j, k); - if (chart->warningFlags() & internal::param::ChartWarningFlags::FixTJunctionsFailed) - XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): fixing t-junctions failed\n", chartCount, i, j, k); - if (chart->warningFlags() & internal::param::ChartWarningFlags::TriangulateDuplicatedEdge) - XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u): triangulation created non-manifold geometry\n", chartCount, i, j, k); -#endif - holesCount += chart->closedHolesCount(); - if (chart->closedHolesCount() > 0) - chartsWithHolesCount++; - tJunctionsCount += chart->fixedTJunctionsCount(); - if (chart->fixedTJunctionsCount() > 0) - chartsWithTJunctionsCount++; - if (chart->type() == ChartType::Planar) - planarChartsCount++; - else if (chart->type() == ChartType::Ortho) - orthoChartsCount++; - else if (chart->type() == ChartType::LSCM) - lscmChartsCount++; - else if (chart->type() == ChartType::Piecewise) - piecewiseChartsCount++; - } - chartCount += chartGroup->chartCount(); - chartsAddedCount += chartGroup->paramAddedChartsCount(); - chartsDeletedCount += chartGroup->paramDeletedChartsCount(); - } - } - if (holesCount > 0) - XA_PRINT(" %u holes closed in %u charts\n", holesCount, chartsWithHolesCount); - if (tJunctionsCount > 0) - XA_PRINT(" %u t-junctions fixed in %u charts\n", tJunctionsCount, chartsWithTJunctionsCount); - XA_PRINT(" %u planar charts, %u ortho charts, %u LSCM charts, %u piecewise charts\n", planarChartsCount, orthoChartsCount, lscmChartsCount, piecewiseChartsCount); - if (chartsDeletedCount > 0) { - XA_PRINT(" %u charts with invalid parameterizations replaced with %u new charts\n", chartsDeletedCount, chartsAddedCount); + } + uint32_t chartsWithTJunctionsCount = 0, tJunctionCount = 0, orthoChartsCount = 0, planarChartsCount = 0, lscmChartsCount = 0, piecewiseChartsCount = 0, originalUvChartsCount = 0; + uint32_t chartCount = 0; + const uint32_t meshCount = ctx->meshes.size(); + for (uint32_t i = 0; i < meshCount; i++) { + for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) { + const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j); + for (uint32_t k = 0; k < chartGroup->chartCount(); k++) { + const internal::param::Chart *chart = chartGroup->chartAt(k); + tJunctionCount += chart->tjunctionCount(); + if (chart->tjunctionCount() > 0) + chartsWithTJunctionsCount++; + if (chart->type() == ChartType::Planar) + planarChartsCount++; + else if (chart->type() == ChartType::Ortho) + orthoChartsCount++; + else if (chart->type() == ChartType::LSCM) + lscmChartsCount++; + else if (chart->type() == ChartType::Piecewise) + piecewiseChartsCount++; + if (chart->generatorType() == internal::segment::ChartGeneratorType::OriginalUv) + originalUvChartsCount++; + } + chartCount += chartGroup->chartCount(); + } + } + if (tJunctionCount > 0) + XA_PRINT(" %u t-junctions found in %u charts\n", tJunctionCount, chartsWithTJunctionsCount); XA_PRINT(" %u charts\n", chartCount); - } - uint32_t chartIndex = 0, invalidParamCount = 0; - for (uint32_t i = 0; i < meshCount; i++) { - for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) { - const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j); - for (uint32_t k = 0; k < chartGroup->chartCount(); k++) { - internal::param::Chart *chart = chartGroup->chartAt(k); - const internal::param::Quality &quality = chart->quality(); + XA_PRINT(" %u planar, %u ortho, %u LSCM, %u piecewise\n", planarChartsCount, orthoChartsCount, lscmChartsCount, piecewiseChartsCount); + if (originalUvChartsCount > 0) + XA_PRINT(" %u with original UVs\n", originalUvChartsCount); + uint32_t chartIndex = 0, invalidParamCount = 0; + for (uint32_t i = 0; i < meshCount; i++) { + for (uint32_t j = 0; j < ctx->paramAtlas.chartGroupCount(i); j++) { + const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, j); + for (uint32_t k = 0; k < chartGroup->chartCount(); k++) { + internal::param::Chart *chart = chartGroup->chartAt(k); + const internal::param::Quality &quality = chart->quality(); #if XA_DEBUG_EXPORT_OBJ_CHARTS_AFTER_PARAMETERIZATION - { - char filename[256]; - XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_after_parameterization.obj", chartIndex); - chart->unifiedMesh()->writeObjFile(filename); - } -#endif - const char *type = "LSCM"; - if (chart->type() == ChartType::Planar) - type = "planar"; - else if (chart->type() == ChartType::Ortho) - type = "ortho"; - else if (chart->type() == ChartType::Piecewise) - type = "piecewise"; - if (chart->isInvalid()) { - if (quality.boundaryIntersection) { - XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, self-intersecting boundary.\n", chartIndex, i, j, k, type); - } - if (quality.flippedTriangleCount > 0) { - XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, %u / %u flipped triangles.\n", chartIndex, i, j, k, type, quality.flippedTriangleCount, quality.totalTriangleCount); + { + char filename[256]; + XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_after_parameterization.obj", chartIndex); + chart->unifiedMesh()->writeObjFile(filename); } - invalidParamCount++; +#endif + const char *type = "LSCM"; + if (chart->type() == ChartType::Planar) + type = "planar"; + else if (chart->type() == ChartType::Ortho) + type = "ortho"; + else if (chart->type() == ChartType::Piecewise) + type = "piecewise"; + if (chart->isInvalid()) { + if (quality.boundaryIntersection) { + XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, self-intersecting boundary.\n", chartIndex, i, j, k, type); + } + if (quality.flippedTriangleCount > 0) { + XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, %u / %u flipped triangles.\n", chartIndex, i, j, k, type, quality.flippedTriangleCount, quality.totalTriangleCount); + } + if (quality.zeroAreaTriangleCount > 0) { + XA_PRINT_WARNING(" Chart %u (mesh %u, group %u, id %u) (%s): invalid parameterization, %u / %u zero area triangles.\n", chartIndex, i, j, k, type, quality.zeroAreaTriangleCount, quality.totalTriangleCount); + } + invalidParamCount++; #if XA_DEBUG_EXPORT_OBJ_INVALID_PARAMETERIZATION - char filename[256]; - XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_invalid_parameterization.obj", chartIndex); - const internal::Mesh *mesh = chart->unifiedMesh(); - FILE *file; - XA_FOPEN(file, filename, "w"); - if (file) { - mesh->writeObjVertices(file); - fprintf(file, "s off\n"); - fprintf(file, "o object\n"); - for (uint32_t f = 0; f < mesh->faceCount(); f++) - mesh->writeObjFace(file, f); - if (!chart->paramFlippedFaces().isEmpty()) { - fprintf(file, "o flipped_faces\n"); - for (uint32_t f = 0; f < chart->paramFlippedFaces().size(); f++) - mesh->writeObjFace(file, chart->paramFlippedFaces()[f]); + char filename[256]; + XA_SPRINTF(filename, sizeof(filename), "debug_chart_%03u_invalid_parameterization.obj", chartIndex); + const internal::Mesh *mesh = chart->unifiedMesh(); + FILE *file; + XA_FOPEN(file, filename, "w"); + if (file) { + mesh->writeObjVertices(file); + fprintf(file, "s off\n"); + fprintf(file, "o object\n"); + for (uint32_t f = 0; f < mesh->faceCount(); f++) + mesh->writeObjFace(file, f); + if (!chart->paramFlippedFaces().isEmpty()) { + fprintf(file, "o flipped_faces\n"); + for (uint32_t f = 0; f < chart->paramFlippedFaces().size(); f++) + mesh->writeObjFace(file, chart->paramFlippedFaces()[f]); + } + mesh->writeObjBoundaryEges(file); + fclose(file); } - mesh->writeObjBoundaryEges(file); - mesh->writeObjLinkedBoundaries(file); - fclose(file); - } #endif + } + chartIndex++; } - chartIndex++; } } + if (invalidParamCount > 0) + XA_PRINT_WARNING(" %u charts with invalid parameterizations\n", invalidParamCount); +#if XA_PROFILE + XA_PRINT(" Chart groups\n"); + uint32_t chartGroupCount = 0; + for (uint32_t i = 0; i < meshCount; i++) { +#if 0 + XA_PRINT(" Mesh %u: %u chart groups\n", i, ctx->paramAtlas.chartGroupCount(i)); +#endif + chartGroupCount += ctx->paramAtlas.chartGroupCount(i); + } + XA_PRINT(" %u total\n", chartGroupCount); +#endif + XA_PROFILE_PRINT_AND_RESET(" Compute charts total (real): ", computeChartsReal) + XA_PROFILE_PRINT_AND_RESET(" Compute charts total (thread): ", computeChartsThread) + XA_PROFILE_PRINT_AND_RESET(" Create face groups: ", createFaceGroups) + XA_PROFILE_PRINT_AND_RESET(" Extract invalid mesh geometry: ", extractInvalidMeshGeometry) + XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (real): ", chartGroupComputeChartsReal) + XA_PROFILE_PRINT_AND_RESET(" Chart group compute charts (thread): ", chartGroupComputeChartsThread) + XA_PROFILE_PRINT_AND_RESET(" Create chart group mesh: ", createChartGroupMesh) + XA_PROFILE_PRINT_AND_RESET(" Create colocals: ", createChartGroupMeshColocals) + XA_PROFILE_PRINT_AND_RESET(" Create boundaries: ", createChartGroupMeshBoundaries) + XA_PROFILE_PRINT_AND_RESET(" Build atlas: ", buildAtlas) + XA_PROFILE_PRINT_AND_RESET(" Init: ", buildAtlasInit) + XA_PROFILE_PRINT_AND_RESET(" Planar charts: ", planarCharts) + if (options.useInputMeshUvs) { + XA_PROFILE_PRINT_AND_RESET(" Original UV charts: ", originalUvCharts) + } + XA_PROFILE_PRINT_AND_RESET(" Clustered charts: ", clusteredCharts) + XA_PROFILE_PRINT_AND_RESET(" Place seeds: ", clusteredChartsPlaceSeeds) + XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsPlaceSeedsBoundaryIntersection) + XA_PROFILE_PRINT_AND_RESET(" Relocate seeds: ", clusteredChartsRelocateSeeds) + XA_PROFILE_PRINT_AND_RESET(" Reset: ", clusteredChartsReset) + XA_PROFILE_PRINT_AND_RESET(" Grow: ", clusteredChartsGrow) + XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", clusteredChartsGrowBoundaryIntersection) + XA_PROFILE_PRINT_AND_RESET(" Merge: ", clusteredChartsMerge) + XA_PROFILE_PRINT_AND_RESET(" Fill holes: ", clusteredChartsFillHoles) + XA_PROFILE_PRINT_AND_RESET(" Copy chart faces: ", copyChartFaces) + XA_PROFILE_PRINT_AND_RESET(" Create chart mesh and parameterize (real): ", createChartMeshAndParameterizeReal) + XA_PROFILE_PRINT_AND_RESET(" Create chart mesh and parameterize (thread): ", createChartMeshAndParameterizeThread) + XA_PROFILE_PRINT_AND_RESET(" Create chart mesh: ", createChartMesh) + XA_PROFILE_PRINT_AND_RESET(" Parameterize charts: ", parameterizeCharts) + XA_PROFILE_PRINT_AND_RESET(" Orthogonal: ", parameterizeChartsOrthogonal) + XA_PROFILE_PRINT_AND_RESET(" LSCM: ", parameterizeChartsLSCM) + XA_PROFILE_PRINT_AND_RESET(" Recompute: ", parameterizeChartsRecompute) + XA_PROFILE_PRINT_AND_RESET(" Piecewise: ", parameterizeChartsPiecewise) + XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", parameterizeChartsPiecewiseBoundaryIntersection) + XA_PROFILE_PRINT_AND_RESET(" Evaluate quality: ", parameterizeChartsEvaluateQuality) +#if XA_PROFILE_ALLOC + XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc) +#endif + XA_PRINT_MEM_USAGE + } else { + XA_PROFILE_START(computeChartsReal) + if (!internal::segment::computeUvMeshCharts(ctx->taskScheduler, ctx->uvMeshes, ctx->progressFunc, ctx->progressUserData)) { + XA_PRINT(" Cancelled by user\n"); + return; + } + XA_PROFILE_END(computeChartsReal) + ctx->uvMeshChartsComputed = true; + // Count charts. + uint32_t chartCount = 0; + const uint32_t meshCount = ctx->uvMeshes.size(); + for (uint32_t i = 0; i < meshCount; i++) + chartCount += ctx->uvMeshes[i]->charts.size(); + XA_PRINT(" %u charts\n", chartCount); + XA_PROFILE_PRINT_AND_RESET(" Total (real): ", computeChartsReal) + XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", computeChartsThread) } - if (invalidParamCount > 0) - XA_PRINT_WARNING(" %u charts with invalid parameterizations\n", invalidParamCount); - XA_PROFILE_PRINT_AND_RESET(" Total (real): ", parameterizeChartsReal) - XA_PROFILE_PRINT_AND_RESET(" Total (thread): ", parameterizeChartsThread) - XA_PROFILE_PRINT_AND_RESET(" Create chart mesh: ", createChartMesh) - XA_PROFILE_PRINT_AND_RESET(" Fix t-junctions: ", fixChartMeshTJunctions) - XA_PROFILE_PRINT_AND_RESET(" Close holes: ", closeChartMeshHoles) - XA_PROFILE_PRINT_AND_RESET(" Orthogonal: ", parameterizeChartsOrthogonal) - XA_PROFILE_PRINT_AND_RESET(" LSCM: ", parameterizeChartsLSCM) - XA_PROFILE_PRINT_AND_RESET(" Recompute: ", parameterizeChartsRecompute) - XA_PROFILE_PRINT_AND_RESET(" Piecewise: ", parameterizeChartsPiecewise) - XA_PROFILE_PRINT_AND_RESET(" Boundary intersection: ", parameterizeChartsPiecewiseBoundaryIntersection) - XA_PROFILE_PRINT_AND_RESET(" Evaluate quality: ", parameterizeChartsEvaluateQuality) #if XA_PROFILE_ALLOC XA_PROFILE_PRINT_AND_RESET(" Alloc: ", alloc) #endif XA_PRINT_MEM_USAGE } -void PackCharts(Atlas *atlas, PackOptions packOptions) -{ +void PackCharts(Atlas *atlas, PackOptions packOptions) { // Validate arguments and context state. if (!atlas) { XA_PRINT_WARNING("PackCharts: atlas is null.\n"); @@ -9867,10 +9152,9 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) XA_PRINT_WARNING("PackCharts: ComputeCharts must be called first.\n"); return; } - if (!ctx->paramAtlas.chartsParameterized()) { - XA_PRINT_WARNING("PackCharts: ParameterizeCharts must be called first.\n"); - return; - } + } else if (!ctx->uvMeshChartsComputed) { + XA_PRINT_WARNING("PackCharts: ComputeCharts must be called first.\n"); + return; } if (packOptions.texelsPerUnit < 0.0f) { XA_PRINT_WARNING("PackCharts: PackOptions::texelsPerUnit is negative.\n"); @@ -9893,8 +9177,7 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) if (!ctx->uvMeshInstances.isEmpty()) { for (uint32_t i = 0; i < ctx->uvMeshInstances.size(); i++) packAtlas.addUvMeshCharts(ctx->uvMeshInstances[i]); - } - else + } else packAtlas.addCharts(ctx->taskScheduler, &ctx->paramAtlas); XA_PROFILE_END(packChartsAddCharts) XA_PROFILE_START(packCharts) @@ -9946,16 +9229,35 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) uint32_t chartIndex = 0; for (uint32_t i = 0; i < atlas->meshCount; i++) { Mesh &outputMesh = atlas->meshes[i]; + MeshPolygonMapping *meshPolygonMapping = ctx->meshPolygonMappings[i]; + // One polygon can have many triangles. Don't want to process the same polygon more than once when counting indices, building chart faces etc. + internal::BitArray polygonTouched; + if (meshPolygonMapping) { + polygonTouched.resize(meshPolygonMapping->faceVertexCount.size()); + polygonTouched.zeroOutMemory(); + } // Count and alloc arrays. - const internal::param::InvalidMeshGeometry &invalid = ctx->paramAtlas.invalidMeshGeometry(i); + const internal::InvalidMeshGeometry &invalid = ctx->paramAtlas.invalidMeshGeometry(i); outputMesh.vertexCount += invalid.vertices().length; outputMesh.indexCount += invalid.faces().length * 3; for (uint32_t cg = 0; cg < ctx->paramAtlas.chartGroupCount(i); cg++) { const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, cg); for (uint32_t c = 0; c < chartGroup->chartCount(); c++) { const internal::param::Chart *chart = chartGroup->chartAt(c); - outputMesh.vertexCount += chart->mesh()->vertexCount(); - outputMesh.indexCount += chart->mesh()->faceCount() * 3; + outputMesh.vertexCount += chart->originalVertexCount(); + const uint32_t faceCount = chart->unifiedMesh()->faceCount(); + if (meshPolygonMapping) { + // Map triangles back to polygons and count the polygon vertices. + for (uint32_t f = 0; f < faceCount; f++) { + const uint32_t polygon = meshPolygonMapping->triangleToPolygonMap[chart->mapFaceToSourceFace(f)]; + if (!polygonTouched.get(polygon)) { + polygonTouched.set(polygon); + outputMesh.indexCount += meshPolygonMapping->faceVertexCount[polygon]; + } + } + } else { + outputMesh.indexCount += faceCount * 3; + } outputMesh.chartCount++; } } @@ -9966,7 +9268,7 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) // Copy mesh data. uint32_t firstVertex = 0; { - const internal::param::InvalidMeshGeometry &mesh = ctx->paramAtlas.invalidMeshGeometry(i); + const internal::InvalidMeshGeometry &mesh = ctx->paramAtlas.invalidMeshGeometry(i); internal::ConstArrayView<uint32_t> faces = mesh.faces(); internal::ConstArrayView<uint32_t> indices = mesh.indices(); internal::ConstArrayView<uint32_t> vertices = mesh.vertices(); @@ -9991,23 +9293,50 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) const internal::param::ChartGroup *chartGroup = ctx->paramAtlas.chartGroupAt(i, cg); for (uint32_t c = 0; c < chartGroup->chartCount(); c++) { const internal::param::Chart *chart = chartGroup->chartAt(c); - const internal::Mesh *mesh = chart->mesh(); + const internal::Mesh *unifiedMesh = chart->unifiedMesh(); + const uint32_t faceCount = unifiedMesh->faceCount(); +#if XA_CHECK_PARAM_WINDING + uint32_t flippedCount = 0; + for (uint32_t f = 0; f < faceCount; f++) { + const float area = mesh->computeFaceParametricArea(f); + if (area < 0.0f) + flippedCount++; + } + const char *type = "LSCM"; + if (chart->type() == ChartType::Planar) + type = "planar"; + else if (chart->type() == ChartType::Ortho) + type = "ortho"; + else if (chart->type() == ChartType::Piecewise) + type = "piecewise"; + if (flippedCount > 0) { + if (flippedCount == faceCount) { + XA_PRINT_WARNING("chart %u (%s): all face flipped\n", chartIndex, type); + } else { + XA_PRINT_WARNING("chart %u (%s): %u / %u faces flipped\n", chartIndex, type, flippedCount, faceCount); + } + } +#endif // Vertices. - for (uint32_t v = 0; v < mesh->vertexCount(); v++) { + for (uint32_t v = 0; v < chart->originalVertexCount(); v++) { Vertex &vertex = outputMesh.vertexArray[firstVertex + v]; vertex.atlasIndex = packAtlas.getChart(chartIndex)->atlasIndex; XA_DEBUG_ASSERT(vertex.atlasIndex >= 0); vertex.chartIndex = (int32_t)chartIndex; - const internal::Vector2 &uv = mesh->texcoord(v); + const internal::Vector2 &uv = unifiedMesh->texcoord(chart->originalVertexToUnifiedVertex(v)); vertex.uv[0] = internal::max(0.0f, uv.x); vertex.uv[1] = internal::max(0.0f, uv.y); vertex.xref = chart->mapChartVertexToSourceVertex(v); } // Indices. - for (uint32_t f = 0; f < mesh->faceCount(); f++) { + for (uint32_t f = 0; f < faceCount; f++) { const uint32_t indexOffset = chart->mapFaceToSourceFace(f) * 3; - for (uint32_t j = 0; j < 3; j++) - outputMesh.indexArray[indexOffset + j] = firstVertex + mesh->vertexAt(f * 3 + j); + for (uint32_t j = 0; j < 3; j++) { + uint32_t outIndex = indexOffset + j; + if (meshPolygonMapping) + outIndex = meshPolygonMapping->triangleToPolygonIndicesMap[outIndex]; + outputMesh.indexArray[outIndex] = firstVertex + chart->originalVertices()[f * 3 + j]; + } } // Charts. Chart *outputChart = &outputMesh.chartArray[meshChartIndex]; @@ -10015,14 +9344,38 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) XA_DEBUG_ASSERT(atlasIndex >= 0); outputChart->atlasIndex = (uint32_t)atlasIndex; outputChart->type = chart->isInvalid() ? ChartType::Invalid : chart->type(); - outputChart->faceCount = mesh->faceCount(); - outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount); - for (uint32_t f = 0; f < outputChart->faceCount; f++) - outputChart->faceArray[f] = chart->mapFaceToSourceFace(f); + if (meshPolygonMapping) { + // Count polygons. + polygonTouched.zeroOutMemory(); + outputChart->faceCount = 0; + for (uint32_t f = 0; f < faceCount; f++) { + const uint32_t polygon = meshPolygonMapping->triangleToPolygonMap[chart->mapFaceToSourceFace(f)]; + if (!polygonTouched.get(polygon)) { + polygonTouched.set(polygon); + outputChart->faceCount++; + } + } + // Write polygons. + outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount); + polygonTouched.zeroOutMemory(); + uint32_t of = 0; + for (uint32_t f = 0; f < faceCount; f++) { + const uint32_t polygon = meshPolygonMapping->triangleToPolygonMap[chart->mapFaceToSourceFace(f)]; + if (!polygonTouched.get(polygon)) { + polygonTouched.set(polygon); + outputChart->faceArray[of++] = polygon; + } + } + } else { + outputChart->faceCount = faceCount; + outputChart->faceArray = XA_ALLOC_ARRAY(internal::MemTag::Default, uint32_t, outputChart->faceCount); + for (uint32_t f = 0; f < outputChart->faceCount; f++) + outputChart->faceArray[f] = chart->mapFaceToSourceFace(f); + } outputChart->material = 0; meshChartIndex++; chartIndex++; - firstVertex += mesh->vertexCount(); + firstVertex += chart->originalVertexCount(); } } XA_DEBUG_ASSERT(outputMesh.vertexCount == firstVertex); @@ -10102,28 +9455,21 @@ void PackCharts(Atlas *atlas, PackOptions packOptions) XA_PRINT_MEM_USAGE } -void Generate(Atlas *atlas, ChartOptions chartOptions, ParameterizeOptions parameterizeOptions, PackOptions packOptions) -{ +void Generate(Atlas *atlas, ChartOptions chartOptions, PackOptions packOptions) { if (!atlas) { XA_PRINT_WARNING("Generate: atlas is null.\n"); return; } Context *ctx = (Context *)atlas; - if (!ctx->uvMeshInstances.isEmpty()) { - XA_PRINT_WARNING("Generate: This function should not be called with UV meshes.\n"); - return; - } - if (ctx->meshes.isEmpty()) { - XA_PRINT_WARNING("Generate: No meshes. Call AddMesh first.\n"); + if (ctx->meshes.isEmpty() && ctx->uvMeshInstances.isEmpty()) { + XA_PRINT_WARNING("Generate: No meshes. Call AddMesh or AddUvMesh first.\n"); return; } ComputeCharts(atlas, chartOptions); - ParameterizeCharts(atlas, parameterizeOptions); PackCharts(atlas, packOptions); } -void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progressUserData) -{ +void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progressUserData) { if (!atlas) { XA_PRINT_WARNING("SetProgressCallback: atlas is null.\n"); return; @@ -10133,37 +9479,33 @@ void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc, void *progress ctx->progressUserData = progressUserData; } -void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc) -{ +void SetAlloc(ReallocFunc reallocFunc, FreeFunc freeFunc) { internal::s_realloc = reallocFunc; internal::s_free = freeFunc; } -void SetPrint(PrintFunc print, bool verbose) -{ +void SetPrint(PrintFunc print, bool verbose) { internal::s_print = print; internal::s_printVerbose = verbose; } -const char *StringForEnum(AddMeshError::Enum error) -{ +const char *StringForEnum(AddMeshError error) { if (error == AddMeshError::Error) return "Unspecified error"; if (error == AddMeshError::IndexOutOfRange) return "Index out of range"; + if (error == AddMeshError::InvalidFaceVertexCount) + return "Invalid face vertex count"; if (error == AddMeshError::InvalidIndexCount) return "Invalid index count"; return "Success"; } -const char *StringForEnum(ProgressCategory::Enum category) -{ +const char *StringForEnum(ProgressCategory category) { if (category == ProgressCategory::AddMesh) return "Adding mesh(es)"; if (category == ProgressCategory::ComputeCharts) return "Computing charts"; - if (category == ProgressCategory::ParameterizeCharts) - return "Parameterizing charts"; if (category == ProgressCategory::PackCharts) return "Packing charts"; if (category == ProgressCategory::BuildOutputMeshes) @@ -10172,3 +9514,96 @@ const char *StringForEnum(ProgressCategory::Enum category) } } // namespace xatlas + +#if XATLAS_C_API +static_assert(sizeof(xatlas::Chart) == sizeof(xatlasChart), "xatlasChart size mismatch"); +static_assert(sizeof(xatlas::Vertex) == sizeof(xatlasVertex), "xatlasVertex size mismatch"); +static_assert(sizeof(xatlas::Mesh) == sizeof(xatlasMesh), "xatlasMesh size mismatch"); +static_assert(sizeof(xatlas::Atlas) == sizeof(xatlasAtlas), "xatlasAtlas size mismatch"); +static_assert(sizeof(xatlas::MeshDecl) == sizeof(xatlasMeshDecl), "xatlasMeshDecl size mismatch"); +static_assert(sizeof(xatlas::UvMeshDecl) == sizeof(xatlasUvMeshDecl), "xatlasUvMeshDecl size mismatch"); +static_assert(sizeof(xatlas::ChartOptions) == sizeof(xatlasChartOptions), "xatlasChartOptions size mismatch"); +static_assert(sizeof(xatlas::PackOptions) == sizeof(xatlasPackOptions), "xatlasPackOptions size mismatch"); + +#ifdef __cplusplus +extern "C" { +#endif + +xatlasAtlas *xatlasCreate() { + return (xatlasAtlas *)xatlas::Create(); +} + +void xatlasDestroy(xatlasAtlas *atlas) { + xatlas::Destroy((xatlas::Atlas *)atlas); +} + +xatlasAddMeshError xatlasAddMesh(xatlasAtlas *atlas, const xatlasMeshDecl *meshDecl, uint32_t meshCountHint) { + return (xatlasAddMeshError)xatlas::AddMesh((xatlas::Atlas *)atlas, *(const xatlas::MeshDecl *)meshDecl, meshCountHint); +} + +void xatlasAddMeshJoin(xatlasAtlas *atlas) { + xatlas::AddMeshJoin((xatlas::Atlas *)atlas); +} + +xatlasAddMeshError xatlasAddUvMesh(xatlasAtlas *atlas, const xatlasUvMeshDecl *decl) { + return (xatlasAddMeshError)xatlas::AddUvMesh((xatlas::Atlas *)atlas, *(const xatlas::UvMeshDecl *)decl); +} + +void xatlasComputeCharts(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions) { + xatlas::ComputeCharts((xatlas::Atlas *)atlas, chartOptions ? *(xatlas::ChartOptions *)chartOptions : xatlas::ChartOptions()); +} + +void xatlasPackCharts(xatlasAtlas *atlas, const xatlasPackOptions *packOptions) { + xatlas::PackCharts((xatlas::Atlas *)atlas, packOptions ? *(xatlas::PackOptions *)packOptions : xatlas::PackOptions()); +} + +void xatlasGenerate(xatlasAtlas *atlas, const xatlasChartOptions *chartOptions, const xatlasPackOptions *packOptions) { + xatlas::Generate((xatlas::Atlas *)atlas, chartOptions ? *(xatlas::ChartOptions *)chartOptions : xatlas::ChartOptions(), packOptions ? *(xatlas::PackOptions *)packOptions : xatlas::PackOptions()); +} + +void xatlasSetProgressCallback(xatlasAtlas *atlas, xatlasProgressFunc progressFunc, void *progressUserData) { + xatlas::ProgressFunc pf; + *(void **)&pf = (void *)progressFunc; + xatlas::SetProgressCallback((xatlas::Atlas *)atlas, pf, progressUserData); +} + +void xatlasSetAlloc(xatlasReallocFunc reallocFunc, xatlasFreeFunc freeFunc) { + xatlas::SetAlloc((xatlas::ReallocFunc)reallocFunc, (xatlas::FreeFunc)freeFunc); +} + +void xatlasSetPrint(xatlasPrintFunc print, bool verbose) { + xatlas::SetPrint((xatlas::PrintFunc)print, verbose); +} + +const char *xatlasAddMeshErrorString(xatlasAddMeshError error) { + return xatlas::StringForEnum((xatlas::AddMeshError)error); +} + +const char *xatlasProgressCategoryString(xatlasProgressCategory category) { + return xatlas::StringForEnum((xatlas::ProgressCategory)category); +} + +void xatlasMeshDeclInit(xatlasMeshDecl *meshDecl) { + xatlas::MeshDecl init; + memcpy(meshDecl, &init, sizeof(init)); +} + +void xatlasUvMeshDeclInit(xatlasUvMeshDecl *uvMeshDecl) { + xatlas::UvMeshDecl init; + memcpy(uvMeshDecl, &init, sizeof(init)); +} + +void xatlasChartOptionsInit(xatlasChartOptions *chartOptions) { + xatlas::ChartOptions init; + memcpy(chartOptions, &init, sizeof(init)); +} + +void xatlasPackOptionsInit(xatlasPackOptions *packOptions) { + xatlas::PackOptions init; + memcpy(packOptions, &init, sizeof(init)); +} + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // XATLAS_C_API diff --git a/thirdparty/xatlas/xatlas.h b/thirdparty/xatlas/xatlas.h index cc47f4837e..fc40d9d49c 100644 --- a/thirdparty/xatlas/xatlas.h +++ b/thirdparty/xatlas/xatlas.h @@ -31,35 +31,30 @@ Copyright NVIDIA Corporation 2006 -- Ignacio Castano <icastano@nvidia.com> #pragma once #ifndef XATLAS_H #define XATLAS_H +#include <stddef.h> #include <stdint.h> namespace xatlas { -struct ChartType -{ - enum Enum - { - Planar, - Ortho, - LSCM, - Piecewise, - Invalid - }; +enum class ChartType { + Planar, + Ortho, + LSCM, + Piecewise, + Invalid }; // A group of connected faces, belonging to a single atlas. -struct Chart -{ +struct Chart { uint32_t *faceArray; uint32_t atlasIndex; // Sub-atlas index. uint32_t faceCount; - ChartType::Enum type; + ChartType type; uint32_t material; }; // Output vertex. -struct Vertex -{ +struct Vertex { int32_t atlasIndex; // Sub-atlas index. -1 if the vertex doesn't exist in any atlas. int32_t chartIndex; // -1 if the vertex doesn't exist in any chart. float uv[2]; // Not normalized - values are in Atlas width and height range. @@ -67,8 +62,7 @@ struct Vertex }; // Output mesh. -struct Mesh -{ +struct Mesh { Chart *chartArray; uint32_t *indexArray; Vertex *vertexArray; @@ -83,16 +77,15 @@ static const uint32_t kImageIsBilinearBit = 0x40000000; static const uint32_t kImageIsPaddingBit = 0x20000000; // Empty on creation. Populated after charts are packed. -struct Atlas -{ +struct Atlas { uint32_t *image; Mesh *meshes; // The output meshes, corresponding to each AddMesh call. + float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length. uint32_t width; // Atlas width in texels. uint32_t height; // Atlas height in texels. uint32_t atlasCount; // Number of sub-atlases. Equal to 0 unless PackOptions resolution is changed from default (0). uint32_t chartCount; // Total number of charts in all meshes. uint32_t meshCount; // Number of output meshes. Equal to the number of times AddMesh was called. - float *utilization; // Normalized atlas texel utilization array. E.g. a value of 0.8 means 20% empty space. atlasCount in length. float texelsPerUnit; // Equal to PackOptions texelsPerUnit if texelsPerUnit > 0, otherwise an estimated value to match PackOptions resolution. }; @@ -101,73 +94,76 @@ Atlas *Create(); void Destroy(Atlas *atlas); -struct IndexFormat -{ - enum Enum - { - UInt16, - UInt32 - }; +enum class IndexFormat { + UInt16, + UInt32 }; // Input mesh declaration. -struct MeshDecl -{ +struct MeshDecl { const void *vertexPositionData = nullptr; const void *vertexNormalData = nullptr; // optional const void *vertexUvData = nullptr; // optional. The input UVs are provided as a hint to the chart generator. const void *indexData = nullptr; // optional - - // Optional. indexCount / 3 (triangle count) in length. + + // Optional. Must be faceCount in length. // Don't atlas faces set to true. Ignored faces still exist in the output meshes, Vertex uv is set to (0, 0) and Vertex atlasIndex to -1. const bool *faceIgnoreData = nullptr; + // Optional. Must be faceCount in length. + // Only faces with the same material will be assigned to the same chart. + const uint32_t *faceMaterialData = nullptr; + + // Optional. Must be faceCount in length. + // Polygon / n-gon support. Faces are assumed to be triangles if this is null. + const uint8_t *faceVertexCount = nullptr; + uint32_t vertexCount = 0; uint32_t vertexPositionStride = 0; uint32_t vertexNormalStride = 0; // optional uint32_t vertexUvStride = 0; // optional uint32_t indexCount = 0; int32_t indexOffset = 0; // optional. Add this offset to all indices. - IndexFormat::Enum indexFormat = IndexFormat::UInt16; + uint32_t faceCount = 0; // Optional if faceVertexCount is null. Otherwise assumed to be indexCount / 3. + IndexFormat indexFormat = IndexFormat::UInt16; // Vertex positions within epsilon distance of each other are considered colocal. float epsilon = 1.192092896e-07F; }; -struct AddMeshError -{ - enum Enum - { - Success, // No error. - Error, // Unspecified error. - IndexOutOfRange, // An index is >= MeshDecl vertexCount. - InvalidIndexCount // Not evenly divisible by 3 - expecting triangles. - }; +enum class AddMeshError { + Success, // No error. + Error, // Unspecified error. + IndexOutOfRange, // An index is >= MeshDecl vertexCount. + InvalidFaceVertexCount, // Must be >= 3. + InvalidIndexCount // Not evenly divisible by 3 - expecting triangles. }; // Add a mesh to the atlas. MeshDecl data is copied, so it can be freed after AddMesh returns. -AddMeshError::Enum AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint = 0); +AddMeshError AddMesh(Atlas *atlas, const MeshDecl &meshDecl, uint32_t meshCountHint = 0); // Wait for AddMesh async processing to finish. ComputeCharts / Generate call this internally. void AddMeshJoin(Atlas *atlas); -struct UvMeshDecl -{ +struct UvMeshDecl { const void *vertexUvData = nullptr; const void *indexData = nullptr; // optional - const uint32_t *faceMaterialData = nullptr; // Optional. Faces with different materials won't be assigned to the same chart. Must be indexCount / 3 in length. + const uint32_t *faceMaterialData = nullptr; // Optional. Overlapping UVs should be assigned a different material. Must be indexCount / 3 in length. uint32_t vertexCount = 0; uint32_t vertexStride = 0; uint32_t indexCount = 0; int32_t indexOffset = 0; // optional. Add this offset to all indices. - IndexFormat::Enum indexFormat = IndexFormat::UInt16; - bool rotateCharts = true; + IndexFormat indexFormat = IndexFormat::UInt16; }; -AddMeshError::Enum AddUvMesh(Atlas *atlas, const UvMeshDecl &decl); +AddMeshError AddUvMesh(Atlas *atlas, const UvMeshDecl &decl); + +// Custom parameterization function. texcoords initial values are an orthogonal parameterization. +typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount); + +struct ChartOptions { + ParameterizeFunc paramFunc = nullptr; -struct ChartOptions -{ float maxChartArea = 0.0f; // Don't grow charts to be larger than this. 0 means no limit. float maxBoundaryLength = 0.0f; // Don't grow charts to have a longer boundary than this. 0 means no limit. @@ -180,26 +176,31 @@ struct ChartOptions float maxCost = 2.0f; // If total of all metrics * weights > maxCost, don't grow chart. Lower values result in more charts. uint32_t maxIterations = 1; // Number of iterations of the chart growing and seeding phases. Higher values result in better charts. + + bool useInputMeshUvs = false; // Use MeshDecl::vertexUvData for charts. + bool fixWinding = false; // Enforce consistent texture coordinate winding. }; // Call after all AddMesh calls. Can be called multiple times to recompute charts with different options. void ComputeCharts(Atlas *atlas, ChartOptions options = ChartOptions()); -// Custom parameterization function. texcoords initial values are an orthogonal parameterization. -typedef void (*ParameterizeFunc)(const float *positions, float *texcoords, uint32_t vertexCount, const uint32_t *indices, uint32_t indexCount); +struct PackOptions { + // Charts larger than this will be scaled down. 0 means no limit. + uint32_t maxChartSize = 0; -struct ParameterizeOptions -{ - ParameterizeFunc func = nullptr; - bool closeHoles = true; // If the custom parameterization function works with multiple boundaries, this can be set to false to improve performance. - bool fixTJunctions = true; // If meshes don't have T-junctions, this can be set to false to improve performance. -}; + // Number of pixels to pad charts with. + uint32_t padding = 0; + + // Unit to texel scale. e.g. a 1x1 quad with texelsPerUnit of 32 will take up approximately 32x32 texels in the atlas. + // If 0, an estimated value will be calculated to approximately match the given resolution. + // If resolution is also 0, the estimated value will approximately match a 1024x1024 atlas. + float texelsPerUnit = 0.0f; -// Call after ComputeCharts. Can be called multiple times to re-parameterize charts with a different ParameterizeFunc. -void ParameterizeCharts(Atlas *atlas, ParameterizeOptions options = ParameterizeOptions()); + // If 0, generate a single atlas with texelsPerUnit determining the final resolution. + // If not 0, and texelsPerUnit is not 0, generate one or more atlases with that exact resolution. + // If not 0, and texelsPerUnit is 0, texelsPerUnit is estimated to approximately match the resolution. + uint32_t resolution = 0; -struct PackOptions -{ // Leave space around charts for texels that would be sampled by bilinear filtering. bool bilinear = true; @@ -212,44 +213,29 @@ struct PackOptions // Create Atlas::image bool createImage = false; - // Charts larger than this will be scaled down. 0 means no limit. - uint32_t maxChartSize = 0; - - // Number of pixels to pad charts with. - uint32_t padding = 0; + // Rotate charts to the axis of their convex hull. + bool rotateChartsToAxis = true; - // Unit to texel scale. e.g. a 1x1 quad with texelsPerUnit of 32 will take up approximately 32x32 texels in the atlas. - // If 0, an estimated value will be calculated to approximately match the given resolution. - // If resolution is also 0, the estimated value will approximately match a 1024x1024 atlas. - float texelsPerUnit = 0.0f; - - // If 0, generate a single atlas with texelsPerUnit determining the final resolution. - // If not 0, and texelsPerUnit is not 0, generate one or more atlases with that exact resolution. - // If not 0, and texelsPerUnit is 0, texelsPerUnit is estimated to approximately match the resolution. - uint32_t resolution = 0; + // Rotate charts to improve packing. + bool rotateCharts = true; }; -// Call after ParameterizeCharts. Can be called multiple times to re-pack charts with different options. +// Call after ComputeCharts. Can be called multiple times to re-pack charts with different options. void PackCharts(Atlas *atlas, PackOptions packOptions = PackOptions()); -// Equivalent to calling ComputeCharts, ParameterizeCharts and PackCharts in sequence. Can be called multiple times to regenerate with different options. -void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), ParameterizeOptions parameterizeOptions = ParameterizeOptions(), PackOptions packOptions = PackOptions()); +// Equivalent to calling ComputeCharts and PackCharts in sequence. Can be called multiple times to regenerate with different options. +void Generate(Atlas *atlas, ChartOptions chartOptions = ChartOptions(), PackOptions packOptions = PackOptions()); // Progress tracking. -struct ProgressCategory -{ - enum Enum - { - AddMesh, - ComputeCharts, - ParameterizeCharts, - PackCharts, - BuildOutputMeshes - }; +enum class ProgressCategory { + AddMesh, + ComputeCharts, + PackCharts, + BuildOutputMeshes }; // May be called from any thread. Return false to cancel. -typedef bool (*ProgressFunc)(ProgressCategory::Enum category, int progress, void *userData); +typedef bool (*ProgressFunc)(ProgressCategory category, int progress, void *userData); void SetProgressCallback(Atlas *atlas, ProgressFunc progressFunc = nullptr, void *progressUserData = nullptr); @@ -263,8 +249,8 @@ typedef int (*PrintFunc)(const char *, ...); void SetPrint(PrintFunc print, bool verbose); // Helper functions for error messages. -const char *StringForEnum(AddMeshError::Enum error); -const char *StringForEnum(ProgressCategory::Enum category); +const char *StringForEnum(AddMeshError error); +const char *StringForEnum(ProgressCategory category); } // namespace xatlas |