diff options
Diffstat (limited to 'thirdparty/meshoptimizer/vcacheoptimizer.cpp')
-rw-r--r-- | thirdparty/meshoptimizer/vcacheoptimizer.cpp | 473 |
1 files changed, 473 insertions, 0 deletions
diff --git a/thirdparty/meshoptimizer/vcacheoptimizer.cpp b/thirdparty/meshoptimizer/vcacheoptimizer.cpp new file mode 100644 index 0000000000..fb8ade4b77 --- /dev/null +++ b/thirdparty/meshoptimizer/vcacheoptimizer.cpp @@ -0,0 +1,473 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +// This work is based on: +// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 +// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007 +namespace meshopt +{ + +const size_t kCacheSizeMax = 16; +const size_t kValenceMax = 8; + +struct VertexScoreTable +{ + float cache[1 + kCacheSizeMax]; + float live[1 + kValenceMax]; +}; + +// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD +static const VertexScoreTable kVertexScoreTable = { + {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f}, + {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f}, +}; + +// Tuned to minimize the encoded index buffer size +static const VertexScoreTable kVertexScoreTableStrip = { + {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f}, + {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f}, +}; + +struct TriangleAdjacency +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill triangle counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill triangle data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = unsigned(i); + adjacency.data[adjacency.offsets[b]++] = unsigned(i); + adjacency.data[adjacency.offsets[c]++] = unsigned(i); + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + +static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) +{ + // check dead-end stack + while (dead_end_top) + { + unsigned int vertex = dead_end[--dead_end_top]; + + if (live_triangles[vertex] > 0) + return vertex; + } + + // input order + while (input_cursor < vertex_count) + { + if (live_triangles[input_cursor] > 0) + return input_cursor; + + ++input_cursor; + } + + return ~0u; +} + +static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) +{ + unsigned int best_candidate = ~0u; + int best_priority = -1; + + for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate) + { + unsigned int vertex = *next_candidate; + + // otherwise we don't need to process it + if (live_triangles[vertex] > 0) + { + int priority = 0; + + // will it be in cache after fanning? + if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size) + { + priority = timestamp - cache_timestamps[vertex]; // position in cache + } + + if (priority > best_priority) + { + best_candidate = vertex; + best_priority = priority; + } + } + } + + return best_candidate; +} + +static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles) +{ + assert(cache_position >= -1 && cache_position < int(kCacheSizeMax)); + + unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax; + + return table->cache[1 + cache_position] + table->live[live_triangles_clamped]; +} + +static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count) +{ + // input order + while (input_cursor < face_count) + { + if (!emitted_flags[input_cursor]) + return input_cursor; + + ++input_cursor; + } + + return ~0u; +} + +} // namespace meshopt + +void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + unsigned int cache_size = 16; + assert(cache_size <= kCacheSizeMax); + + size_t face_count = index_count / 3; + + // build adjacency information + TriangleAdjacency adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // live triangle counts + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + // emitted flags + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + // compute initial vertex scores + float* vertex_scores = allocator.allocate<float>(vertex_count); + + for (size_t i = 0; i < vertex_count; ++i) + vertex_scores[i] = vertexScore(table, -1, live_triangles[i]); + + // compute triangle scores + float* triangle_scores = allocator.allocate<float>(face_count); + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0]; + unsigned int b = indices[i * 3 + 1]; + unsigned int c = indices[i * 3 + 2]; + + triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c]; + } + + unsigned int cache_holder[2 * (kCacheSizeMax + 3)]; + unsigned int* cache = cache_holder; + unsigned int* cache_new = cache_holder + kCacheSizeMax + 3; + size_t cache_count = 0; + + unsigned int current_triangle = 0; + unsigned int input_cursor = 1; + + unsigned int output_triangle = 0; + + while (current_triangle != ~0u) + { + assert(output_triangle < face_count); + + unsigned int a = indices[current_triangle * 3 + 0]; + unsigned int b = indices[current_triangle * 3 + 1]; + unsigned int c = indices[current_triangle * 3 + 2]; + + // output indices + destination[output_triangle * 3 + 0] = a; + destination[output_triangle * 3 + 1] = b; + destination[output_triangle * 3 + 2] = c; + output_triangle++; + + // update emitted flags + emitted_flags[current_triangle] = true; + triangle_scores[current_triangle] = 0; + + // new triangle + size_t cache_write = 0; + cache_new[cache_write++] = a; + cache_new[cache_write++] = b; + cache_new[cache_write++] = c; + + // old triangles + for (size_t i = 0; i < cache_count; ++i) + { + unsigned int index = cache[i]; + + if (index != a && index != b && index != c) + { + cache_new[cache_write++] = index; + } + } + + unsigned int* cache_temp = cache; + cache = cache_new, cache_new = cache_temp; + cache_count = cache_write > cache_size ? cache_size : cache_write; + + // update live triangle counts + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // remove emitted triangle from adjacency data + // this makes sure that we spend less time traversing these lists on subsequent iterations + for (size_t k = 0; k < 3; ++k) + { + unsigned int index = indices[current_triangle * 3 + k]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t i = 0; i < neighbours_size; ++i) + { + unsigned int tri = neighbours[i]; + + if (tri == current_triangle) + { + neighbours[i] = neighbours[neighbours_size - 1]; + adjacency.counts[index]--; + break; + } + } + } + + unsigned int best_triangle = ~0u; + float best_score = 0; + + // update cache positions, vertex scores and triangle scores, and find next best triangle + for (size_t i = 0; i < cache_write; ++i) + { + unsigned int index = cache[i]; + + int cache_position = i >= cache_size ? -1 : int(i); + + // update vertex score + float score = vertexScore(table, cache_position, live_triangles[index]); + float score_diff = score - vertex_scores[index]; + + vertex_scores[index] = score; + + // update scores of vertex triangles + const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index]; + const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index]; + + for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + { + unsigned int tri = *it; + assert(!emitted_flags[tri]); + + float tri_score = triangle_scores[tri] + score_diff; + assert(tri_score > 0); + + if (best_score < tri_score) + { + best_triangle = tri; + best_score = tri_score; + } + + triangle_scores[tri] = tri_score; + } + } + + // step through input triangles in order if we hit a dead-end + current_triangle = best_triangle; + + if (current_triangle == ~0u) + { + current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count); + } + } + + assert(input_cursor == face_count); + assert(output_triangle == face_count); +} + +void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable); +} + +void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip); +} + +void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(cache_size >= 3); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + size_t face_count = index_count / 3; + + // build adjacency information + TriangleAdjacency adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // live triangle counts + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + // cache time stamps + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + // dead-end stack + unsigned int* dead_end = allocator.allocate<unsigned int>(index_count); + unsigned int dead_end_top = 0; + + // emitted flags + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + unsigned int current_vertex = 0; + + unsigned int timestamp = cache_size + 1; + unsigned int input_cursor = 1; // vertex to restart from in case of dead-end + + unsigned int output_triangle = 0; + + while (current_vertex != ~0u) + { + const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top; + + // emit all vertex neighbours + const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; + const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex]; + + for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + { + unsigned int triangle = *it; + + if (!emitted_flags[triangle]) + { + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + + // output indices + destination[output_triangle * 3 + 0] = a; + destination[output_triangle * 3 + 1] = b; + destination[output_triangle * 3 + 2] = c; + output_triangle++; + + // update dead-end stack + dead_end[dead_end_top + 0] = a; + dead_end[dead_end_top + 1] = b; + dead_end[dead_end_top + 2] = c; + dead_end_top += 3; + + // update live triangle counts + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // update cache info + // if vertex is not in cache, put it in cache + if (timestamp - cache_timestamps[a] > cache_size) + cache_timestamps[a] = timestamp++; + + if (timestamp - cache_timestamps[b] > cache_size) + cache_timestamps[b] = timestamp++; + + if (timestamp - cache_timestamps[c] > cache_size) + cache_timestamps[c] = timestamp++; + + // update emitted flags + emitted_flags[triangle] = true; + } + } + + // next candidates are the ones we pushed to dead-end stack just now + const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top; + + // get next vertex + current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); + + if (current_vertex == ~0u) + { + current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count); + } + } + + assert(output_triangle == face_count); +} |