summaryrefslogtreecommitdiff
path: root/thirdparty/astcenc/astcenc_partition_tables.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/astcenc/astcenc_partition_tables.cpp')
-rw-r--r--thirdparty/astcenc/astcenc_partition_tables.cpp481
1 files changed, 481 insertions, 0 deletions
diff --git a/thirdparty/astcenc/astcenc_partition_tables.cpp b/thirdparty/astcenc/astcenc_partition_tables.cpp
new file mode 100644
index 0000000000..cad42384d7
--- /dev/null
+++ b/thirdparty/astcenc/astcenc_partition_tables.cpp
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: Apache-2.0
+// ----------------------------------------------------------------------------
+// Copyright 2011-2023 Arm Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+// ----------------------------------------------------------------------------
+
+/**
+ * @brief Functions for generating partition tables on demand.
+ */
+
+#include "astcenc_internal.h"
+
+/** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */
+#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
+
+/**
+ * @brief Generate a canonical representation of a partition pattern.
+ *
+ * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
+ * the remapped texel index. Remapping ensures that we only match on the partition pattern,
+ * independent of the partition order generated by the hash.
+ *
+ * @param texel_count The number of texels in the block.
+ * @param partition_of_texel The partition assignments, in hash order.
+ * @param[out] bit_pattern The output bit pattern representation.
+ */
+static void generate_canonical_partitioning(
+ unsigned int texel_count,
+ const uint8_t* partition_of_texel,
+ uint64_t bit_pattern[BIT_PATTERN_WORDS]
+) {
+ // Clear the pattern
+ for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
+ {
+ bit_pattern[i] = 0;
+ }
+
+ // Store a mapping to reorder the raw partitions so that the partitions are ordered such
+ // that the lowest texel index in partition N is smaller than the lowest texel index in
+ // partition N + 1.
+ int mapped_index[BLOCK_MAX_PARTITIONS];
+ int map_weight_count = 0;
+
+ for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
+ {
+ mapped_index[i] = -1;
+ }
+
+ for (unsigned int i = 0; i < texel_count; i++)
+ {
+ int index = partition_of_texel[i];
+ if (mapped_index[index] < 0)
+ {
+ mapped_index[index] = map_weight_count++;
+ }
+
+ uint64_t xlat_index = mapped_index[index];
+ bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
+ }
+}
+
+/**
+ * @brief Compare two canonical patterns to see if they are the same.
+ *
+ * @param part1 The first canonical bit pattern to check.
+ * @param part2 The second canonical bit pattern to check.
+ *
+ * @return @c true if the patterns are the same, @c false otherwise.
+ */
+static bool compare_canonical_partitionings(
+ const uint64_t part1[BIT_PATTERN_WORDS],
+ const uint64_t part2[BIT_PATTERN_WORDS]
+) {
+ return (part1[0] == part2[0])
+#if BIT_PATTERN_WORDS > 1
+ && (part1[1] == part2[1])
+#endif
+#if BIT_PATTERN_WORDS > 2
+ && (part1[2] == part2[2])
+#endif
+#if BIT_PATTERN_WORDS > 3
+ && (part1[3] == part2[3])
+#endif
+#if BIT_PATTERN_WORDS > 4
+ && (part1[4] == part2[4])
+#endif
+#if BIT_PATTERN_WORDS > 5
+ && (part1[5] == part2[5])
+#endif
+#if BIT_PATTERN_WORDS > 6
+ && (part1[6] == part2[6])
+#endif
+ ;
+}
+
+/**
+ * @brief Hash function used for procedural partition assignment.
+ *
+ * @param inp The hash seed.
+ *
+ * @return The hashed value.
+ */
+static uint32_t hash52(
+ uint32_t inp
+) {
+ inp ^= inp >> 15;
+
+ // (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
+ inp *= 0xEEDE0891;
+ inp ^= inp >> 5;
+ inp += inp << 16;
+ inp ^= inp >> 7;
+ inp ^= inp >> 3;
+ inp ^= inp << 6;
+ inp ^= inp >> 17;
+ return inp;
+}
+
+/**
+ * @brief Select texel assignment for a single coordinate.
+ *
+ * @param seed The seed - the partition index from the block.
+ * @param x The texel X coordinate in the block.
+ * @param y The texel Y coordinate in the block.
+ * @param z The texel Z coordinate in the block.
+ * @param partition_count The total partition count of this encoding.
+ * @param small_block @c true if the block has fewer than 32 texels.
+ *
+ * @return The assigned partition index for this texel.
+ */
+static uint8_t select_partition(
+ int seed,
+ int x,
+ int y,
+ int z,
+ int partition_count,
+ bool small_block
+) {
+ // For small blocks bias the coordinates to get better distribution
+ if (small_block)
+ {
+ x <<= 1;
+ y <<= 1;
+ z <<= 1;
+ }
+
+ seed += (partition_count - 1) * 1024;
+
+ uint32_t rnum = hash52(seed);
+
+ uint8_t seed1 = rnum & 0xF;
+ uint8_t seed2 = (rnum >> 4) & 0xF;
+ uint8_t seed3 = (rnum >> 8) & 0xF;
+ uint8_t seed4 = (rnum >> 12) & 0xF;
+ uint8_t seed5 = (rnum >> 16) & 0xF;
+ uint8_t seed6 = (rnum >> 20) & 0xF;
+ uint8_t seed7 = (rnum >> 24) & 0xF;
+ uint8_t seed8 = (rnum >> 28) & 0xF;
+ uint8_t seed9 = (rnum >> 18) & 0xF;
+ uint8_t seed10 = (rnum >> 22) & 0xF;
+ uint8_t seed11 = (rnum >> 26) & 0xF;
+ uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
+
+ // Squaring all the seeds in order to bias their distribution towards lower values.
+ seed1 *= seed1;
+ seed2 *= seed2;
+ seed3 *= seed3;
+ seed4 *= seed4;
+ seed5 *= seed5;
+ seed6 *= seed6;
+ seed7 *= seed7;
+ seed8 *= seed8;
+ seed9 *= seed9;
+ seed10 *= seed10;
+ seed11 *= seed11;
+ seed12 *= seed12;
+
+ int sh1, sh2;
+ if (seed & 1)
+ {
+ sh1 = (seed & 2 ? 4 : 5);
+ sh2 = (partition_count == 3 ? 6 : 5);
+ }
+ else
+ {
+ sh1 = (partition_count == 3 ? 6 : 5);
+ sh2 = (seed & 2 ? 4 : 5);
+ }
+
+ int sh3 = (seed & 0x10) ? sh1 : sh2;
+
+ seed1 >>= sh1;
+ seed2 >>= sh2;
+ seed3 >>= sh1;
+ seed4 >>= sh2;
+ seed5 >>= sh1;
+ seed6 >>= sh2;
+ seed7 >>= sh1;
+ seed8 >>= sh2;
+
+ seed9 >>= sh3;
+ seed10 >>= sh3;
+ seed11 >>= sh3;
+ seed12 >>= sh3;
+
+ int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
+ int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
+ int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
+ int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
+
+ // Apply the saw
+ a &= 0x3F;
+ b &= 0x3F;
+ c &= 0x3F;
+ d &= 0x3F;
+
+ // Remove some of the components if we are to output < 4 partitions.
+ if (partition_count <= 3)
+ {
+ d = 0;
+ }
+
+ if (partition_count <= 2)
+ {
+ c = 0;
+ }
+
+ if (partition_count <= 1)
+ {
+ b = 0;
+ }
+
+ uint8_t partition;
+ if (a >= b && a >= c && a >= d)
+ {
+ partition = 0;
+ }
+ else if (b >= c && b >= d)
+ {
+ partition = 1;
+ }
+ else if (c >= d)
+ {
+ partition = 2;
+ }
+ else
+ {
+ partition = 3;
+ }
+
+ return partition;
+}
+
+/**
+ * @brief Generate a single partition info structure.
+ *
+ * @param[out] bsd The block size information.
+ * @param partition_count The partition count of this partitioning.
+ * @param partition_index The partition index / seed of this partitioning.
+ * @param partition_remap_index The remapped partition index of this partitioning.
+ * @param[out] pi The partition info structure to populate.
+ *
+ * @return True if this is a useful partition index, False if we can skip it.
+ */
+static bool generate_one_partition_info_entry(
+ block_size_descriptor& bsd,
+ unsigned int partition_count,
+ unsigned int partition_index,
+ unsigned int partition_remap_index,
+ partition_info& pi
+) {
+ int texels_per_block = bsd.texel_count;
+ bool small_block = texels_per_block < 32;
+
+ uint8_t *partition_of_texel = pi.partition_of_texel;
+
+ // Assign texels to partitions
+ int texel_idx = 0;
+ int counts[BLOCK_MAX_PARTITIONS] { 0 };
+ for (unsigned int z = 0; z < bsd.zdim; z++)
+ {
+ for (unsigned int y = 0; y < bsd.ydim; y++)
+ {
+ for (unsigned int x = 0; x < bsd.xdim; x++)
+ {
+ uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
+ pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
+ *partition_of_texel++ = part;
+ }
+ }
+ }
+
+ // Fill loop tail so we can overfetch later
+ for (unsigned int i = 0; i < partition_count; i++)
+ {
+ int ptex_count = counts[i];
+ int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
+ for (int j = ptex_count; j < ptex_count_simd; j++)
+ {
+ pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
+ }
+ }
+
+ // Populate the actual procedural partition count
+ if (counts[0] == 0)
+ {
+ pi.partition_count = 0;
+ }
+ else if (counts[1] == 0)
+ {
+ pi.partition_count = 1;
+ }
+ else if (counts[2] == 0)
+ {
+ pi.partition_count = 2;
+ }
+ else if (counts[3] == 0)
+ {
+ pi.partition_count = 3;
+ }
+ else
+ {
+ pi.partition_count = 4;
+ }
+
+ // Populate the partition index
+ pi.partition_index = static_cast<uint16_t>(partition_index);
+
+ // Populate the coverage bitmaps for 2/3/4 partitions
+ uint64_t* bitmaps { nullptr };
+ if (partition_count == 2)
+ {
+ bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
+ }
+ else if (partition_count == 3)
+ {
+ bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
+ }
+ else if (partition_count == 4)
+ {
+ bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
+ }
+
+ for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
+ {
+ pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
+ }
+
+ // Valid partitionings have texels in all of the requested partitions
+ bool valid = pi.partition_count == partition_count;
+
+ if (bitmaps)
+ {
+ // Populate the partition coverage bitmap
+ for (unsigned int i = 0; i < partition_count; i++)
+ {
+ bitmaps[i] = 0ULL;
+ }
+
+ unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
+ for (unsigned int i = 0; i < texels_to_process; i++)
+ {
+ unsigned int idx = bsd.kmeans_texels[i];
+ bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
+ }
+ }
+
+ return valid;
+}
+
+static void build_partition_table_for_one_partition_count(
+ block_size_descriptor& bsd,
+ bool can_omit_partitionings,
+ unsigned int partition_count_cutoff,
+ unsigned int partition_count,
+ partition_info* ptab,
+ uint64_t* canonical_patterns
+) {
+ unsigned int next_index = 0;
+ bsd.partitioning_count_selected[partition_count - 1] = 0;
+ bsd.partitioning_count_all[partition_count - 1] = 0;
+
+ // Skip tables larger than config max partition count if we can omit modes
+ if (can_omit_partitionings && (partition_count > partition_count_cutoff))
+ {
+ return;
+ }
+
+ // Iterate through twice
+ // - Pass 0: Keep selected partitionings
+ // - Pass 1: Keep non-selected partitionings (skip if in omit mode)
+ unsigned int max_iter = can_omit_partitionings ? 1 : 2;
+
+ // Tracker for things we built in the first iteration
+ uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
+ for (unsigned int x = 0; x < max_iter; x++)
+ {
+ for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
+ {
+ // Don't include things we built in the first pass
+ if ((x == 1) && build[i])
+ {
+ continue;
+ }
+
+ bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
+ if ((x == 0) && !keep_useful)
+ {
+ continue;
+ }
+
+ generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
+ bool keep_canonical = true;
+ for (unsigned int j = 0; j < next_index; j++)
+ {
+ bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns + j * BIT_PATTERN_WORDS);
+ if (match)
+ {
+ keep_canonical = false;
+ break;
+ }
+ }
+
+ if (keep_useful && keep_canonical)
+ {
+ if (x == 0)
+ {
+ bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
+ bsd.partitioning_count_selected[partition_count - 1]++;
+ bsd.partitioning_count_all[partition_count - 1]++;
+ build[i] = 1;
+ next_index++;
+ }
+ }
+ else
+ {
+ if (x == 1)
+ {
+ bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
+ bsd.partitioning_count_all[partition_count - 1]++;
+ next_index++;
+ }
+ }
+ }
+ }
+}
+
+/* See header for documentation. */
+void init_partition_tables(
+ block_size_descriptor& bsd,
+ bool can_omit_partitionings,
+ unsigned int partition_count_cutoff
+) {
+ partition_info* par_tab2 = bsd.partitionings;
+ partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
+ partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
+ partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
+
+ generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
+ bsd.partitioning_count_selected[0] = 1;
+ bsd.partitioning_count_all[0] = 1;
+
+ uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
+
+ build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
+ build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
+ build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
+
+ delete[] canonical_patterns;
+}