summaryrefslogtreecommitdiff
path: root/thirdparty/astcenc/astcenc_decompress_symbolic.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/astcenc/astcenc_decompress_symbolic.cpp')
-rw-r--r--thirdparty/astcenc/astcenc_decompress_symbolic.cpp623
1 files changed, 623 insertions, 0 deletions
diff --git a/thirdparty/astcenc/astcenc_decompress_symbolic.cpp b/thirdparty/astcenc/astcenc_decompress_symbolic.cpp
new file mode 100644
index 0000000000..39e5525c3b
--- /dev/null
+++ b/thirdparty/astcenc/astcenc_decompress_symbolic.cpp
@@ -0,0 +1,623 @@
+// SPDX-License-Identifier: Apache-2.0
+// ----------------------------------------------------------------------------
+// Copyright 2011-2023 Arm Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+// ----------------------------------------------------------------------------
+
+/**
+ * @brief Functions to decompress a symbolic block.
+ */
+
+#include "astcenc_internal.h"
+
+#include <stdio.h>
+#include <assert.h>
+
+/**
+ * @brief Compute the integer linear interpolation of two color endpoints.
+ *
+ * @param decode_mode The ASTC profile (linear or sRGB)
+ * @param color0 The endpoint0 color.
+ * @param color1 The endpoint1 color.
+ * @param weights The interpolation weight (between 0 and 64).
+ *
+ * @return The interpolated color.
+ */
+static vint4 lerp_color_int(
+ astcenc_profile decode_mode,
+ vint4 color0,
+ vint4 color1,
+ vint4 weights
+) {
+ vint4 weight1 = weights;
+ vint4 weight0 = vint4(64) - weight1;
+
+ if (decode_mode == ASTCENC_PRF_LDR_SRGB)
+ {
+ color0 = asr<8>(color0);
+ color1 = asr<8>(color1);
+ }
+
+ vint4 color = (color0 * weight0) + (color1 * weight1) + vint4(32);
+ color = asr<6>(color);
+
+ if (decode_mode == ASTCENC_PRF_LDR_SRGB)
+ {
+ color = color * vint4(257);
+ }
+
+ return color;
+}
+
+
+/**
+ * @brief Convert integer color value into a float value for the decoder.
+ *
+ * @param data The integer color value post-interpolation.
+ * @param lns_mask If set treat lane as HDR (LNS) else LDR (unorm16).
+ *
+ * @return The float color value.
+ */
+static inline vfloat4 decode_texel(
+ vint4 data,
+ vmask4 lns_mask
+) {
+ vint4 color_lns = vint4::zero();
+ vint4 color_unorm = vint4::zero();
+
+ if (any(lns_mask))
+ {
+ color_lns = lns_to_sf16(data);
+ }
+
+ if (!all(lns_mask))
+ {
+ color_unorm = unorm16_to_sf16(data);
+ }
+
+ // Pick components and then convert to FP16
+ vint4 datai = select(color_unorm, color_lns, lns_mask);
+ return float16_to_float(datai);
+}
+
+/* See header for documentation. */
+void unpack_weights(
+ const block_size_descriptor& bsd,
+ const symbolic_compressed_block& scb,
+ const decimation_info& di,
+ bool is_dual_plane,
+ int weights_plane1[BLOCK_MAX_TEXELS],
+ int weights_plane2[BLOCK_MAX_TEXELS]
+) {
+ // Safe to overshoot as all arrays are allocated to full size
+ if (!is_dual_plane)
+ {
+ // Build full 64-entry weight lookup table
+ vint4 tab0(reinterpret_cast<const int*>(scb.weights + 0));
+ vint4 tab1(reinterpret_cast<const int*>(scb.weights + 16));
+ vint4 tab2(reinterpret_cast<const int*>(scb.weights + 32));
+ vint4 tab3(reinterpret_cast<const int*>(scb.weights + 48));
+
+ vint tab0p, tab1p, tab2p, tab3p;
+ vtable_prepare(tab0, tab1, tab2, tab3, tab0p, tab1p, tab2p, tab3p);
+
+ for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
+ {
+ vint summed_value(8);
+ vint weight_count(di.texel_weight_count + i);
+ int max_weight_count = hmax(weight_count).lane<0>();
+
+ promise(max_weight_count > 0);
+ for (int j = 0; j < max_weight_count; j++)
+ {
+ vint texel_weights(di.texel_weights_tr[j] + i);
+ vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);
+
+ summed_value += vtable_8bt_32bi(tab0p, tab1p, tab2p, tab3p, texel_weights) * texel_weights_int;
+ }
+
+ store(lsr<4>(summed_value), weights_plane1 + i);
+ }
+ }
+ else
+ {
+ // Build a 32-entry weight lookup table per plane
+ // Plane 1
+ vint4 tab0_plane1(reinterpret_cast<const int*>(scb.weights + 0));
+ vint4 tab1_plane1(reinterpret_cast<const int*>(scb.weights + 16));
+ vint tab0_plane1p, tab1_plane1p;
+ vtable_prepare(tab0_plane1, tab1_plane1, tab0_plane1p, tab1_plane1p);
+
+ // Plane 2
+ vint4 tab0_plane2(reinterpret_cast<const int*>(scb.weights + 32));
+ vint4 tab1_plane2(reinterpret_cast<const int*>(scb.weights + 48));
+ vint tab0_plane2p, tab1_plane2p;
+ vtable_prepare(tab0_plane2, tab1_plane2, tab0_plane2p, tab1_plane2p);
+
+ for (unsigned int i = 0; i < bsd.texel_count; i += ASTCENC_SIMD_WIDTH)
+ {
+ vint sum_plane1(8);
+ vint sum_plane2(8);
+
+ vint weight_count(di.texel_weight_count + i);
+ int max_weight_count = hmax(weight_count).lane<0>();
+
+ promise(max_weight_count > 0);
+ for (int j = 0; j < max_weight_count; j++)
+ {
+ vint texel_weights(di.texel_weights_tr[j] + i);
+ vint texel_weights_int(di.texel_weight_contribs_int_tr[j] + i);
+
+ sum_plane1 += vtable_8bt_32bi(tab0_plane1p, tab1_plane1p, texel_weights) * texel_weights_int;
+ sum_plane2 += vtable_8bt_32bi(tab0_plane2p, tab1_plane2p, texel_weights) * texel_weights_int;
+ }
+
+ store(lsr<4>(sum_plane1), weights_plane1 + i);
+ store(lsr<4>(sum_plane2), weights_plane2 + i);
+ }
+ }
+}
+
+/**
+ * @brief Return an FP32 NaN value for use in error colors.
+ *
+ * This NaN encoding will turn into 0xFFFF when converted to an FP16 NaN.
+ *
+ * @return The float color value.
+ */
+static float error_color_nan()
+{
+ if32 v;
+ v.u = 0xFFFFE000U;
+ return v.f;
+}
+
+/* See header for documentation. */
+void decompress_symbolic_block(
+ astcenc_profile decode_mode,
+ const block_size_descriptor& bsd,
+ int xpos,
+ int ypos,
+ int zpos,
+ const symbolic_compressed_block& scb,
+ image_block& blk
+) {
+ blk.xpos = xpos;
+ blk.ypos = ypos;
+ blk.zpos = zpos;
+
+ blk.data_min = vfloat4::zero();
+ blk.data_mean = vfloat4::zero();
+ blk.data_max = vfloat4::zero();
+ blk.grayscale = false;
+
+ // If we detected an error-block, blow up immediately.
+ if (scb.block_type == SYM_BTYPE_ERROR)
+ {
+ for (unsigned int i = 0; i < bsd.texel_count; i++)
+ {
+ blk.data_r[i] = error_color_nan();
+ blk.data_g[i] = error_color_nan();
+ blk.data_b[i] = error_color_nan();
+ blk.data_a[i] = error_color_nan();
+ blk.rgb_lns[i] = 0;
+ blk.alpha_lns[i] = 0;
+ }
+
+ return;
+ }
+
+ if ((scb.block_type == SYM_BTYPE_CONST_F16) ||
+ (scb.block_type == SYM_BTYPE_CONST_U16))
+ {
+ vfloat4 color;
+ uint8_t use_lns = 0;
+
+ // UNORM16 constant color block
+ if (scb.block_type == SYM_BTYPE_CONST_U16)
+ {
+ vint4 colori(scb.constant_color);
+
+ // For sRGB decoding a real decoder would just use the top 8 bits for color conversion.
+ // We don't color convert, so rescale the top 8 bits into the full 16 bit dynamic range.
+ if (decode_mode == ASTCENC_PRF_LDR_SRGB)
+ {
+ colori = asr<8>(colori) * 257;
+ }
+
+ vint4 colorf16 = unorm16_to_sf16(colori);
+ color = float16_to_float(colorf16);
+ }
+ // FLOAT16 constant color block
+ else
+ {
+ switch (decode_mode)
+ {
+ case ASTCENC_PRF_LDR_SRGB:
+ case ASTCENC_PRF_LDR:
+ color = vfloat4(error_color_nan());
+ break;
+ case ASTCENC_PRF_HDR_RGB_LDR_A:
+ case ASTCENC_PRF_HDR:
+ // Constant-color block; unpack from FP16 to FP32.
+ color = float16_to_float(vint4(scb.constant_color));
+ use_lns = 1;
+ break;
+ }
+ }
+
+ for (unsigned int i = 0; i < bsd.texel_count; i++)
+ {
+ blk.data_r[i] = color.lane<0>();
+ blk.data_g[i] = color.lane<1>();
+ blk.data_b[i] = color.lane<2>();
+ blk.data_a[i] = color.lane<3>();
+ blk.rgb_lns[i] = use_lns;
+ blk.alpha_lns[i] = use_lns;
+ }
+
+ return;
+ }
+
+ // Get the appropriate partition-table entry
+ int partition_count = scb.partition_count;
+ const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
+
+ // Get the appropriate block descriptors
+ const auto& bm = bsd.get_block_mode(scb.block_mode);
+ const auto& di = bsd.get_decimation_info(bm.decimation_mode);
+
+ bool is_dual_plane = static_cast<bool>(bm.is_dual_plane);
+
+ // Unquantize and undecimate the weights
+ int plane1_weights[BLOCK_MAX_TEXELS];
+ int plane2_weights[BLOCK_MAX_TEXELS];
+ unpack_weights(bsd, scb, di, is_dual_plane, plane1_weights, plane2_weights);
+
+ // Now that we have endpoint colors and weights, we can unpack texel colors
+ int plane2_component = scb.plane2_component;
+ vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_component);
+
+ for (int i = 0; i < partition_count; i++)
+ {
+ // Decode the color endpoints for this partition
+ vint4 ep0;
+ vint4 ep1;
+ bool rgb_lns;
+ bool a_lns;
+
+ unpack_color_endpoints(decode_mode,
+ scb.color_formats[i],
+ scb.color_values[i],
+ rgb_lns, a_lns,
+ ep0, ep1);
+
+ vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns);
+
+ int texel_count = pi.partition_texel_count[i];
+ for (int j = 0; j < texel_count; j++)
+ {
+ int tix = pi.texels_of_partition[i][j];
+ vint4 weight = select(vint4(plane1_weights[tix]), vint4(plane2_weights[tix]), plane2_mask);
+ vint4 color = lerp_color_int(decode_mode, ep0, ep1, weight);
+ vfloat4 colorf = decode_texel(color, lns_mask);
+
+ blk.data_r[tix] = colorf.lane<0>();
+ blk.data_g[tix] = colorf.lane<1>();
+ blk.data_b[tix] = colorf.lane<2>();
+ blk.data_a[tix] = colorf.lane<3>();
+ }
+ }
+}
+
+#if !defined(ASTCENC_DECOMPRESS_ONLY)
+
+/* See header for documentation. */
+float compute_symbolic_block_difference_2plane(
+ const astcenc_config& config,
+ const block_size_descriptor& bsd,
+ const symbolic_compressed_block& scb,
+ const image_block& blk
+) {
+ // If we detected an error-block, blow up immediately.
+ if (scb.block_type == SYM_BTYPE_ERROR)
+ {
+ return ERROR_CALC_DEFAULT;
+ }
+
+ assert(scb.block_mode >= 0);
+ assert(scb.partition_count == 1);
+ assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 1);
+
+ // Get the appropriate block descriptor
+ const block_mode& bm = bsd.get_block_mode(scb.block_mode);
+ const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
+
+ // Unquantize and undecimate the weights
+ int plane1_weights[BLOCK_MAX_TEXELS];
+ int plane2_weights[BLOCK_MAX_TEXELS];
+ unpack_weights(bsd, scb, di, true, plane1_weights, plane2_weights);
+
+ vmask4 plane2_mask = vint4::lane_id() == vint4(scb.plane2_component);
+
+ vfloat4 summa = vfloat4::zero();
+
+ // Decode the color endpoints for this partition
+ vint4 ep0;
+ vint4 ep1;
+ bool rgb_lns;
+ bool a_lns;
+
+ unpack_color_endpoints(config.profile,
+ scb.color_formats[0],
+ scb.color_values[0],
+ rgb_lns, a_lns,
+ ep0, ep1);
+
+ // Unpack and compute error for each texel in the partition
+ unsigned int texel_count = bsd.texel_count;
+ for (unsigned int i = 0; i < texel_count; i++)
+ {
+ vint4 weight = select(vint4(plane1_weights[i]), vint4(plane2_weights[i]), plane2_mask);
+ vint4 colori = lerp_color_int(config.profile, ep0, ep1, weight);
+
+ vfloat4 color = int_to_float(colori);
+ vfloat4 oldColor = blk.texel(i);
+
+ // Compare error using a perceptual decode metric for RGBM textures
+ if (config.flags & ASTCENC_FLG_MAP_RGBM)
+ {
+ // Fail encodings that result in zero weight M pixels. Note that this can cause
+ // "interesting" artifacts if we reject all useful encodings - we typically get max
+ // brightness encodings instead which look just as bad. We recommend users apply a
+ // bias to their stored M value, limiting the lower value to 16 or 32 to avoid
+ // getting small M values post-quantization, but we can't prove it would never
+ // happen, especially at low bit rates ...
+ if (color.lane<3>() == 0.0f)
+ {
+ return -ERROR_CALC_DEFAULT;
+ }
+
+ // Compute error based on decoded RGBM color
+ color = vfloat4(
+ color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,
+ color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,
+ color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,
+ 1.0f
+ );
+
+ oldColor = vfloat4(
+ oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,
+ oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,
+ oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,
+ 1.0f
+ );
+ }
+
+ vfloat4 error = oldColor - color;
+ error = min(abs(error), 1e15f);
+ error = error * error;
+
+ summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
+ }
+
+ return summa.lane<0>();
+}
+
+/* See header for documentation. */
+float compute_symbolic_block_difference_1plane(
+ const astcenc_config& config,
+ const block_size_descriptor& bsd,
+ const symbolic_compressed_block& scb,
+ const image_block& blk
+) {
+ assert(bsd.get_block_mode(scb.block_mode).is_dual_plane == 0);
+
+ // If we detected an error-block, blow up immediately.
+ if (scb.block_type == SYM_BTYPE_ERROR)
+ {
+ return ERROR_CALC_DEFAULT;
+ }
+
+ assert(scb.block_mode >= 0);
+
+ // Get the appropriate partition-table entry
+ unsigned int partition_count = scb.partition_count;
+ const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index);
+
+ // Get the appropriate block descriptor
+ const block_mode& bm = bsd.get_block_mode(scb.block_mode);
+ const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
+
+ // Unquantize and undecimate the weights
+ int plane1_weights[BLOCK_MAX_TEXELS];
+ unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
+
+ vfloat4 summa = vfloat4::zero();
+ for (unsigned int i = 0; i < partition_count; i++)
+ {
+ // Decode the color endpoints for this partition
+ vint4 ep0;
+ vint4 ep1;
+ bool rgb_lns;
+ bool a_lns;
+
+ unpack_color_endpoints(config.profile,
+ scb.color_formats[i],
+ scb.color_values[i],
+ rgb_lns, a_lns,
+ ep0, ep1);
+
+ // Unpack and compute error for each texel in the partition
+ unsigned int texel_count = pi.partition_texel_count[i];
+ for (unsigned int j = 0; j < texel_count; j++)
+ {
+ unsigned int tix = pi.texels_of_partition[i][j];
+ vint4 colori = lerp_color_int(config.profile, ep0, ep1,
+ vint4(plane1_weights[tix]));
+
+ vfloat4 color = int_to_float(colori);
+ vfloat4 oldColor = blk.texel(tix);
+
+ // Compare error using a perceptual decode metric for RGBM textures
+ if (config.flags & ASTCENC_FLG_MAP_RGBM)
+ {
+ // Fail encodings that result in zero weight M pixels. Note that this can cause
+ // "interesting" artifacts if we reject all useful encodings - we typically get max
+ // brightness encodings instead which look just as bad. We recommend users apply a
+ // bias to their stored M value, limiting the lower value to 16 or 32 to avoid
+ // getting small M values post-quantization, but we can't prove it would never
+ // happen, especially at low bit rates ...
+ if (color.lane<3>() == 0.0f)
+ {
+ return -ERROR_CALC_DEFAULT;
+ }
+
+ // Compute error based on decoded RGBM color
+ color = vfloat4(
+ color.lane<0>() * color.lane<3>() * config.rgbm_m_scale,
+ color.lane<1>() * color.lane<3>() * config.rgbm_m_scale,
+ color.lane<2>() * color.lane<3>() * config.rgbm_m_scale,
+ 1.0f
+ );
+
+ oldColor = vfloat4(
+ oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale,
+ oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale,
+ oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale,
+ 1.0f
+ );
+ }
+
+ vfloat4 error = oldColor - color;
+ error = min(abs(error), 1e15f);
+ error = error * error;
+
+ summa += min(dot(error, blk.channel_weight), ERROR_CALC_DEFAULT);
+ }
+ }
+
+ return summa.lane<0>();
+}
+
+/* See header for documentation. */
+float compute_symbolic_block_difference_1plane_1partition(
+ const astcenc_config& config,
+ const block_size_descriptor& bsd,
+ const symbolic_compressed_block& scb,
+ const image_block& blk
+) {
+ // If we detected an error-block, blow up immediately.
+ if (scb.block_type == SYM_BTYPE_ERROR)
+ {
+ return ERROR_CALC_DEFAULT;
+ }
+
+ assert(scb.block_mode >= 0);
+ assert(bsd.get_partition_info(scb.partition_count, scb.partition_index).partition_count == 1);
+
+ // Get the appropriate block descriptor
+ const block_mode& bm = bsd.get_block_mode(scb.block_mode);
+ const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode);
+
+ // Unquantize and undecimate the weights
+ alignas(ASTCENC_VECALIGN) int plane1_weights[BLOCK_MAX_TEXELS];
+ unpack_weights(bsd, scb, di, false, plane1_weights, nullptr);
+
+ // Decode the color endpoints for this partition
+ vint4 ep0;
+ vint4 ep1;
+ bool rgb_lns;
+ bool a_lns;
+
+ unpack_color_endpoints(config.profile,
+ scb.color_formats[0],
+ scb.color_values[0],
+ rgb_lns, a_lns,
+ ep0, ep1);
+
+
+ // Pre-shift sRGB so things round correctly
+ if (config.profile == ASTCENC_PRF_LDR_SRGB)
+ {
+ ep0 = asr<8>(ep0);
+ ep1 = asr<8>(ep1);
+ }
+
+ // Unpack and compute error for each texel in the partition
+ vfloatacc summav = vfloatacc::zero();
+
+ vint lane_id = vint::lane_id();
+ vint srgb_scale(config.profile == ASTCENC_PRF_LDR_SRGB ? 257 : 1);
+
+ unsigned int texel_count = bsd.texel_count;
+ for (unsigned int i = 0; i < texel_count; i += ASTCENC_SIMD_WIDTH)
+ {
+ // Compute EP1 contribution
+ vint weight1 = vint::loada(plane1_weights + i);
+ vint ep1_r = vint(ep1.lane<0>()) * weight1;
+ vint ep1_g = vint(ep1.lane<1>()) * weight1;
+ vint ep1_b = vint(ep1.lane<2>()) * weight1;
+ vint ep1_a = vint(ep1.lane<3>()) * weight1;
+
+ // Compute EP0 contribution
+ vint weight0 = vint(64) - weight1;
+ vint ep0_r = vint(ep0.lane<0>()) * weight0;
+ vint ep0_g = vint(ep0.lane<1>()) * weight0;
+ vint ep0_b = vint(ep0.lane<2>()) * weight0;
+ vint ep0_a = vint(ep0.lane<3>()) * weight0;
+
+ // Shift so things round correctly
+ vint colori_r = asr<6>(ep0_r + ep1_r + vint(32)) * srgb_scale;
+ vint colori_g = asr<6>(ep0_g + ep1_g + vint(32)) * srgb_scale;
+ vint colori_b = asr<6>(ep0_b + ep1_b + vint(32)) * srgb_scale;
+ vint colori_a = asr<6>(ep0_a + ep1_a + vint(32)) * srgb_scale;
+
+ // Compute color diff
+ vfloat color_r = int_to_float(colori_r);
+ vfloat color_g = int_to_float(colori_g);
+ vfloat color_b = int_to_float(colori_b);
+ vfloat color_a = int_to_float(colori_a);
+
+ vfloat color_orig_r = loada(blk.data_r + i);
+ vfloat color_orig_g = loada(blk.data_g + i);
+ vfloat color_orig_b = loada(blk.data_b + i);
+ vfloat color_orig_a = loada(blk.data_a + i);
+
+ vfloat color_error_r = min(abs(color_orig_r - color_r), vfloat(1e15f));
+ vfloat color_error_g = min(abs(color_orig_g - color_g), vfloat(1e15f));
+ vfloat color_error_b = min(abs(color_orig_b - color_b), vfloat(1e15f));
+ vfloat color_error_a = min(abs(color_orig_a - color_a), vfloat(1e15f));
+
+ // Compute squared error metric
+ color_error_r = color_error_r * color_error_r;
+ color_error_g = color_error_g * color_error_g;
+ color_error_b = color_error_b * color_error_b;
+ color_error_a = color_error_a * color_error_a;
+
+ vfloat metric = color_error_r * blk.channel_weight.lane<0>()
+ + color_error_g * blk.channel_weight.lane<1>()
+ + color_error_b * blk.channel_weight.lane<2>()
+ + color_error_a * blk.channel_weight.lane<3>();
+
+ // Mask off bad lanes
+ vmask mask = lane_id < vint(texel_count);
+ lane_id += vint(ASTCENC_SIMD_WIDTH);
+ haccumulate(summav, metric, mask);
+ }
+
+ return hadd_s(summav);
+}
+
+#endif