diff options
Diffstat (limited to 'thirdparty')
160 files changed, 16674 insertions, 9171 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index 6333a0fe87..f467d6a64b 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -52,13 +52,13 @@ Includes some patches in the `patches` folder which have been sent upstream. ## cvtt -- Upstream: https://github.com/elasota/cvtt -- Version: 1.0.0-beta4 (cc8472a04ba110fe999c686d07af40f7839051fd, 2018) +- Upstream: https://github.com/elasota/ConvectionKernels +- Version: git (dc2dbbe0ae2cf2be06ef56d1021e2222a56c7fe2, 2021) - License: MIT Files extracted from upstream source: -- all .cpp, .h, and .txt files in ConvectionKernels/ +- all .cpp, .h, and .txt files except the folders MakeTables and etc2packer. ## doctest @@ -206,7 +206,7 @@ Files extracted from upstream source: ## harfbuzz - Upstream: https://github.com/harfbuzz/harfbuzz -- Version: 3.2.0 (be91d2917d9860326cb5fd1d03ffe1042a72f6d3, 2021) +- Version: 3.3.2 (ac46c3248e8b0316235943175c4d4a11c24dd4a9, 2022) - License: MIT Files extracted from upstream source: @@ -309,7 +309,7 @@ Files extracted from upstream source: ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ -- Version: 1.2.1 (9ce5843dbabcfd3f7c39ec7ceba9cbeb213cbfdf, 2021) +- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022) - License: BSD-3-Clause Files extracted from upstream source: @@ -317,10 +317,6 @@ Files extracted from upstream source: - `src/*` except from: `.am`, `.rc` and `.in` files - `AUTHORS`, `COPYING`, `PATENTS` -Important: The files `utils/bit_reader_utils.{c,h}` have Godot-made -changes to ensure they build for Javascript/HTML5. Those -changes are marked with `// -- GODOT --` comments. - ## mbedtls @@ -469,7 +465,7 @@ Collection of single-file libraries used in Godot components. 
## msdfgen - Upstream: https://github.com/Chlumsky/msdfgen -- Version: 1.9.1 (1b3b6b985094e6f12751177490add3ad11dd91a9, 2010) +- Version: 1.9.2 (64a91eec3ca3787e6f78b4c99fcd3052ad3e37c0, 2021) - License: MIT Files extracted from the upstream source: @@ -606,7 +602,7 @@ instead of `miniz.h` as an external dependency. ## thorvg - Upstream: https://github.com/Samsung/thorvg -- Version: 0.7.0 (e527f565b770f0a41df821e6618ccaeea94f465e, 2021) +- Version: 0.7.1 (d53eb2a880002cb770ace1c1ace9c5dfcfc28252, 2022) - License: MIT Files extracted from upstream source: @@ -614,8 +610,6 @@ Files extracted from upstream source: See `thorvg/update-thorvg.sh` for extraction instructions. Set the version number and run the script. -Patches in the `patches` directory should be re-applied after updates. - ## vhacd diff --git a/thirdparty/cvtt/ConvectionKernels.cpp b/thirdparty/cvtt/ConvectionKernels.cpp deleted file mode 100644 index 8d379344e1..0000000000 --- a/thirdparty/cvtt/ConvectionKernels.cpp +++ /dev/null @@ -1,7586 +0,0 @@ -/* -Convection Texture Tools -Copyright (c) 2018 Eric Lasota - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject -to the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -------------------------------------------------------------------------------------- - -Portions based on DirectX Texture Library (DirectXTex) - -Copyright (c) Microsoft Corporation. All rights reserved. -Licensed under the MIT License. - -http://go.microsoft.com/fwlink/?LinkId=248926 -*/ -#include "ConvectionKernels.h" -#include "ConvectionKernels_BC7_SingleColor.h" - -#if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__) -#define CVTT_USE_SSE2 -#endif - -#ifdef CVTT_USE_SSE2 -#include <emmintrin.h> -#endif - -#include <float.h> -#include <assert.h> -#include <string.h> -#include <algorithm> -#include <math.h> - -#define UNREFERENCED_PARAMETER(n) ((void)n) - -namespace cvtt -{ -#ifdef CVTT_USE_SSE2 - // SSE2 version - struct ParallelMath - { - typedef uint16_t ScalarUInt16; - typedef int16_t ScalarSInt16; - - template<unsigned int TRoundingMode> - struct RoundForScope - { - unsigned int m_oldCSR; - - RoundForScope() - { - m_oldCSR = _mm_getcsr(); - _mm_setcsr((m_oldCSR & ~_MM_ROUND_MASK) | (TRoundingMode)); - } - - ~RoundForScope() - { - _mm_setcsr(m_oldCSR); - } - }; - - struct RoundTowardZeroForScope : RoundForScope<_MM_ROUND_TOWARD_ZERO> - { - }; - - struct RoundTowardNearestForScope : RoundForScope<_MM_ROUND_NEAREST> - { - }; - - struct RoundUpForScope : RoundForScope<_MM_ROUND_UP> - { - }; - - struct RoundDownForScope : RoundForScope<_MM_ROUND_DOWN> - { - }; - - static const int ParallelSize = 8; - - enum Int16Subtype - { - IntSubtype_Signed, - IntSubtype_UnsignedFull, - IntSubtype_UnsignedTruncated, - IntSubtype_Abstract, - }; - - template<int TSubtype> - struct VInt16 - { - __m128i m_value; - - inline VInt16 operator+(int16_t other) const - { - VInt16 result; - 
result.m_value = _mm_add_epi16(m_value, _mm_set1_epi16(static_cast<int16_t>(other))); - return result; - } - - inline VInt16 operator+(const VInt16 &other) const - { - VInt16 result; - result.m_value = _mm_add_epi16(m_value, other.m_value); - return result; - } - - inline VInt16 operator|(const VInt16 &other) const - { - VInt16 result; - result.m_value = _mm_or_si128(m_value, other.m_value); - return result; - } - - inline VInt16 operator&(const VInt16 &other) const - { - VInt16 result; - result.m_value = _mm_and_si128(m_value, other.m_value); - return result; - } - - inline VInt16 operator-(const VInt16 &other) const - { - VInt16 result; - result.m_value = _mm_sub_epi16(m_value, other.m_value); - return result; - } - - inline VInt16 operator<<(int bits) const - { - VInt16 result; - result.m_value = _mm_slli_epi16(m_value, bits); - return result; - } - }; - - typedef VInt16<IntSubtype_Signed> SInt16; - typedef VInt16<IntSubtype_UnsignedFull> UInt16; - typedef VInt16<IntSubtype_UnsignedTruncated> UInt15; - typedef VInt16<IntSubtype_Abstract> AInt16; - - template<int TSubtype> - struct VInt32 - { - __m128i m_values[2]; - - inline VInt32 operator+(const VInt32& other) const - { - VInt32 result; - result.m_values[0] = _mm_add_epi32(m_values[0], other.m_values[0]); - result.m_values[1] = _mm_add_epi32(m_values[1], other.m_values[1]); - return result; - } - - inline VInt32 operator-(const VInt32& other) const - { - VInt32 result; - result.m_values[0] = _mm_sub_epi32(m_values[0], other.m_values[0]); - result.m_values[1] = _mm_sub_epi32(m_values[1], other.m_values[1]); - return result; - } - - inline VInt32 operator<<(const int other) const - { - VInt32 result; - result.m_values[0] = _mm_slli_epi32(m_values[0], other); - result.m_values[1] = _mm_slli_epi32(m_values[1], other); - return result; - } - }; - - typedef VInt32<IntSubtype_Signed> SInt32; - typedef VInt32<IntSubtype_UnsignedTruncated> UInt31; - typedef VInt32<IntSubtype_UnsignedFull> UInt32; - typedef 
VInt32<IntSubtype_Abstract> AInt32; - - template<class TTargetType> - struct LosslessCast - { -#ifdef CVTT_PERMIT_ALIASING - template<int TSrcSubtype> - static const TTargetType& Cast(const VInt32<TSrcSubtype> &src) - { - return reinterpret_cast<VInt32<TSubtype>&>(src); - } - - template<int TSrcSubtype> - static const TTargetType& Cast(const VInt16<TSrcSubtype> &src) - { - return reinterpret_cast<VInt16<TSubtype>&>(src); - } -#else - template<int TSrcSubtype> - static TTargetType Cast(const VInt32<TSrcSubtype> &src) - { - TTargetType result; - result.m_values[0] = src.m_values[0]; - result.m_values[1] = src.m_values[1]; - return result; - } - - template<int TSrcSubtype> - static TTargetType Cast(const VInt16<TSrcSubtype> &src) - { - TTargetType result; - result.m_value = src.m_value; - return result; - } -#endif - }; - - struct Int64 - { - __m128i m_values[4]; - }; - - struct Float - { - __m128 m_values[2]; - - inline Float operator+(const Float &other) const - { - Float result; - result.m_values[0] = _mm_add_ps(m_values[0], other.m_values[0]); - result.m_values[1] = _mm_add_ps(m_values[1], other.m_values[1]); - return result; - } - - inline Float operator+(float other) const - { - Float result; - result.m_values[0] = _mm_add_ps(m_values[0], _mm_set1_ps(other)); - result.m_values[1] = _mm_add_ps(m_values[1], _mm_set1_ps(other)); - return result; - } - - inline Float operator-(const Float& other) const - { - Float result; - result.m_values[0] = _mm_sub_ps(m_values[0], other.m_values[0]); - result.m_values[1] = _mm_sub_ps(m_values[1], other.m_values[1]); - return result; - } - - inline Float operator-() const - { - Float result; - result.m_values[0] = _mm_sub_ps(_mm_setzero_ps(), m_values[0]); - result.m_values[1] = _mm_sub_ps(_mm_setzero_ps(), m_values[1]); - return result; - } - - inline Float operator*(const Float& other) const - { - Float result; - result.m_values[0] = _mm_mul_ps(m_values[0], other.m_values[0]); - result.m_values[1] = _mm_mul_ps(m_values[1], 
other.m_values[1]); - return result; - } - - inline Float operator*(float other) const - { - Float result; - result.m_values[0] = _mm_mul_ps(m_values[0], _mm_set1_ps(other)); - result.m_values[1] = _mm_mul_ps(m_values[1], _mm_set1_ps(other)); - return result; - } - - inline Float operator/(const Float &other) const - { - Float result; - result.m_values[0] = _mm_div_ps(m_values[0], other.m_values[0]); - result.m_values[1] = _mm_div_ps(m_values[1], other.m_values[1]); - return result; - } - - inline Float operator/(float other) const - { - Float result; - result.m_values[0] = _mm_div_ps(m_values[0], _mm_set1_ps(other)); - result.m_values[1] = _mm_div_ps(m_values[1], _mm_set1_ps(other)); - return result; - } - }; - - struct Int16CompFlag - { - __m128i m_value; - - inline Int16CompFlag operator&(const Int16CompFlag &other) const - { - Int16CompFlag result; - result.m_value = _mm_and_si128(m_value, other.m_value); - return result; - } - - inline Int16CompFlag operator|(const Int16CompFlag &other) const - { - Int16CompFlag result; - result.m_value = _mm_or_si128(m_value, other.m_value); - return result; - } - }; - - struct FloatCompFlag - { - __m128 m_values[2]; - }; - - template<int TSubtype> - static VInt16<TSubtype> AbstractAdd(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) - { - VInt16<TSubtype> result; - result.m_value = _mm_add_epi16(a.m_value, b.m_value); - return result; - } - - template<int TSubtype> - static VInt16<TSubtype> AbstractSubtract(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) - { - VInt16<TSubtype> result; - result.m_value = _mm_sub_epi16(a.m_value, b.m_value); - return result; - } - - static Float Select(const FloatCompFlag &flag, const Float &a, const Float &b) - { - Float result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_or_ps(_mm_and_ps(flag.m_values[i], a.m_values[i]), _mm_andnot_ps(flag.m_values[i], b.m_values[i])); - return result; - } - - template<int TSubtype> - static VInt16<TSubtype> Select(const Int16CompFlag 
&flag, const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) - { - VInt16<TSubtype> result; - result.m_value = _mm_or_si128(_mm_and_si128(flag.m_value, a.m_value), _mm_andnot_si128(flag.m_value, b.m_value)); - return result; - } - - template<int TSubtype> - static VInt16<TSubtype> SelectOrZero(const Int16CompFlag &flag, const VInt16<TSubtype> &a) - { - VInt16<TSubtype> result; - result.m_value = _mm_and_si128(flag.m_value, a.m_value); - return result; - } - - template<int TSubtype> - static void ConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src) - { - dest.m_value = _mm_or_si128(_mm_andnot_si128(flag.m_value, dest.m_value), _mm_and_si128(flag.m_value, src.m_value)); - } - - static SInt16 ConditionalNegate(const Int16CompFlag &flag, const SInt16 &v) - { - SInt16 result; - result.m_value = _mm_add_epi16(_mm_xor_si128(flag.m_value, v.m_value), _mm_srli_epi16(flag.m_value, 15)); - return result; - } - - template<int TSubtype> - static void NotConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src) - { - dest.m_value = _mm_or_si128(_mm_and_si128(flag.m_value, dest.m_value), _mm_andnot_si128(flag.m_value, src.m_value)); - } - - static void ConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src) - { - for (int i = 0; i < 2; i++) - dest.m_values[i] = _mm_or_ps(_mm_andnot_ps(flag.m_values[i], dest.m_values[i]), _mm_and_ps(flag.m_values[i], src.m_values[i])); - } - - static void NotConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src) - { - for (int i = 0; i < 2; i++) - dest.m_values[i] = _mm_or_ps(_mm_and_ps(flag.m_values[i], dest.m_values[i]), _mm_andnot_ps(flag.m_values[i], src.m_values[i])); - } - - static void MakeSafeDenominator(Float& v) - { - ConditionalSet(v, Equal(v, MakeFloatZero()), MakeFloat(1.0f)); - } - - static SInt16 TruncateToPrecisionSigned(const SInt16 &v, int precision) - { - int lostBits = 16 - precision; - if (lostBits == 0) - return 
v; - - SInt16 result; - result.m_value = _mm_srai_epi16(_mm_slli_epi16(v.m_value, lostBits), lostBits); - return result; - } - - static UInt16 TruncateToPrecisionUnsigned(const UInt16 &v, int precision) - { - int lostBits = 16 - precision; - if (lostBits == 0) - return v; - - UInt16 result; - result.m_value = _mm_srli_epi16(_mm_slli_epi16(v.m_value, lostBits), lostBits); - return result; - } - - static UInt16 Min(const UInt16 &a, const UInt16 &b) - { - __m128i bitFlip = _mm_set1_epi16(-32768); - - UInt16 result; - result.m_value = _mm_xor_si128(_mm_min_epi16(_mm_xor_si128(a.m_value, bitFlip), _mm_xor_si128(b.m_value, bitFlip)), bitFlip); - return result; - } - - static SInt16 Min(const SInt16 &a, const SInt16 &b) - { - SInt16 result; - result.m_value = _mm_min_epi16(a.m_value, b.m_value); - return result; - } - - static UInt15 Min(const UInt15 &a, const UInt15 &b) - { - UInt15 result; - result.m_value = _mm_min_epi16(a.m_value, b.m_value); - return result; - } - - static Float Min(const Float &a, const Float &b) - { - Float result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_min_ps(a.m_values[i], b.m_values[i]); - return result; - } - - static UInt16 Max(const UInt16 &a, const UInt16 &b) - { - __m128i bitFlip = _mm_set1_epi16(-32768); - - UInt16 result; - result.m_value = _mm_xor_si128(_mm_max_epi16(_mm_xor_si128(a.m_value, bitFlip), _mm_xor_si128(b.m_value, bitFlip)), bitFlip); - return result; - } - - static SInt16 Max(const SInt16 &a, const SInt16 &b) - { - SInt16 result; - result.m_value = _mm_max_epi16(a.m_value, b.m_value); - return result; - } - - static UInt15 Max(const UInt15 &a, const UInt15 &b) - { - UInt15 result; - result.m_value = _mm_max_epi16(a.m_value, b.m_value); - return result; - } - - static Float Max(const Float &a, const Float &b) - { - Float result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_max_ps(a.m_values[i], b.m_values[i]); - return result; - } - - static Float Clamp(const Float &v, float min, float max) - { - 
Float result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_max_ps(_mm_min_ps(v.m_values[i], _mm_set1_ps(max)), _mm_set1_ps(min)); - return result; - } - - static Float Reciprocal(const Float &v) - { - Float result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_rcp_ps(v.m_values[i]); - return result; - } - - static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, UInt15 &chOut) - { - int16_t values[8]; - for (int i = 0; i < 8; i++) - values[i] = inputBlocks[i].m_pixels[pxOffset][channel]; - - chOut.m_value = _mm_set_epi16(values[7], values[6], values[5], values[4], values[3], values[2], values[1], values[0]); - } - - static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, SInt16 &chOut) - { - int16_t values[8]; - for (int i = 0; i < 8; i++) - values[i] = inputBlocks[i].m_pixels[pxOffset][channel]; - - chOut.m_value = _mm_set_epi16(values[7], values[6], values[5], values[4], values[3], values[2], values[1], values[0]); - } - - static Float MakeFloat(float v) - { - Float f; - f.m_values[0] = f.m_values[1] = _mm_set1_ps(v); - return f; - } - - static Float MakeFloatZero() - { - Float f; - f.m_values[0] = f.m_values[1] = _mm_setzero_ps(); - return f; - } - - static UInt16 MakeUInt16(uint16_t v) - { - UInt16 result; - result.m_value = _mm_set1_epi16(static_cast<short>(v)); - return result; - } - - static SInt16 MakeSInt16(int16_t v) - { - SInt16 result; - result.m_value = _mm_set1_epi16(static_cast<short>(v)); - return result; - } - - static AInt16 MakeAInt16(int16_t v) - { - AInt16 result; - result.m_value = _mm_set1_epi16(static_cast<short>(v)); - return result; - } - - static UInt15 MakeUInt15(uint16_t v) - { - UInt15 result; - result.m_value = _mm_set1_epi16(static_cast<short>(v)); - return result; - } - - static SInt32 MakeSInt32(int32_t v) - { - SInt32 result; - result.m_values[0] = _mm_set1_epi32(v); - result.m_values[1] = _mm_set1_epi32(v); - return result; - } - - static 
UInt31 MakeUInt31(uint32_t v) - { - UInt31 result; - result.m_values[0] = _mm_set1_epi32(v); - result.m_values[1] = _mm_set1_epi32(v); - return result; - } - - static uint16_t Extract(const UInt16 &v, int offset) - { - return reinterpret_cast<const uint16_t*>(&v.m_value)[offset]; - } - - static int16_t Extract(const SInt16 &v, int offset) - { - return reinterpret_cast<const int16_t*>(&v.m_value)[offset]; - } - - static uint16_t Extract(const UInt15 &v, int offset) - { - return reinterpret_cast<const uint16_t*>(&v.m_value)[offset]; - } - - static int16_t Extract(const AInt16 &v, int offset) - { - return reinterpret_cast<const int16_t*>(&v.m_value)[offset]; - } - - static void PutUInt16(UInt16 &dest, int offset, uint16_t v) - { - reinterpret_cast<uint16_t*>(&dest)[offset] = v; - } - - static void PutUInt15(UInt15 &dest, int offset, uint16_t v) - { - reinterpret_cast<uint16_t*>(&dest)[offset] = v; - } - - static void PutSInt16(SInt16 &dest, int offset, int16_t v) - { - reinterpret_cast<int16_t*>(&dest)[offset] = v; - } - - static float ExtractFloat(const Float& v, int offset) - { - return reinterpret_cast<const float*>(&v)[offset]; - } - - static void PutFloat(Float &dest, int offset, float v) - { - reinterpret_cast<float*>(&dest)[offset] = v; - } - - static Int16CompFlag Less(const SInt16 &a, const SInt16 &b) - { - Int16CompFlag result; - result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value); - return result; - } - - static Int16CompFlag Less(const UInt15 &a, const UInt15 &b) - { - Int16CompFlag result; - result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value); - return result; - } - - static Int16CompFlag LessOrEqual(const UInt15 &a, const UInt15 &b) - { - Int16CompFlag result; - result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value); - return result; - } - - static FloatCompFlag Less(const Float &a, const Float &b) - { - FloatCompFlag result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_cmplt_ps(a.m_values[i], b.m_values[i]); - return result; - } - - 
static FloatCompFlag LessOrEqual(const Float &a, const Float &b) - { - FloatCompFlag result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_cmple_ps(a.m_values[i], b.m_values[i]); - return result; - } - - template<int TSubtype> - static Int16CompFlag Equal(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) - { - Int16CompFlag result; - result.m_value = _mm_cmpeq_epi16(a.m_value, b.m_value); - return result; - } - - static FloatCompFlag Equal(const Float &a, const Float &b) - { - FloatCompFlag result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_cmpeq_ps(a.m_values[i], b.m_values[i]); - return result; - } - - static Float ToFloat(const UInt16 &v) - { - Float result; - result.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v.m_value, _mm_setzero_si128())); - result.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v.m_value, _mm_setzero_si128())); - return result; - } - - static UInt31 ToUInt31(const UInt16 &v) - { - UInt31 result; - result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()); - result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()); - return result; - } - - static SInt32 ToInt32(const UInt16 &v) - { - SInt32 result; - result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()); - result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()); - return result; - } - - static SInt32 ToInt32(const SInt16 &v) - { - SInt32 result; - result.m_values[0] = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v.m_value), 16); - result.m_values[1] = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v.m_value), 16); - return result; - } - - static Float ToFloat(const SInt16 &v) - { - Float result; - result.m_values[0] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v.m_value), 16)); - result.m_values[1] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v.m_value), 16)); - return result; - } - - static Float ToFloat(const UInt15 &v) 
- { - Float result; - result.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v.m_value, _mm_setzero_si128())); - result.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v.m_value, _mm_setzero_si128())); - return result; - } - - static Float ToFloat(const UInt31 &v) - { - Float result; - result.m_values[0] = _mm_cvtepi32_ps(v.m_values[0]); - result.m_values[1] = _mm_cvtepi32_ps(v.m_values[1]); - return result; - } - - static Int16CompFlag FloatFlagToInt16(const FloatCompFlag &v) - { - __m128i lo = _mm_castps_si128(v.m_values[0]); - __m128i hi = _mm_castps_si128(v.m_values[1]); - - Int16CompFlag result; - result.m_value = _mm_packs_epi32(lo, hi); - return result; - } - - static FloatCompFlag Int16FlagToFloat(const Int16CompFlag &v) - { - __m128i lo = _mm_unpacklo_epi16(v.m_value, v.m_value); - __m128i hi = _mm_unpackhi_epi16(v.m_value, v.m_value); - - FloatCompFlag result; - result.m_values[0] = _mm_castsi128_ps(lo); - result.m_values[1] = _mm_castsi128_ps(hi); - return result; - } - - static Int16CompFlag MakeBoolInt16(bool b) - { - Int16CompFlag result; - if (b) - result.m_value = _mm_set1_epi16(-1); - else - result.m_value = _mm_setzero_si128(); - return result; - } - - static FloatCompFlag MakeBoolFloat(bool b) - { - FloatCompFlag result; - if (b) - result.m_values[0] = result.m_values[1] = _mm_castsi128_ps(_mm_set1_epi32(-1)); - else - result.m_values[0] = result.m_values[1] = _mm_setzero_ps(); - return result; - } - - static Int16CompFlag AndNot(const Int16CompFlag &a, const Int16CompFlag &b) - { - Int16CompFlag result; - result.m_value = _mm_andnot_si128(b.m_value, a.m_value); - return result; - } - - static UInt16 RoundAndConvertToU16(const Float &v, const void* /*roundingMode*/) - { - __m128i lo = _mm_cvtps_epi32(_mm_add_ps(v.m_values[0], _mm_set1_ps(-32768))); - __m128i hi = _mm_cvtps_epi32(_mm_add_ps(v.m_values[1], _mm_set1_ps(-32768))); - - __m128i packed = _mm_packs_epi32(lo, hi); - - UInt16 result; - result.m_value = _mm_xor_si128(packed, 
_mm_set1_epi16(-32768)); - return result; - } - - static UInt15 RoundAndConvertToU15(const Float &v, const void* /*roundingMode*/) - { - __m128i lo = _mm_cvtps_epi32(v.m_values[0]); - __m128i hi = _mm_cvtps_epi32(v.m_values[1]); - - __m128i packed = _mm_packs_epi32(lo, hi); - - UInt15 result; - result.m_value = _mm_packs_epi32(lo, hi); - return result; - } - - static SInt16 RoundAndConvertToS16(const Float &v, const void* /*roundingMode*/) - { - __m128i lo = _mm_cvtps_epi32(v.m_values[0]); - __m128i hi = _mm_cvtps_epi32(v.m_values[1]); - - __m128i packed = _mm_packs_epi32(lo, hi); - - SInt16 result; - result.m_value = _mm_packs_epi32(lo, hi); - return result; - } - - static Float Sqrt(const Float &f) - { - Float result; - for (int i = 0; i < 2; i++) - result.m_values[i] = _mm_sqrt_ps(f.m_values[i]); - return result; - } - - static UInt16 Abs(const SInt16 &a) - { - __m128i signBitsXor = _mm_srai_epi16(a.m_value, 15); - __m128i signBitsAdd = _mm_srli_epi16(a.m_value, 15); - - UInt16 result; - result.m_value = _mm_add_epi16(_mm_xor_si128(a.m_value, signBitsXor), signBitsAdd); - return result; - } - - static Float Abs(const Float& a) - { - __m128 invMask = _mm_set1_ps(-0.0f); - - Float result; - result.m_values[0] = _mm_andnot_ps(invMask, a.m_values[0]); - result.m_values[1] = _mm_andnot_ps(invMask, a.m_values[1]); - return result; - } - - static UInt16 SqDiffUInt8(const UInt15 &a, const UInt15 &b) - { - __m128i diff = _mm_sub_epi16(a.m_value, b.m_value); - - UInt16 result; - result.m_value = _mm_mullo_epi16(diff, diff); - return result; - } - - static Float SqDiffSInt16(const SInt16 &a, const SInt16 &b) - { - __m128i diffU = _mm_sub_epi16(_mm_max_epi16(a.m_value, b.m_value), _mm_min_epi16(a.m_value, b.m_value)); - - __m128i mulHi = _mm_mulhi_epu16(diffU, diffU); - __m128i mulLo = _mm_mullo_epi16(diffU, diffU); - __m128i sqDiffHi = _mm_unpackhi_epi16(mulLo, mulHi); - __m128i sqDiffLo = _mm_unpacklo_epi16(mulLo, mulHi); - - Float result; - result.m_values[0] = 
_mm_cvtepi32_ps(sqDiffLo); - result.m_values[1] = _mm_cvtepi32_ps(sqDiffHi); - - return result; - } - - static Float TwosCLHalfToFloat(const SInt16 &v) - { - __m128i absV = _mm_add_epi16(_mm_xor_si128(v.m_value, _mm_srai_epi16(v.m_value, 15)), _mm_srli_epi16(v.m_value, 15)); - - __m128i signBits = _mm_and_si128(v.m_value, _mm_set1_epi16(-32768)); - __m128i mantissa = _mm_and_si128(v.m_value, _mm_set1_epi16(0x03ff)); - __m128i exponent = _mm_and_si128(v.m_value, _mm_set1_epi16(0x7c00)); - - __m128i isDenormal = _mm_cmpeq_epi16(exponent, _mm_setzero_si128()); - - // Convert exponent to high-bits - exponent = _mm_add_epi16(_mm_srli_epi16(exponent, 3), _mm_set1_epi16(14336)); - - __m128i denormalCorrectionHigh = _mm_and_si128(isDenormal, _mm_or_si128(signBits, _mm_set1_epi16(14336))); - - __m128i highBits = _mm_or_si128(signBits, _mm_or_si128(exponent, _mm_srli_epi16(mantissa, 3))); - __m128i lowBits = _mm_slli_epi16(mantissa, 13); - - __m128i flow = _mm_unpacklo_epi16(lowBits, highBits); - __m128i fhigh = _mm_unpackhi_epi16(lowBits, highBits); - - __m128i correctionLow = _mm_unpacklo_epi16(_mm_setzero_si128(), denormalCorrectionHigh); - __m128i correctionHigh = _mm_unpackhi_epi16(_mm_setzero_si128(), denormalCorrectionHigh); - - Float result; - result.m_values[0] = _mm_sub_ps(_mm_castsi128_ps(flow), _mm_castsi128_ps(correctionLow)); - result.m_values[1] = _mm_sub_ps(_mm_castsi128_ps(fhigh), _mm_castsi128_ps(correctionHigh)); - - return result; - } - - static Float SqDiff2CLFloat(const SInt16 &a, const Float &b) - { - Float fa = TwosCLHalfToFloat(a); - - Float diff = fa - b; - return diff * diff; - } - - static Float SqDiff2CL(const SInt16 &a, const SInt16 &b) - { - Float fa = TwosCLHalfToFloat(a); - Float fb = TwosCLHalfToFloat(b); - - Float diff = fa - fb; - return diff * diff; - } - - static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b) - { - Float fa = TwosCLHalfToFloat(a) * aWeight; - - Float diff = fa - b; - return diff * diff; - } - - 
static UInt16 RightShift(const UInt16 &v, int bits) - { - UInt16 result; - result.m_value = _mm_srli_epi16(v.m_value, bits); - return result; - } - - static UInt31 RightShift(const UInt31 &v, int bits) - { - UInt31 result; - result.m_values[0] = _mm_srli_epi32(v.m_values[0], bits); - result.m_values[1] = _mm_srli_epi32(v.m_values[1], bits); - return result; - } - - static SInt16 RightShift(const SInt16 &v, int bits) - { - SInt16 result; - result.m_value = _mm_srai_epi16(v.m_value, bits); - return result; - } - - static UInt15 RightShift(const UInt15 &v, int bits) - { - UInt15 result; - result.m_value = _mm_srli_epi16(v.m_value, bits); - return result; - } - - static SInt32 RightShift(const SInt32 &v, int bits) - { - SInt32 result; - result.m_values[0] = _mm_srai_epi32(v.m_values[0], bits); - result.m_values[1] = _mm_srai_epi32(v.m_values[1], bits); - return result; - } - - static SInt16 ToSInt16(const SInt32 &v) - { - SInt16 result; - result.m_value = _mm_packs_epi32(v.m_values[0], v.m_values[1]); - return result; - } - - static UInt16 ToUInt16(const UInt32 &v) - { - __m128i low = _mm_srai_epi32(_mm_slli_epi32(v.m_values[0], 16), 16); - __m128i high = _mm_srai_epi32(_mm_slli_epi32(v.m_values[1], 16), 16); - - UInt16 result; - result.m_value = _mm_packs_epi32(low, high); - return result; - } - - static UInt16 ToUInt16(const UInt31 &v) - { - __m128i low = _mm_srai_epi32(_mm_slli_epi32(v.m_values[0], 16), 16); - __m128i high = _mm_srai_epi32(_mm_slli_epi32(v.m_values[1], 16), 16); - - UInt16 result; - result.m_value = _mm_packs_epi32(low, high); - return result; - } - - static UInt15 ToUInt15(const UInt31 &v) - { - UInt15 result; - result.m_value = _mm_packs_epi32(v.m_values[0], v.m_values[1]); - return result; - } - - static SInt32 XMultiply(const SInt16 &a, const SInt16 &b) - { - __m128i high = _mm_mulhi_epi16(a.m_value, b.m_value); - __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); - - SInt32 result; - result.m_values[0] = _mm_unpacklo_epi16(low, high); - 
result.m_values[1] = _mm_unpackhi_epi16(low, high); - return result; - } - - static SInt32 XMultiply(const SInt16 &a, const UInt15 &b) - { - __m128i high = _mm_mulhi_epi16(a.m_value, b.m_value); - __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); - - SInt32 result; - result.m_values[0] = _mm_unpacklo_epi16(low, high); - result.m_values[1] = _mm_unpackhi_epi16(low, high); - return result; - } - - static SInt32 XMultiply(const UInt15 &a, const SInt16 &b) - { - return XMultiply(b, a); - } - - static UInt32 XMultiply(const UInt16 &a, const UInt16 &b) - { - __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value); - __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); - - UInt32 result; - result.m_values[0] = _mm_unpacklo_epi16(low, high); - result.m_values[1] = _mm_unpackhi_epi16(low, high); - return result; - } - - static UInt16 CompactMultiply(const UInt16 &a, const UInt15 &b) - { - UInt16 result; - result.m_value = _mm_mullo_epi16(a.m_value, b.m_value); - return result; - } - - static UInt16 CompactMultiply(const UInt15 &a, const UInt15 &b) - { - UInt16 result; - result.m_value = _mm_mullo_epi16(a.m_value, b.m_value); - return result; - } - - static UInt31 XMultiply(const UInt15 &a, const UInt15 &b) - { - __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value); - __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); - - UInt31 result; - result.m_values[0] = _mm_unpacklo_epi16(low, high); - result.m_values[1] = _mm_unpackhi_epi16(low, high); - return result; - } - - static UInt31 XMultiply(const UInt16 &a, const UInt15 &b) - { - __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value); - __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); - - UInt31 result; - result.m_values[0] = _mm_unpacklo_epi16(low, high); - result.m_values[1] = _mm_unpackhi_epi16(low, high); - return result; - } - - static UInt31 XMultiply(const UInt15 &a, const UInt16 &b) - { - return XMultiply(b, a); - } - - static bool AnySet(const Int16CompFlag &v) - { - return _mm_movemask_epi8(v.m_value) != 0; - } 
- - static bool AllSet(const Int16CompFlag &v) - { - return _mm_movemask_epi8(v.m_value) == 0xffff; - } - - static bool AnySet(const FloatCompFlag &v) - { - return _mm_movemask_ps(v.m_values[0]) != 0 || _mm_movemask_ps(v.m_values[1]) != 0; - } - - static bool AllSet(const FloatCompFlag &v) - { - return _mm_movemask_ps(v.m_values[0]) == 0xf && _mm_movemask_ps(v.m_values[1]) == 0xf; - } - }; - -#else - // Scalar version - struct ParallelMath - { - struct RoundTowardZeroForScope - { - }; - - struct RoundTowardNearestForScope - { - }; - - struct RoundUpForScope - { - }; - - struct RoundDownForScope - { - }; - - static const int ParallelSize = 1; - - enum Int16Subtype - { - IntSubtype_Signed, - IntSubtype_UnsignedFull, - IntSubtype_UnsignedTruncated, - IntSubtype_Abstract, - }; - - typedef int32_t SInt16; - typedef int32_t UInt15; - typedef int32_t UInt16; - typedef int32_t AInt16; - - typedef int32_t SInt32; - typedef int32_t UInt31; - typedef int32_t UInt32; - typedef int32_t AInt32; - - typedef int32_t ScalarUInt16; - typedef int32_t ScalarSInt16; - - typedef float Float; - - template<class TTargetType> - struct LosslessCast - { - static const int32_t& Cast(const int32_t &src) - { - return src; - } - }; - - typedef bool Int16CompFlag; - typedef bool FloatCompFlag; - - static int32_t AbstractAdd(const int32_t &a, const int32_t &b) - { - return a + b; - } - - static int32_t AbstractSubtract(const int32_t &a, const int32_t &b) - { - return a - b; - } - - static float Select(bool flag, float a, float b) - { - return flag ? a : b; - } - - static int32_t Select(bool flag, int32_t a, int32_t b) - { - return flag ? a : b; - } - - static int32_t SelectOrZero(bool flag, int32_t a) - { - return flag ? a : 0; - } - - static void ConditionalSet(int32_t& dest, bool flag, int32_t src) - { - if (flag) - dest = src; - } - - static int32_t ConditionalNegate(bool flag, int32_t v) - { - return (flag) ? 
-v : v; - } - - static void NotConditionalSet(int32_t& dest, bool flag, int32_t src) - { - if (!flag) - dest = src; - } - - static void ConditionalSet(float& dest, bool flag, float src) - { - if (flag) - dest = src; - } - - static void NotConditionalSet(float& dest, bool flag, float src) - { - if (!flag) - dest = src; - } - - static void MakeSafeDenominator(float& v) - { - if (v == 0.0f) - v = 1.0f; - } - - static int32_t SignedRightShift(int32_t v, int bits) - { - return v >> bits; - } - - static int32_t TruncateToPrecisionSigned(int32_t v, int precision) - { - v = (v << (32 - precision)) & 0xffffffff; - return SignedRightShift(v, 32 - precision); - } - - static int32_t TruncateToPrecisionUnsigned(int32_t v, int precision) - { - return v & ((1 << precision) - 1); - } - - static int32_t Min(int32_t a, int32_t b) - { - if (a < b) - return a; - return b; - } - - static float Min(float a, float b) - { - if (a < b) - return a; - return b; - } - - static int32_t Max(int32_t a, int32_t b) - { - if (a > b) - return a; - return b; - } - - static float Max(float a, float b) - { - if (a > b) - return a; - return b; - } - - static float Abs(float a) - { - return fabsf(a); - } - - static int32_t Abs(int32_t a) - { - if (a < 0) - return -a; - return a; - } - - static float Clamp(float v, float min, float max) - { - if (v < min) - return min; - if (v > max) - return max; - return v; - } - - static float Reciprocal(float v) - { - return 1.0f / v; - } - - static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, int32_t& chOut) - { - chOut = inputBlocks[0].m_pixels[pxOffset][channel]; - } - - static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, int32_t& chOut) - { - chOut = inputBlocks[0].m_pixels[pxOffset][channel]; - } - - static float MakeFloat(float v) - { - return v; - } - - static float MakeFloatZero() - { - return 0.0f; - } - - static int32_t MakeUInt16(uint16_t v) - { - return v; - } - - static int32_t 
MakeSInt16(int16_t v) - { - return v; - } - - static int32_t MakeAInt16(int16_t v) - { - return v; - } - - static int32_t MakeUInt15(uint16_t v) - { - return v; - } - - static int32_t MakeSInt32(int32_t v) - { - return v; - } - - static int32_t MakeUInt31(int32_t v) - { - return v; - } - - static int32_t Extract(int32_t v, int offset) - { - UNREFERENCED_PARAMETER(offset); - return v; - } - - static void PutUInt16(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v) - { - UNREFERENCED_PARAMETER(offset); - dest = v; - } - - static void PutUInt15(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v) - { - UNREFERENCED_PARAMETER(offset); - dest = v; - } - - static void PutSInt16(int32_t &dest, int offset, ParallelMath::ScalarSInt16 v) - { - UNREFERENCED_PARAMETER(offset); - dest = v; - } - - static float ExtractFloat(float v, int offset) - { - UNREFERENCED_PARAMETER(offset); - return v; - } - - static void PutFloat(float &dest, int offset, float v) - { - UNREFERENCED_PARAMETER(offset); - dest = v; - } - - static bool Less(int32_t a, int32_t b) - { - return a < b; - } - - static bool Less(float a, float b) - { - return a < b; - } - - static bool LessOrEqual(int32_t a, int32_t b) - { - return a < b; - } - - static bool LessOrEqual(float a, float b) - { - return a < b; - } - - static bool Equal(int32_t a, int32_t b) - { - return a == b; - } - - static bool Equal(float a, float b) - { - return a == b; - } - - static float ToFloat(int32_t v) - { - return static_cast<float>(v); - } - - static int32_t ToUInt31(int32_t v) - { - return v; - } - - static int32_t ToInt32(int32_t v) - { - return v; - } - - static bool FloatFlagToInt16(bool v) - { - return v; - } - - static bool Int16FlagToFloat(bool v) - { - return v; - } - - static bool MakeBoolInt16(bool b) - { - return b; - } - - static bool MakeBoolFloat(bool b) - { - return b; - } - - static bool AndNot(bool a, bool b) - { - return a && !b; - } - - static int32_t RoundAndConvertToInt(float v, const 
ParallelMath::RoundTowardZeroForScope *rtz) - { - UNREFERENCED_PARAMETER(rtz); - return static_cast<int>(v); - } - - static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundUpForScope *ru) - { - UNREFERENCED_PARAMETER(ru); - return static_cast<int>(ceilf(v)); - } - - static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundDownForScope *rd) - { - UNREFERENCED_PARAMETER(rd); - return static_cast<int>(floorf(v)); - } - - static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardNearestForScope *rtn) - { - UNREFERENCED_PARAMETER(rtn); - return static_cast<int>(floorf(v + 0.5f)); - } - - template<class TRoundMode> - static int32_t RoundAndConvertToU16(float v, const TRoundMode *roundingMode) - { - return RoundAndConvertToInt(v, roundingMode); - } - - template<class TRoundMode> - static int32_t RoundAndConvertToU15(float v, const TRoundMode *roundingMode) - { - return RoundAndConvertToInt(v, roundingMode); - } - - template<class TRoundMode> - static int32_t RoundAndConvertToS16(float v, const TRoundMode *roundingMode) - { - return RoundAndConvertToInt(v, roundingMode); - } - - static float Sqrt(float f) - { - return sqrtf(f); - } - - static int32_t SqDiffUInt8(int32_t a, int32_t b) - { - int32_t delta = a - b; - return delta * delta; - } - - static int32_t SqDiffInt16(int32_t a, int32_t b) - { - int32_t delta = a - b; - return delta * delta; - } - - static int32_t SqDiffSInt16(int32_t a, int32_t b) - { - int32_t delta = a - b; - return delta * delta; - } - - static float TwosCLHalfToFloat(int32_t v) - { - int32_t absV = (v < 0) ? -v : v; - - int32_t signBits = (absV & -32768); - int32_t mantissa = (absV & 0x03ff); - int32_t exponent = (absV & 0x7c00); - - bool isDenormal = (exponent == 0); - - // Convert exponent to high-bits - exponent = (exponent >> 3) + 14336; - - int32_t denormalCorrection = (isDenormal ? 
(signBits | 14336) : 0) << 16; - - int32_t fBits = ((exponent | signBits) << 16) | (mantissa << 13); - - float f, correction; - memcpy(&f, &fBits, 4); - memcpy(&correction, &denormalCorrection, 4); - - return f - correction; - } - - static Float SqDiff2CLFloat(const SInt16 &a, const Float &b) - { - Float fa = TwosCLHalfToFloat(a); - - Float diff = fa - b; - return diff * diff; - } - - static Float SqDiff2CL(const SInt16 &a, const SInt16 &b) - { - Float fa = TwosCLHalfToFloat(a); - Float fb = TwosCLHalfToFloat(b); - - Float diff = fa - fb; - return diff * diff; - } - - static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b) - { - Float fa = TwosCLHalfToFloat(a) * aWeight; - - Float diff = fa - b; - return diff * diff; - } - - static int32_t RightShift(int32_t v, int bits) - { - return SignedRightShift(v, bits); - } - - static int32_t ToSInt16(int32_t v) - { - return v; - } - - static int32_t ToUInt16(int32_t v) - { - return v; - } - - static int32_t ToUInt15(int32_t v) - { - return v; - } - - static int32_t XMultiply(int32_t a, int32_t b) - { - return a * b; - } - - static int32_t CompactMultiply(int32_t a, int32_t b) - { - return a * b; - } - - static bool AnySet(bool v) - { - return v; - } - - static bool AllSet(bool v) - { - return v; - } - }; - -#endif - - namespace Internal - { - namespace BC7Data - { - enum AlphaMode - { - AlphaMode_Combined, - AlphaMode_Separate, - AlphaMode_None, - }; - - enum PBitMode - { - PBitMode_PerEndpoint, - PBitMode_PerSubset, - PBitMode_None - }; - - struct BC7ModeInfo - { - PBitMode m_pBitMode; - AlphaMode m_alphaMode; - int m_rgbBits; - int m_alphaBits; - int m_partitionBits; - int m_numSubsets; - int m_indexBits; - int m_alphaIndexBits; - bool m_hasIndexSelector; - }; - - BC7ModeInfo g_modes[] = - { - { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false }, // 0 - { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false }, // 1 - { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, false }, // 2 - 
{ PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false }, // 3 (Mode reference has an error, P-bit is really per-endpoint) - - { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true }, // 4 - { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false }, // 5 - { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6 - { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false } // 7 - }; - - const int g_weight2[] = { 0, 21, 43, 64 }; - const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; - const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; - - const int *g_weightTables[] = - { - NULL, - NULL, - g_weight2, - g_weight3, - g_weight4 - }; - - struct BC6HModeInfo - { - uint16_t m_modeID; - bool m_partitioned; - bool m_transformed; - int m_aPrec; - int m_bPrec[3]; - }; - - // [partitioned][precision] - bool g_hdrModesExistForPrecision[2][17] = - { - //0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - { false, false, false, false, false, false, false, false, false, false, true, true, true, false, false, false, true }, - { false, false, false, false, false, false, true, true, true, true, true, true, false, false, false, false, false }, - }; - - BC6HModeInfo g_hdrModes[] = - { - { 0x00, true, true, 10,{ 5, 5, 5 } }, - { 0x01, true, true, 7,{ 6, 6, 6 } }, - { 0x02, true, true, 11,{ 5, 4, 4 } }, - { 0x06, true, true, 11,{ 4, 5, 4 } }, - { 0x0a, true, true, 11,{ 4, 4, 5 } }, - { 0x0e, true, true, 9,{ 5, 5, 5 } }, - { 0x12, true, true, 8,{ 6, 5, 5 } }, - { 0x16, true, true, 8,{ 5, 6, 5 } }, - { 0x1a, true, true, 8,{ 5, 5, 6 } }, - { 0x1e, true, false, 6,{ 6, 6, 6 } }, - { 0x03, false, false, 10,{ 10, 10, 10 } }, - { 0x07, false, true, 11,{ 9, 9, 9 } }, - { 0x0b, false, true, 12,{ 8, 8, 8 } }, - { 0x0f, false, true, 16,{ 4, 4, 4 } }, - }; - - const int g_maxHDRPrecision = 16; - - static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]); - - static uint16_t g_partitionMap[64] = - 
{ - 0xCCCC, 0x8888, 0xEEEE, 0xECC8, - 0xC880, 0xFEEC, 0xFEC8, 0xEC80, - 0xC800, 0xFFEC, 0xFE80, 0xE800, - 0xFFE8, 0xFF00, 0xFFF0, 0xF000, - 0xF710, 0x008E, 0x7100, 0x08CE, - 0x008C, 0x7310, 0x3100, 0x8CCE, - 0x088C, 0x3110, 0x6666, 0x366C, - 0x17E8, 0x0FF0, 0x718E, 0x399C, - 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, - 0x3c3c, 0x55aa, 0x9696, 0xa55a, - 0x73ce, 0x13c8, 0x324c, 0x3bdc, - 0x6996, 0xc33c, 0x9966, 0x660, - 0x272, 0x4e4, 0x4e40, 0x2720, - 0xc936, 0x936c, 0x39c6, 0x639c, - 0x9336, 0x9cc6, 0x817e, 0xe718, - 0xccf0, 0xfcc, 0x7744, 0xee22, - }; - - static uint32_t g_partitionMap2[64] = - { - 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, - 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050, - 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090, - 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250, - 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0, - 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500, - 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400, - 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, - 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424, - 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, - 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0, - 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, - 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600, - 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000, - 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, - 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254, - }; - - static int g_fixupIndexes2[64] = - { - 15,15,15,15, - 15,15,15,15, - 15,15,15,15, - 15,15,15,15, - 15, 2, 8, 2, - 2, 8, 8,15, - 2, 8, 2, 2, - 8, 8, 2, 2, - - 15,15, 6, 8, - 2, 8,15,15, - 2, 8, 2, 2, - 2,15,15, 6, - 6, 2, 6, 8, - 15,15, 2, 2, - 15,15,15,15, - 15, 2, 2,15, - }; - - static int g_fixupIndexes3[64][2] = - { - { 3,15 },{ 3, 8 },{ 15, 8 },{ 15, 3 }, - { 8,15 },{ 3,15 },{ 15, 3 },{ 15, 8 }, - { 8,15 },{ 8,15 },{ 6,15 },{ 6,15 }, - { 6,15 },{ 5,15 },{ 3,15 },{ 3, 8 }, - { 3,15 },{ 3, 8 },{ 8,15 },{ 15, 3 }, - { 3,15 },{ 3, 8 },{ 6,15 },{ 10, 8 }, - { 5, 3 },{ 8,15 },{ 8, 6 },{ 6,10 
}, - { 8,15 },{ 5,15 },{ 15,10 },{ 15, 8 }, - - { 8,15 },{ 15, 3 },{ 3,15 },{ 5,10 }, - { 6,10 },{ 10, 8 },{ 8, 9 },{ 15,10 }, - { 15, 6 },{ 3,15 },{ 15, 8 },{ 5,15 }, - { 15, 3 },{ 15, 6 },{ 15, 6 },{ 15, 8 }, - { 3,15 },{ 15, 3 },{ 5,15 },{ 5,15 }, - { 5,15 },{ 8,15 },{ 5,15 },{ 10,15 }, - { 5,15 },{ 10,15 },{ 8,15 },{ 13,15 }, - { 15, 3 },{ 12,15 },{ 3,15 },{ 3, 8 }, - }; - - static const unsigned char g_fragments[] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 16 - 0, 1, 2, 3, // 16, 4 - 0, 1, 4, // 20, 3 - 0, 1, 2, 4, // 23, 4 - 2, 3, 7, // 27, 3 - 1, 2, 3, 7, // 30, 4 - 0, 1, 2, 3, 4, 5, 6, 7, // 34, 8 - 0, 1, 4, 8, // 42, 4 - 0, 1, 2, 4, 5, 8, // 46, 6 - 0, 1, 2, 3, 4, 5, 6, 8, // 52, 8 - 1, 4, 5, 6, 9, // 60, 5 - 2, 5, 6, 7, 10, // 65, 5 - 5, 6, 9, 10, // 70, 4 - 2, 3, 7, 11, // 74, 4 - 1, 2, 3, 6, 7, 11, // 78, 6 - 0, 1, 2, 3, 5, 6, 7, 11, // 84, 8 - 0, 1, 2, 3, 8, 9, 10, 11, // 92, 8 - 2, 3, 6, 7, 8, 9, 10, 11, // 100, 8 - 4, 5, 6, 7, 8, 9, 10, 11, // 108, 8 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 12 - 0, 4, 8, 12, // 128, 4 - 0, 2, 3, 4, 6, 7, 8, 12, // 132, 8 - 0, 1, 2, 4, 5, 8, 9, 12, // 140, 8 - 0, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 10 - 3, 6, 7, 8, 9, 12, // 158, 6 - 3, 5, 6, 7, 8, 9, 10, 12, // 164, 8 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 12 - 0, 1, 2, 5, 6, 7, 11, 12, // 184, 8 - 5, 8, 9, 10, 13, // 192, 5 - 8, 12, 13, // 197, 3 - 4, 8, 12, 13, // 200, 4 - 2, 3, 6, 9, 12, 13, // 204, 6 - 0, 1, 2, 3, 8, 9, 12, 13, // 210, 8 - 0, 1, 4, 5, 8, 9, 12, 13, // 218, 8 - 2, 3, 6, 7, 8, 9, 12, 13, // 226, 8 - 2, 3, 5, 6, 9, 10, 12, 13, // 234, 8 - 0, 3, 6, 7, 9, 10, 12, 13, // 242, 8 - 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 12 - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 13 - 2, 3, 4, 7, 8, 11, 12, 13, // 275, 8 - 1, 2, 6, 7, 8, 11, 12, 13, // 283, 8 - 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 10 - 2, 3, 4, 5, 10, 11, 12, 13, // 301, 8 - 0, 1, 6, 7, 10, 11, 12, 13, // 309, 8 - 6, 9, 10, 11, 14, // 317, 5 
- 0, 2, 4, 6, 8, 10, 12, 14, // 322, 8 - 1, 3, 5, 7, 8, 10, 12, 14, // 330, 8 - 1, 3, 4, 6, 9, 11, 12, 14, // 338, 8 - 0, 2, 5, 7, 9, 11, 12, 14, // 346, 8 - 0, 3, 4, 5, 8, 9, 13, 14, // 354, 8 - 2, 3, 4, 7, 8, 9, 13, 14, // 362, 8 - 1, 2, 5, 6, 9, 10, 13, 14, // 370, 8 - 0, 3, 4, 7, 9, 10, 13, 14, // 378, 8 - 0, 3, 5, 6, 8, 11, 13, 14, // 386, 8 - 1, 2, 4, 7, 8, 11, 13, 14, // 394, 8 - 0, 1, 4, 7, 10, 11, 13, 14, // 402, 8 - 0, 3, 6, 7, 10, 11, 13, 14, // 410, 8 - 8, 12, 13, 14, // 418, 4 - 1, 2, 3, 7, 8, 12, 13, 14, // 422, 8 - 4, 8, 9, 12, 13, 14, // 430, 6 - 0, 4, 5, 8, 9, 12, 13, 14, // 436, 8 - 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 10 - 2, 6, 8, 9, 10, 12, 13, 14, // 454, 8 - 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 12 - 0, 7, 9, 10, 11, 12, 13, 14, // 474, 8 - 1, 2, 3, 4, 5, 6, 8, 15, // 482, 8 - 3, 7, 11, 15, // 490, 4 - 0, 1, 3, 4, 5, 7, 11, 15, // 494, 8 - 0, 4, 5, 10, 11, 15, // 502, 6 - 1, 2, 3, 6, 7, 10, 11, 15, // 508, 8 - 0, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 10 - 0, 4, 5, 6, 9, 10, 11, 15, // 526, 8 - 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 12 - 1, 2, 4, 5, 8, 9, 12, 15, // 546, 8 - 2, 3, 5, 6, 8, 9, 12, 15, // 554, 8 - 0, 3, 5, 6, 9, 10, 12, 15, // 562, 8 - 1, 2, 4, 7, 9, 10, 12, 15, // 570, 8 - 1, 2, 5, 6, 8, 11, 12, 15, // 578, 8 - 0, 3, 4, 7, 8, 11, 12, 15, // 586, 8 - 0, 1, 5, 6, 10, 11, 12, 15, // 594, 8 - 1, 2, 6, 7, 10, 11, 12, 15, // 602, 8 - 1, 3, 4, 6, 8, 10, 13, 15, // 610, 8 - 0, 2, 5, 7, 8, 10, 13, 15, // 618, 8 - 0, 2, 4, 6, 9, 11, 13, 15, // 626, 8 - 1, 3, 5, 7, 9, 11, 13, 15, // 634, 8 - 0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 11 - 2, 3, 4, 5, 8, 9, 14, 15, // 653, 8 - 0, 1, 6, 7, 8, 9, 14, 15, // 661, 8 - 0, 1, 5, 10, 14, 15, // 669, 6 - 0, 3, 4, 5, 9, 10, 14, 15, // 675, 8 - 0, 1, 5, 6, 9, 10, 14, 15, // 683, 8 - 11, 14, 15, // 691, 3 - 7, 11, 14, 15, // 694, 4 - 1, 2, 4, 5, 8, 11, 14, 15, // 698, 8 - 0, 1, 4, 7, 8, 11, 14, 15, // 706, 8 - 0, 1, 4, 5, 10, 11, 14, 15, // 714, 8 - 2, 3, 6, 7, 10, 11, 14, 15, 
// 722, 8 - 4, 5, 6, 7, 10, 11, 14, 15, // 730, 8 - 0, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 10 - 0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 12 - 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 13 - 0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 11 - 3, 4, 8, 9, 10, 13, 14, 15, // 784, 8 - 11, 13, 14, 15, // 792, 4 - 0, 1, 2, 4, 11, 13, 14, 15, // 796, 8 - 0, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 10 - 7, 10, 11, 13, 14, 15, // 814, 6 - 3, 6, 7, 10, 11, 13, 14, 15, // 820, 8 - 1, 5, 9, 10, 11, 13, 14, 15, // 828, 8 - 1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 12 - 12, 13, 14, 15, // 848, 4 - 0, 1, 2, 3, 12, 13, 14, 15, // 852, 8 - 0, 1, 4, 5, 12, 13, 14, 15, // 860, 8 - 4, 5, 6, 7, 12, 13, 14, 15, // 868, 8 - 4, 8, 9, 10, 12, 13, 14, 15, // 876, 8 - 0, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 10 - 0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 12 - 0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 12 - 0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 11 - 0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 11 - 7, 9, 10, 11, 12, 13, 14, 15, // 940, 8 - 3, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 10 - 2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 12 - 8, 9, 10, 11, 12, 13, 14, 15, // 970, 8 - 0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 12 - 0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 13 - 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 12 - 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 13 - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 12 - 0, 2, // 1040, 2 - 1, 3, // 1042, 2 - 0, 1, 4, 5, // 1044, 4 - 0, 1, 2, 4, 5, // 1048, 5 - 2, 3, 6, // 1053, 3 - 0, 2, 4, 6, // 1056, 4 - 1, 2, 5, 6, // 1060, 4 - 0, 1, 2, 3, 5, 6, // 1064, 6 - 0, 1, 2, 4, 5, 6, // 1070, 6 - 0, 1, 2, 3, 4, 5, 6, // 1076, 7 - 0, 3, 4, 7, // 1083, 4 - 0, 1, 2, 3, 4, 7, // 1087, 6 - 1, 3, 5, 7, // 1093, 4 - 2, 3, 6, 7, // 1097, 4 - 1, 2, 3, 6, 7, // 1101, 5 - 1, 2, 3, 5, 6, 7, // 1106, 6 - 0, 1, 2, 3, 5, 6, 7, // 1112, 7 - 4, 5, 6, 7, // 1119, 4 - 0, 8, // 
1123, 2 - 0, 1, 4, 5, 8, // 1125, 5 - 0, 1, 8, 9, // 1130, 4 - 4, 5, 8, 9, // 1134, 4 - 0, 1, 4, 5, 8, 9, // 1138, 6 - 2, 6, 8, 9, // 1144, 4 - 6, 7, 8, 9, // 1148, 4 - 0, 2, 4, 6, 8, 10, // 1152, 6 - 1, 2, 5, 6, 9, 10, // 1158, 6 - 0, 3, 4, 7, 9, 10, // 1164, 6 - 0, 1, 2, 8, 9, 10, // 1170, 6 - 4, 5, 6, 8, 9, 10, // 1176, 6 - 3, 11, // 1182, 2 - 2, 3, 6, 7, 11, // 1184, 5 - 0, 3, 8, 11, // 1189, 4 - 0, 3, 4, 7, 8, 11, // 1193, 6 - 1, 3, 5, 7, 9, 11, // 1199, 6 - 2, 3, 10, 11, // 1205, 4 - 1, 5, 10, 11, // 1209, 4 - 4, 5, 10, 11, // 1213, 4 - 6, 7, 10, 11, // 1217, 4 - 2, 3, 6, 7, 10, 11, // 1221, 6 - 1, 2, 3, 9, 10, 11, // 1227, 6 - 5, 6, 7, 9, 10, 11, // 1233, 6 - 8, 9, 10, 11, // 1239, 4 - 4, 12, // 1243, 2 - 0, 1, 2, 3, 4, 5, 8, 12, // 1245, 8 - 8, 9, 12, // 1253, 3 - 0, 4, 5, 8, 9, 12, // 1256, 6 - 0, 1, 4, 5, 8, 9, 12, // 1262, 7 - 2, 3, 5, 6, 8, 9, 12, // 1269, 7 - 1, 5, 9, 13, // 1276, 4 - 6, 7, 9, 13, // 1280, 4 - 1, 4, 7, 10, 13, // 1284, 5 - 1, 6, 8, 11, 13, // 1289, 5 - 0, 1, 12, 13, // 1294, 4 - 4, 5, 12, 13, // 1298, 4 - 0, 1, 6, 7, 12, 13, // 1302, 6 - 0, 1, 4, 8, 12, 13, // 1308, 6 - 8, 9, 12, 13, // 1314, 4 - 4, 8, 9, 12, 13, // 1318, 5 - 4, 5, 8, 9, 12, 13, // 1323, 6 - 0, 4, 5, 8, 9, 12, 13, // 1329, 7 - 0, 1, 6, 10, 12, 13, // 1336, 6 - 3, 6, 7, 9, 10, 12, 13, // 1342, 7 - 0, 1, 10, 11, 12, 13, // 1349, 6 - 2, 4, 7, 9, 14, // 1355, 5 - 4, 5, 10, 14, // 1360, 4 - 2, 6, 10, 14, // 1364, 4 - 2, 5, 8, 11, 14, // 1368, 5 - 0, 2, 12, 14, // 1373, 4 - 8, 10, 12, 14, // 1377, 4 - 4, 6, 8, 10, 12, 14, // 1381, 6 - 13, 14, // 1387, 2 - 9, 10, 13, 14, // 1389, 4 - 5, 6, 9, 10, 13, 14, // 1393, 6 - 0, 1, 2, 12, 13, 14, // 1399, 6 - 4, 5, 6, 12, 13, 14, // 1405, 6 - 8, 9, 12, 13, 14, // 1411, 5 - 8, 9, 10, 12, 13, 14, // 1416, 6 - 7, 15, // 1422, 2 - 0, 5, 10, 15, // 1424, 4 - 0, 1, 2, 3, 6, 7, 11, 15, // 1428, 8 - 10, 11, 15, // 1436, 3 - 0, 1, 5, 6, 10, 11, 15, // 1439, 7 - 3, 6, 7, 10, 11, 15, // 1446, 6 - 12, 15, // 1452, 2 - 0, 3, 12, 15, // 1454, 4 - 
4, 7, 12, 15, // 1458, 4 - 0, 3, 6, 9, 12, 15, // 1462, 6 - 0, 3, 5, 10, 12, 15, // 1468, 6 - 8, 11, 12, 15, // 1474, 4 - 5, 6, 8, 11, 12, 15, // 1478, 6 - 4, 7, 8, 11, 12, 15, // 1484, 6 - 1, 3, 13, 15, // 1490, 4 - 9, 11, 13, 15, // 1494, 4 - 5, 7, 9, 11, 13, 15, // 1498, 6 - 2, 3, 14, 15, // 1504, 4 - 2, 3, 4, 5, 14, 15, // 1508, 6 - 6, 7, 14, 15, // 1514, 4 - 2, 3, 5, 9, 14, 15, // 1518, 6 - 2, 3, 8, 9, 14, 15, // 1524, 6 - 10, 14, 15, // 1530, 3 - 0, 4, 5, 9, 10, 14, 15, // 1533, 7 - 2, 3, 7, 11, 14, 15, // 1540, 6 - 10, 11, 14, 15, // 1546, 4 - 7, 10, 11, 14, 15, // 1550, 5 - 6, 7, 10, 11, 14, 15, // 1555, 6 - 1, 2, 3, 13, 14, 15, // 1561, 6 - 5, 6, 7, 13, 14, 15, // 1567, 6 - 10, 11, 13, 14, 15, // 1573, 5 - 9, 10, 11, 13, 14, 15, // 1578, 6 - 0, 4, 8, 9, 12, 13, 14, 15, // 1584, 8 - 9, 10, 12, 13, 14, 15, // 1592, 6 - 8, 11, 12, 13, 14, 15, // 1598, 6 - 3, 7, 10, 11, 12, 13, 14, 15, // 1604, 8 - }; - static const int g_shapeRanges[][2] = - { - { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 }, - { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 }, - { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 }, - { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 }, - { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 }, - { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 }, - { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 }, - { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 }, - { 669, 6 },{ 675, 8 },{ 683, 8 },{ 691, 3 },{ 
694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 }, - { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 }, - { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 }, - { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 }, - { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 }, - { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 }, - { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 }, - { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 }, - { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 }, - { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 }, - { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 }, - { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 }, - { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 }, - { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 }, - { 1604, 8 }, - }; - static const int g_shapes1[][2] = - { - { 0, 16 } - }; - static const int g_shapes2[64][2] = - { - { 33, 96 },{ 63, 66 },{ 20, 109 },{ 22, 107 },{ 37, 92 
},{ 7, 122 },{ 8, 121 },{ 23, 106 }, - { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 }, - { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 }, - { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 }, - { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 }, - { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 }, - { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 }, - { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 }, - }; - static const int g_shapes3[64][3] = - { - { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 }, - { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 }, - { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 }, - { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 }, - { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 }, - { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 }, - { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 }, - { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 }, - }; - - static const int g_shapeList1[] = - { - 0, - }; - - static const int g_shapeList1Collapse[] = - { - 0, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, - }; - static const int g_shapeList2[] = - { - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, - 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, - 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, - 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, - 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, - 122, 123, 124, 125, 126, 127, 128, - }; - static const int g_shapeList2Collapse[] = - { - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, - 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, - 87, 
88, 89, 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, - 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, - 120, 121, 122, 123, 124, 125, 126, 127, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, - }; - - static const int g_shapeList12[] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, - 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, - 121, 122, 123, 124, 125, 126, 127, 128, - }; - - static const int g_shapeList12Collapse[] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, - 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, - 121, 122, 123, 124, 125, 126, 127, 128, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, - }; - - static const int g_shapeList3[] = - { - 1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29, - 33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109, - 110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135, - 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, - 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, - 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, - 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, - 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, - 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, - 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, - 235, 236, 237, 238, 239, 240, 241, 242, - }; - - static const int g_shapeList3Collapse[] = - { - -1, 0, 1, -1, 2, -1, 3, -1, 4, -1, -1, - -1, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1, - -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, -1, - 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 12, -1, -1, 13, - -1, -1, -1, -1, 14, -1, -1, -1, 15, -1, -1, - 16, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 17, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 18, -1, -1, -1, -1, 19, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 20, -1, -1, 21, - 22, 23, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 24, -1, -1, -1, -1, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, - 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - 95, 
96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, - 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, - 139, - }; - - static const int g_shapeList3Short[] = - { - 1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96, - 106, 110, 123, 131, 132, 136, 142, 143, 146, 148, 160, - 171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232, - 233, 237, 240, - }; - - static const int g_shapeList3ShortCollapse[] = - { - -1, 0, 1, -1, 2, -1, 3, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 4, -1, 5, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, - -1, -1, -1, -1, 8, -1, -1, -1, -1, -1, -1, - 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 11, -1, -1, -1, - 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 13, -1, -1, -1, -1, -1, -1, -1, 14, - 15, -1, -1, -1, 16, -1, -1, -1, -1, -1, 17, - 18, -1, -1, 19, -1, 20, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 21, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 22, -1, -1, -1, 23, - -1, 24, 25, -1, -1, -1, -1, -1, -1, -1, 26, - 27, -1, -1, -1, -1, -1, -1, -1, 28, -1, -1, - -1, -1, -1, -1, -1, -1, -1, 29, -1, -1, -1, - -1, -1, 30, 31, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 32, 33, -1, -1, -1, 34, -1, -1, 35, -1, - -1, - }; - - static const int g_shapeListAll[] = - { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, - 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 
104, 105, 106, 107, 108, 109, - 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, - 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, - 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, - 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, - 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, - 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, - 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, - 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, - 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, - 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, - 242, - }; - - static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]); - static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]); - static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]); - static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]); - static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]); - static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]); - static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]); - - static const int g_maxFragmentsPerMode = (g_numShapes2 > g_numShapes3) ? 
g_numShapes2 : g_numShapes3; - } - - namespace BC6HData - { - enum EField - { - NA, // N/A - M, // Mode - D, // Shape - RW, - RX, - RY, - RZ, - GW, - GX, - GY, - GZ, - BW, - BX, - BY, - BZ, - }; - - struct ModeDescriptor - { - EField m_eField; - uint8_t m_uBit; - }; - - const ModeDescriptor g_modeDescriptors[14][82] = - { - { // Mode 1 (0x00) - 10 5 5 5 - { M, 0 },{ M, 1 },{ GY, 4 },{ BY, 4 },{ BZ, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 2 (0x01) - 7 6 6 6 - { M, 0 },{ M, 1 },{ GY, 5 },{ GZ, 4 },{ GZ, 5 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ 
RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 3 (0x02) - 11 5 4 4 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { RW,10 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 }, - { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 }, - { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 4 (0x06) - 11 4 5 4 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 }, - { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GW,10 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,10 }, - { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 0 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ GY, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 5 (0x0a) - 11 4 4 5 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ 
RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,10 }, - { BY, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,10 }, - { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BW,10 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ BZ, 1 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ BZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 6 (0x0e) - 9 5 5 5 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 7 (0x12) - 8 6 5 5 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ GZ, 4 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 3 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 
2 },{ RX, 3 },{ RX, 4 }, - { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 8 (0x16) - 8 5 6 5 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 0 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ GZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BZ, 1 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 9 (0x1a) - 8 5 5 6 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ BY, 5 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { GZ, 4 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { BZ, 0 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BX, 5 },{ BY, 
0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { BZ, 2 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ BZ, 3 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 10 (0x1e) - 6 6 6 6 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ GZ, 4 },{ BZ, 0 },{ BZ, 1 },{ BY, 4 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GY, 5 },{ BY, 5 },{ BZ, 2 },{ GY, 4 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ GZ, 5 },{ BZ, 3 },{ BZ, 5 },{ BZ, 4 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { RX, 5 },{ GY, 0 },{ GY, 1 },{ GY, 2 },{ GY, 3 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GX, 5 },{ GZ, 0 },{ GZ, 1 },{ GZ, 2 },{ GZ, 3 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BX, 5 },{ BY, 0 },{ BY, 1 },{ BY, 2 },{ BY, 3 },{ RY, 0 },{ RY, 1 },{ RY, 2 },{ RY, 3 },{ RY, 4 }, - { RY, 5 },{ RZ, 0 },{ RZ, 1 },{ RZ, 2 },{ RZ, 3 },{ RZ, 4 },{ RZ, 5 },{ D, 0 },{ D, 1 },{ D, 2 }, - { D, 3 },{ D, 4 }, - }, - - { // Mode 11 (0x03) - 10 10 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RX, 9 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GX, 9 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BX, 9 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 }, - }, - - { // Mode 12 (0x07) - 11 9 - 
{ M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { RX, 5 },{ RX, 6 },{ RX, 7 },{ RX, 8 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GX, 5 },{ GX, 6 },{ GX, 7 },{ GX, 8 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BX, 5 },{ BX, 6 },{ BX, 7 },{ BX, 8 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 }, - }, - - { // Mode 13 (0x0b) - 12 8 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RX, 4 }, - { RX, 5 },{ RX, 6 },{ RX, 7 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GX, 4 }, - { GX, 5 },{ GX, 6 },{ GX, 7 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BX, 4 }, - { BX, 5 },{ BX, 6 },{ BX, 7 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 }, - }, - - { // Mode 14 (0x0f) - 16 4 - { M, 0 },{ M, 1 },{ M, 2 },{ M, 3 },{ M, 4 },{ RW, 0 },{ RW, 1 },{ RW, 2 },{ RW, 3 },{ RW, 4 }, - { RW, 5 },{ RW, 6 },{ RW, 7 },{ RW, 8 },{ RW, 9 },{ GW, 0 },{ GW, 1 },{ GW, 2 },{ GW, 3 },{ GW, 4 }, - { GW, 5 },{ GW, 6 },{ GW, 7 },{ GW, 8 },{ 
GW, 9 },{ BW, 0 },{ BW, 1 },{ BW, 2 },{ BW, 3 },{ BW, 4 }, - { BW, 5 },{ BW, 6 },{ BW, 7 },{ BW, 8 },{ BW, 9 },{ RX, 0 },{ RX, 1 },{ RX, 2 },{ RX, 3 },{ RW,15 }, - { RW,14 },{ RW,13 },{ RW,12 },{ RW,11 },{ RW,10 },{ GX, 0 },{ GX, 1 },{ GX, 2 },{ GX, 3 },{ GW,15 }, - { GW,14 },{ GW,13 },{ GW,12 },{ GW,11 },{ GW,10 },{ BX, 0 },{ BX, 1 },{ BX, 2 },{ BX, 3 },{ BW,15 }, - { BW,14 },{ BW,13 },{ BW,12 },{ BW,11 },{ BW,10 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 },{ NA, 0 }, - { NA, 0 },{ NA, 0 }, - }, - }; - } - - struct PackingVector - { - uint32_t m_vector[4]; - int m_offset; - - void Init() - { - for (int i = 0; i < 4; i++) - m_vector[i] = 0; - - m_offset = 0; - } - - inline void Pack(ParallelMath::ScalarUInt16 value, int bits) - { - int vOffset = m_offset >> 5; - int bitOffset = m_offset & 0x1f; - - m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff); - - int overflowBits = bitOffset + bits - 32; - if (overflowBits > 0) - m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits)); - - m_offset += bits; - } - - inline void Flush(uint8_t* output) - { - assert(m_offset == 128); - - for (int v = 0; v < 4; v++) - { - uint32_t chunk = m_vector[v]; - for (int b = 0; b < 4; b++) - output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff); - } - } - }; - - - struct UnpackingVector - { - uint32_t m_vector[4]; - - void Init(const uint8_t *bytes) - { - for (int i = 0; i < 4; i++) - m_vector[i] = 0; - - for (int b = 0; b < 16; b++) - m_vector[b / 4] |= (bytes[b] << ((b % 4) * 8)); - } - - inline ParallelMath::ScalarUInt16 Unpack(int bits) - { - uint32_t bitMask = (1 << bits) - 1; - - ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask); - - for (int i = 0; i < 4; i++) - { - m_vector[i] >>= bits; - if (i != 3) - m_vector[i] |= (m_vector[i + 1] & bitMask) << 
(32 - bits); - } - - return result; - } - }; - - void ComputeTweakFactors(int tweak, int range, float *outFactors) - { - int totalUnits = range - 1; - int minOutsideUnits = ((tweak >> 1) & 1); - int maxOutsideUnits = (tweak & 1); - int insideUnits = totalUnits - minOutsideUnits - maxOutsideUnits; - - outFactors[0] = -static_cast<float>(minOutsideUnits) / static_cast<float>(insideUnits); - outFactors[1] = static_cast<float>(maxOutsideUnits) / static_cast<float>(insideUnits) + 1.0f; - } - - ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned) - { - if (isSigned) - { - ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f)); - return (v * 32.0f + offset) / 31.0f; - } - else - return (v * 64.0f + 30.0f) / 31.0f; - } - - ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v) - { -#ifdef CVTT_ENABLE_ASSERTS - for (int i = 0; i < ParallelMath::ParallelSize; i++) - assert(ParallelMath::Extract(v, i) != -32768) -#endif - - ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0)); - ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v)); - - ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31)); - ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5); - ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted); - ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768)); - - return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits; - } - - ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v) - { - return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, 
ParallelMath::MakeUInt15(31)), 6)); - } - - void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned) - { - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - { - if (isSigned) - outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch]))); - else - outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch]))); - } - } - } - - template<int TVectorSize> - class UnfinishedEndpoints - { - public: - typedef ParallelMath::Float MFloat; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::SInt32 MSInt32; - - UnfinishedEndpoints() - { - } - - UnfinishedEndpoints(const MFloat *base, const MFloat *offset) - { - for (int ch = 0; ch < TVectorSize; ch++) - m_base[ch] = base[ch]; - for (int ch = 0; ch < TVectorSize; ch++) - m_offset[ch] = offset[ch]; - } - - UnfinishedEndpoints(const UnfinishedEndpoints& other) - { - for (int ch = 0; ch < TVectorSize; ch++) - m_base[ch] = other.m_base[ch]; - for (int ch = 0; ch < TVectorSize; ch++) - m_offset[ch] = other.m_offset[ch]; - } - - void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode) - { - float tweakFactors[2]; - ComputeTweakFactors(tweak, range, tweakFactors); - - for (int ch = 0; ch < TVectorSize; ch++) - { - MUInt15 channelEPs[2]; - for (int epi = 0; epi < 2; epi++) - { - MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f); - channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode); - } - - outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[0]); - outEP1[ch] = 
ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[1]); - } - } - - void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode) - { - float tweakFactors[2]; - ComputeTweakFactors(tweak, range, tweakFactors); - - for (int ch = 0; ch < TVectorSize; ch++) - { - MSInt16 channelEPs[2]; - for (int epi = 0; epi < 2; epi++) - { - MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f); - channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode); - } - - outEP0[ch] = channelEPs[0]; - outEP1[ch] = channelEPs[1]; - } - } - - void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1) - { - ParallelMath::RoundTowardNearestForScope roundingMode; - - float tweakFactors[2]; - ComputeTweakFactors(tweak, range, tweakFactors); - - for (int ch = 0; ch < TVectorSize; ch++) - { - MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f); - MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f); - outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode); - outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode); - } - } - - template<int TNewVectorSize> - UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler) - { - MFloat newBase[TNewVectorSize]; - MFloat newOffset[TNewVectorSize]; - - for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++) - { - newBase[ch] = m_base[ch]; - newOffset[ch] = m_offset[ch]; - } - - MFloat fillerV = ParallelMath::MakeFloat(filler); - - for (int ch = TVectorSize; ch < TNewVectorSize; ch++) - { - newBase[ch] = fillerV; - newOffset[ch] = ParallelMath::MakeFloatZero(); - } - - return UnfinishedEndpoints<TNewVectorSize>(newBase, newOffset); - } - - private: - MFloat m_base[TVectorSize]; - MFloat m_offset[TVectorSize]; - }; - - template<int TMatrixSize> - class PackedCovarianceMatrix - { - public: - // 0: xx, - // 1: 
xy, yy - // 3: xz, yz, zz - // 6: xw, yw, zw, ww - // ... etc. - static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2; - - typedef ParallelMath::Float MFloat; - - PackedCovarianceMatrix() - { - for (int i = 0; i < PyramidSize; i++) - m_values[i] = ParallelMath::MakeFloatZero(); - } - - void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight) - { - int index = 0; - for (int row = 0; row < TMatrixSize; row++) - { - for (int col = 0; col <= row; col++) - { - m_values[index] = m_values[index] + vec[row] * vec[col] * weight; - index++; - } - } - } - - void Product(MFloat *outVec, const MFloat *inVec) - { - for (int row = 0; row < TMatrixSize; row++) - { - MFloat sum = ParallelMath::MakeFloatZero(); - - int index = (row * (row + 1)) >> 1; - for (int col = 0; col < TMatrixSize; col++) - { - sum = sum + inVec[col] * m_values[index]; - if (col >= row) - index += col + 1; - else - index++; - } - - outVec[row] = sum; - } - } - - private: - ParallelMath::Float m_values[PyramidSize]; - }; - - static const int NumEndpointSelectorPasses = 3; - - template<int TVectorSize, int TIterationCount> - class EndpointSelector - { - public: - typedef ParallelMath::Float MFloat; - - EndpointSelector() - { - for (int ch = 0; ch < TVectorSize; ch++) - { - m_centroid[ch] = ParallelMath::MakeFloatZero(); - m_direction[ch] = ParallelMath::MakeFloatZero(); - } - m_weightTotal = ParallelMath::MakeFloatZero(); - m_minDist = ParallelMath::MakeFloat(FLT_MAX); - m_maxDist = ParallelMath::MakeFloat(-FLT_MAX); - } - - void ContributePass(const MFloat *value, int pass, const MFloat &weight) - { - if (pass == 0) - ContributeCentroid(value, weight); - else if (pass == 1) - ContributeDirection(value, weight); - else if (pass == 2) - ContributeMinMax(value); - } - - void FinishPass(int pass) - { - if (pass == 0) - FinishCentroid(); - else if (pass == 1) - FinishDirection(); - } - - UnfinishedEndpoints<TVectorSize> GetEndpoints(const float channelWeights[TVectorSize]) const - 
{ - MFloat unweightedBase[TVectorSize]; - MFloat unweightedOffset[TVectorSize]; - - for (int ch = 0; ch < TVectorSize; ch++) - { - MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist; - MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist; - - float safeWeight = channelWeights[ch]; - if (safeWeight == 0.f) - safeWeight = 1.0f; - - unweightedBase[ch] = min / channelWeights[ch]; - unweightedOffset[ch] = (max - min) / channelWeights[ch]; - } - - return UnfinishedEndpoints<TVectorSize>(unweightedBase, unweightedOffset); - } - - private: - void ContributeCentroid(const MFloat *value, const MFloat &weight) - { - for (int ch = 0; ch < TVectorSize; ch++) - m_centroid[ch] = m_centroid[ch] + value[ch] * weight; - m_weightTotal = m_weightTotal + weight; - } - - void FinishCentroid() - { - MFloat denom = m_weightTotal; - ParallelMath::MakeSafeDenominator(denom); - - for (int ch = 0; ch < TVectorSize; ch++) - m_centroid[ch] = m_centroid[ch] / denom; - } - - void ContributeDirection(const MFloat *value, const MFloat &weight) - { - MFloat diff[TVectorSize]; - for (int ch = 0; ch < TVectorSize; ch++) - diff[ch] = value[ch] - m_centroid[ch]; - - m_covarianceMatrix.Add(diff, weight); - } - - void FinishDirection() - { - MFloat approx[TVectorSize]; - for (int ch = 0; ch < TVectorSize; ch++) - approx[ch] = ParallelMath::MakeFloat(1.0f); - - for (int i = 0; i < TIterationCount; i++) - { - MFloat product[TVectorSize]; - m_covarianceMatrix.Product(product, approx); - - MFloat largestComponent = product[0]; - for (int ch = 1; ch < TVectorSize; ch++) - largestComponent = ParallelMath::Max(largestComponent, product[ch]); - - // product = largestComponent*newApprox - ParallelMath::MakeSafeDenominator(largestComponent); - for (int ch = 0; ch < TVectorSize; ch++) - approx[ch] = product[ch] / largestComponent; - } - - // Normalize - MFloat approxLen = ParallelMath::MakeFloatZero(); - for (int ch = 0; ch < TVectorSize; ch++) - approxLen = approxLen + approx[ch] * approx[ch]; - - 
approxLen = ParallelMath::Sqrt(approxLen); - - ParallelMath::MakeSafeDenominator(approxLen); - - for (int ch = 0; ch < TVectorSize; ch++) - m_direction[ch] = approx[ch] / approxLen; - } - - void ContributeMinMax(const MFloat *value) - { - MFloat dist = ParallelMath::MakeFloatZero(); - for (int ch = 0; ch < TVectorSize; ch++) - dist = dist + m_direction[ch] * (value[ch] - m_centroid[ch]); - - m_minDist = ParallelMath::Min(m_minDist, dist); - m_maxDist = ParallelMath::Max(m_maxDist, dist); - } - - ParallelMath::Float m_centroid[TVectorSize]; - ParallelMath::Float m_direction[TVectorSize]; - PackedCovarianceMatrix<TVectorSize> m_covarianceMatrix; - ParallelMath::Float m_weightTotal; - - ParallelMath::Float m_minDist; - ParallelMath::Float m_maxDist; - }; - - static const ParallelMath::UInt16 g_weightReciprocals[] = - { - ParallelMath::MakeUInt16(0), // -1 - ParallelMath::MakeUInt16(0), // 0 - ParallelMath::MakeUInt16(32768), // 1 - ParallelMath::MakeUInt16(16384), // 2 - ParallelMath::MakeUInt16(10923), // 3 - ParallelMath::MakeUInt16(8192), // 4 - ParallelMath::MakeUInt16(6554), // 5 - ParallelMath::MakeUInt16(5461), // 6 - ParallelMath::MakeUInt16(4681), // 7 - ParallelMath::MakeUInt16(4096), // 8 - ParallelMath::MakeUInt16(3641), // 9 - ParallelMath::MakeUInt16(3277), // 10 - ParallelMath::MakeUInt16(2979), // 11 - ParallelMath::MakeUInt16(2731), // 12 - ParallelMath::MakeUInt16(2521), // 13 - ParallelMath::MakeUInt16(2341), // 14 - ParallelMath::MakeUInt16(2185), // 15 - }; - - template<int TVectorSize> - class IndexSelector - { - public: - typedef ParallelMath::Float MFloat; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::AInt16 MAInt16; - typedef ParallelMath::SInt32 MSInt32; - typedef ParallelMath::UInt31 MUInt31; - - template<class TInterpolationEPType, class TColorEPType> - void Init(const float *channelWeights, const TInterpolationEPType 
interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) - { - // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. - // We need to select indexes using the color-space endpoints. - - m_isUniform = true; - for (int ch = 1; ch < TVectorSize; ch++) - { - if (channelWeights[ch] != channelWeights[0]) - m_isUniform = false; - } - - // To work with channel weights, we need something where: - // pxDiff = px - ep[0] - // epDiff = ep[1] - ep[0] - // - // weightedEPDiff = epDiff * channelWeights - // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) - // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) - // index = normalizedIndex * maxValue - // - // Equivalent to: - // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) - // index = dot(axis, pxDiff) - - for (int ep = 0; ep < 2; ep++) - for (int ch = 0; ch < TVectorSize; ch++) - m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]); - - m_range = range; - m_maxValue = static_cast<float>(range - 1); - - MFloat epDiffWeighted[TVectorSize]; - for (int ch = 0; ch < TVectorSize; ch++) - { - m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); - MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); - epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; - } - - MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; - for (int ch = 1; ch < TVectorSize; ch++) - lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; - - ParallelMath::MakeSafeDenominator(lenSquared); - - MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; - - for (int ch = 0; ch < TVectorSize; ch++) - m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; - } - - template<bool TSigned> - void 
Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) - { - MAInt16 converted[2][TVectorSize]; - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < TVectorSize; ch++) - converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]); - - Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range); - } - - void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) - { - MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); - - for (int ch = 0; ch < numRealChannels; ch++) - { - MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); - MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); - pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); - } - } - - void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) - { - MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); - - for (int ch = 0; ch < numRealChannels; ch++) - { - MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); - MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); - pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); - } - } - - void 
ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) - { - ReconstructLDR_BC7(index, pixel, TVectorSize); - } - - void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) - { - ReconstructLDRPrecise(index, pixel, TVectorSize); - } - - MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const - { - MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; - for (int ch = 1; ch < TVectorSize; ch++) - dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; - - return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); - } - - protected: - MAInt16 m_endPoint[2][TVectorSize]; - - private: - MFloat m_origin[TVectorSize]; - MFloat m_axis[TVectorSize]; - int m_range; - float m_maxValue; - bool m_isUniform; - }; - - - template<int TVectorSize> - class IndexSelectorHDR : public IndexSelector<TVectorSize> - { - public: - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt31 MUInt31; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::SInt32 MSInt32; - typedef ParallelMath::Float MFloat; - - private: - - MUInt15 InvertSingle(const MUInt15& anIndex) const - { - MUInt15 inverted = m_maxValueMinusOne - anIndex; - return ParallelMath::Select(m_isInverted, inverted, anIndex); - } - - void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const - { - MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); - - for (int ch = 0; ch < TVectorSize; ch++) - { - MSInt16 ep0 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]); - MSInt16 ep1 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]); - - MSInt32 pixel32 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); - - pixel32 = ParallelMath::RightShift(pixel32 + 
ParallelMath::MakeSInt32(32), 6); - - pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(pixel32)); - } - } - - void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const - { - MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); - - for (int ch = 0; ch < TVectorSize; ch++) - { - MUInt16 ep0 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]); - MUInt16 ep1 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]); - - MUInt31 pixel31 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); - - pixel31 = ParallelMath::RightShift(pixel31 + ParallelMath::MakeUInt31(32), 6); - - pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(pixel31))); - } - } - - MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const - { - MFloat diff = pixel[ch] - m_reconstructedInterpolators[index][ch]; - return diff * diff; - } - - MFloat ErrorForInterpolator(int index, const MFloat *pixel) const - { - MFloat error = ErrorForInterpolatorComponent(index, 0, pixel); - for (int ch = 1; ch < TVectorSize; ch++) - error = error + ErrorForInterpolatorComponent(index, ch, pixel); - return error; - } - - public: - - void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights) - { - assert(range <= 16); - - m_range = range; - - m_isInverted = ParallelMath::MakeBoolInt16(false); - m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1)); - - if (!fastIndexing) - { - for (int i = 0; i < range; i++) - { - MSInt16 recon2CL[TVectorSize]; - - if (isSigned) - ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL); - else - ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL); - - for (int 
ch = 0; ch < TVectorSize; ch++) - m_reconstructedInterpolators[i][ch] = ParallelMath::TwosCLHalfToFloat(recon2CL[ch]) * channelWeights[ch]; - } - } - } - - void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const - { - ReconstructHDRSignedUninverted(InvertSingle(index), pixel); - } - - void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const - { - ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel); - } - - void ConditionalInvert(const ParallelMath::Int16CompFlag &invert) - { - m_isInverted = invert; - } - - MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const - { - MUInt15 index = ParallelMath::MakeUInt15(0); - - MFloat bestError = ErrorForInterpolator(0, pixel); - for (int i = 1; i < m_range; i++) - { - MFloat error = ErrorForInterpolator(i, pixel); - ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); - ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(static_cast<uint16_t>(i))); - bestError = ParallelMath::Min(bestError, error); - } - - return InvertSingle(index); - } - - MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const - { - return InvertSingle(this->SelectIndexLDR(pixel, rtn)); - } - - private: - MFloat m_reconstructedInterpolators[16][TVectorSize]; - ParallelMath::Int16CompFlag m_isInverted; - MUInt15 m_maxValueMinusOne; - int m_range; - }; - - // Solve for a, b where v = a*t + b - // This allows endpoints to be mapped to where T=0 and T=1 - // Least squares from totals: - // a = (tv - t*v/w)/(tt - t*t/w) - // b = (v - a*t)/w - template<int TVectorSize> - class EndpointRefiner - { - public: - typedef ParallelMath::Float MFloat; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::AInt16 MAInt16; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::SInt32 MSInt32; - - 
MFloat m_tv[TVectorSize]; - MFloat m_v[TVectorSize]; - MFloat m_tt; - MFloat m_t; - MFloat m_w; - int m_wu; - - float m_rcpMaxIndex; - float m_channelWeights[TVectorSize]; - float m_rcpChannelWeights[TVectorSize]; - - void Init(int indexRange, const float channelWeights[TVectorSize]) - { - for (int ch = 0; ch < TVectorSize; ch++) - { - m_tv[ch] = ParallelMath::MakeFloatZero(); - m_v[ch] = ParallelMath::MakeFloatZero(); - } - m_tt = ParallelMath::MakeFloatZero(); - m_t = ParallelMath::MakeFloatZero(); - m_w = ParallelMath::MakeFloatZero(); - - m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1); - - for (int ch = 0; ch < TVectorSize; ch++) - { - m_channelWeights[ch] = channelWeights[ch]; - m_rcpChannelWeights[ch] = 1.0f; - if (m_channelWeights[ch] != 0.0f) - m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch]; - } - - m_wu = 0; - } - - void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight) - { - MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; - - for (int ch = 0; ch < TVectorSize; ch++) - { - MFloat v = pwFloatPixel[ch] * weight; - - m_tv[ch] = m_tv[ch] + t * v; - m_v[ch] = m_v[ch] + v; - } - m_tt = m_tt + weight * t * t; - m_t = m_t + weight * t; - m_w = m_w + weight; - } - - void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels) - { - MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; - - for (int ch = 0; ch < numRealChannels; ch++) - { - MFloat v = pwFloatPixel[ch]; - - m_tv[ch] = m_tv[ch] + t * v; - m_v[ch] = m_v[ch] + v; - } - m_tt = m_tt + t * t; - m_t = m_t + t; - m_wu++; - } - - void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index) - { - ContributeUnweightedPW(floatPixel, index, TVectorSize); - } - - void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize]) - { - // a = (tv - t*v/w)/(tt - t*t/w) - // b = (v - a*t)/w - MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu)); - - ParallelMath::MakeSafeDenominator(w); - MFloat 
wRcp = ParallelMath::Reciprocal(w); - - MFloat adenom = (m_tt * w - m_t * m_t) * wRcp; - - ParallelMath::FloatCompFlag adenomZero = ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero()); - ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f)); - - for (int ch = 0; ch < TVectorSize; ch++) - { - /* - if (adenom == 0.0) - p1 = p2 = er.v / er.w; - else - { - float4 a = (er.tv - er.t*er.v / er.w) / adenom; - float4 b = (er.v - a * er.t) / er.w; - p1 = b; - p2 = a + b; - } - */ - - MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom; - MFloat b = (m_v[ch] - a * m_t) * wRcp; - - MFloat p1 = b; - MFloat p2 = a + b; - - ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp)); - ParallelMath::ConditionalSet(p2, adenomZero, p1); - - // Unweight - float inverseWeight = m_rcpChannelWeights[ch]; - - endPoint[0][ch] = p1 * inverseWeight; - endPoint[1][ch] = p2 * inverseWeight; - } - } - - void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - MFloat floatEndPoint[2][TVectorSize]; - GetRefinedEndpoints(floatEndPoint); - - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < TVectorSize; ch++) - endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode); - } - - void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode); - } - - void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - MFloat floatEndPoint[2][TVectorSize]; - GetRefinedEndpoints(floatEndPoint); - - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < TVectorSize; ch++) - { - MFloat f = floatEndPoint[epi][ch]; - if (isSigned) - endPoint[epi][ch] = 
ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode)); - else - endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode)); - } - } - } - }; - - template<int TVectorSize> - class AggregatedError - { - public: - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt31 MUInt31; - typedef ParallelMath::Float MFloat; - - AggregatedError() - { - for (int ch = 0; ch < TVectorSize; ch++) - m_errorUnweighted[ch] = ParallelMath::MakeUInt31(0); - } - - void Add(const MUInt16 &channelErrorUnweighted, int ch) - { - m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted); - } - - MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const - { - if (flags & cvtt::Flags::Uniform) - { - MUInt31 total = m_errorUnweighted[0]; - for (int ch = 1; ch < TVectorSize; ch++) - total = total + m_errorUnweighted[ch]; - return ParallelMath::ToFloat(total); - } - else - { - MFloat total = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0]; - for (int ch = 1; ch < TVectorSize; ch++) - total = total + ParallelMath::ToFloat(m_errorUnweighted[ch]) * channelWeightsSq[ch]; - return total; - } - } - - private: - MUInt31 m_errorUnweighted[TVectorSize]; - }; - - class BCCommon - { - public: - typedef ParallelMath::Float MFloat; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::AInt16 MAInt16; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::SInt32 MSInt32; - - static int TweakRoundsForRange(int range) - { - if (range == 3) - return 3; - return 4; - } - - template<int TVectorSize> - static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError) - { - for (int 
ch = 0; ch < numRealChannels; ch++) - aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch); - } - - template<int TVectorSize> - static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError) - { - ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError); - } - - template<int TVectorSize> - static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq) - { - AggregatedError<TVectorSize> aggError; - ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError); - return aggError.Finalize(flags, channelWeightsSq); - } - - template<int TVectorSize> - static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) - { - MFloat error = ParallelMath::MakeFloatZero(); - if (flags & Flags::Uniform) - { - for (int ch = 0; ch < TVectorSize; ch++) - error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]); - } - else - { - for (int ch = 0; ch < TVectorSize; ch++) - error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); - } - - return error; - } - - template<int TVectorSize> - static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) - { - MFloat error = ParallelMath::MakeFloatZero(); - if (flags & Flags::Uniform) - { - for (int ch = 0; ch < TVectorSize; ch++) - error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]); - } - else - { - for (int ch = 0; ch < TVectorSize; ch++) - error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * 
ParallelMath::MakeFloat(channelWeightsSq[ch]); - } - - return error; - } - - template<int TChannelCount> - static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) - { - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < TChannelCount; ch++) - preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; - } - } - - template<int TChannelCount> - static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) - { - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < TChannelCount; ch++) - preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; - } - } - }; - - class BC7Computer - { - public: - static const int MaxTweakRounds = 4; - - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::SInt32 MSInt32; - typedef ParallelMath::Float MFloat; - - struct WorkInfo - { - MUInt15 m_mode; - MFloat m_error; - MUInt15 m_ep[3][2][4]; - MUInt15 m_indexes[16]; - MUInt15 m_indexes2[16]; - - union - { - MUInt15 m_partition; - struct IndexSelectorAndRotation - { - MUInt15 m_indexSelector; - MUInt15 m_rotation; - } m_isr; - } m_u; - }; - - static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]) - { - ParallelMath::RoundTowardNearestForScope roundingMode; - - float tf[2]; - ComputeTweakFactors(tweak, range, tf); - - MFloat base = ParallelMath::ToFloat(original[0]); - MFloat offs = ParallelMath::ToFloat(original[1]) - base; - - result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode); - result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode); - } - - static void 
Quantize(MUInt15* color, int bits, int channels, const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - float maxColor = static_cast<float>((1 << bits) - 1); - - for (int i = 0; i < channels; i++) - color[i] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(ParallelMath::ToFloat(color[i]) * ParallelMath::MakeFloat(1.0f / 255.0f) * maxColor, 0.f, 255.f), roundingMode); - } - - static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels, const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - uint16_t pShift = static_cast<uint16_t>(1 << (7 - bits)); - MUInt15 pShiftV = ParallelMath::MakeUInt15(pShift); - - float maxColorF = static_cast<float>(255 - (1 << (7 - bits))); - - float maxQuantized = static_cast<float>((1 << bits) - 1); - - for (int ch = 0; ch < channels; ch++) - { - MUInt15 clr = color[ch]; - if (p) - clr = ParallelMath::Max(clr, pShiftV) - pShiftV; - - MFloat rerangedColor = ParallelMath::ToFloat(clr) * maxQuantized / maxColorF; - - clr = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(rerangedColor, 0.0f, maxQuantized), roundingMode) << 1; - if (p) - clr = clr | ParallelMath::MakeUInt15(1); - - color[ch] = clr; - } - } - - static void Unquantize(MUInt15* color, int bits, int channels) - { - for (int ch = 0; ch < channels; ch++) - { - MUInt15 clr = color[ch]; - clr = clr << (8 - bits); - color[ch] = clr | ParallelMath::RightShift(clr, bits); - } - } - - static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - QuantizeP(ep[j], 4, p[j], 3, roundingMode); - Unquantize(ep[j], 5, 3); - ep[j][3] = ParallelMath::MakeUInt15(255); - } - } - - static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p, const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - QuantizeP(ep[j], 6, p, 3, roundingMode); - Unquantize(ep[j], 7, 3); - ep[j][3] = 
ParallelMath::MakeUInt15(255); - } - } - - static void CompressEndpoints2(MUInt15 ep[2][4], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - Quantize(ep[j], 5, 3, roundingMode); - Unquantize(ep[j], 5, 3); - ep[j][3] = ParallelMath::MakeUInt15(255); - } - } - - static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - QuantizeP(ep[j], 7, p[j], 3, roundingMode); - ep[j][3] = ParallelMath::MakeUInt15(255); - } - } - - static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - Quantize(epRGB[j], 5, 3, roundingMode); - Unquantize(epRGB[j], 5, 3); - - Quantize(epA + j, 6, 1, roundingMode); - Unquantize(epA + j, 6, 1); - } - } - - static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - Quantize(epRGB[j], 7, 3, roundingMode); - Unquantize(epRGB[j], 7, 3); - } - - // Alpha is full precision - (void)epA; - } - - static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - QuantizeP(ep[j], 7, p[j], 4, roundingMode); - } - - static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2], const ParallelMath::RoundTowardNearestForScope *roundingMode) - { - for (int j = 0; j < 2; j++) - { - QuantizeP(ep[j], 5, p[j], 4, roundingMode); - Unquantize(ep[j], 6, 4); - } - } - - struct SinglePlaneTemporaries - { - UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll]; - UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12]; - - MUInt15 fragmentBestIndexes[BC7Data::g_numFragments]; - MUInt15 shapeBestEP[BC7Data::g_maxFragmentsPerMode][2][4]; - MFloat 
shapeBestError[BC7Data::g_maxFragmentsPerMode]; - }; - - static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn) - { - MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX); - - MUInt15 intAverage[4]; - for (int ch = 0; ch < 4; ch++) - intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn); - - MUInt15 eps[2][4]; - MUInt15 reconstructed[4]; - MUInt15 index = ParallelMath::MakeUInt15(0); - - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - eps[epi][ch] = ParallelMath::MakeUInt15(0); - eps[epi][3] = ParallelMath::MakeUInt15(255); - } - - for (int ch = 0; ch < 3; ch++) - reconstructed[ch] = ParallelMath::MakeUInt15(0); - reconstructed[3] = ParallelMath::MakeUInt15(255); - - // Depending on the target index and parity bits, there are multiple valid solid colors. - // We want to find the one closest to the actual average. 
- MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX); - for (int t = 0; t < numTables; t++) - { - const cvtt::Tables::BC7SC::Table& table = *(tables[t]); - - ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits]; - - MUInt15 candidateReconstructed[4]; - MUInt15 candidateEPs[2][4]; - - for (int i = 0; i < ParallelMath::ParallelSize; i++) - { - for (int ch = 0; ch < numRealChannels; ch++) - { - ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i); - assert(avgValue >= 0 && avgValue <= 255); - - const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue]; - - ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min); - ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max); - ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor); - } - } - - MFloat avgError = ParallelMath::MakeFloatZero(); - for (int ch = 0; ch < numRealChannels; ch++) - { - MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch]; - avgError = avgError + delta * delta * channelWeightsSq[ch]; - } - - ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError)); - better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations - - if (ParallelMath::AnySet(better)) - { - ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError); - - MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index); - - ParallelMath::ConditionalSet(index, better, candidateIndex); - - for (int ch = 0; ch < numRealChannels; ch++) - ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]); - - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < numRealChannels; ch++) - ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]); - } - } - - AggregatedError<4> aggError; - for (int pxi = 0; pxi < shapeLength; pxi++) - { - int px = fragmentStart[pxi]; - - 
BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); - } - - MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError; - - ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError)); - if (ParallelMath::AnySet(better)) - { - shapeBestError = ParallelMath::Min(shapeBestError, error); - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < numRealChannels; ch++) - ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]); - } - - for (int pxi = 0; pxi < shapeLength; pxi++) - ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index); - } - } - - - static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], int numTweakRounds, int numRefineRounds, WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) - { - if (numRefineRounds < 1) - numRefineRounds = 1; - - if (numTweakRounds < 1) - numTweakRounds = 1; - else if (numTweakRounds > MaxTweakRounds) - numTweakRounds = MaxTweakRounds; - - float channelWeightsSq[4]; - - for (int ch = 0; ch < 4; ch++) - channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; - - SinglePlaneTemporaries temps; - - MUInt15 maxAlpha = ParallelMath::MakeUInt15(0); - MUInt15 minAlpha = ParallelMath::MakeUInt15(255); - ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true); - for (int px = 0; px < 16; px++) - { - MUInt15 a = pixels[px][3]; - maxAlpha = ParallelMath::Max(maxAlpha, a); - minAlpha = ParallelMath::Min(minAlpha, a); - - isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255)))); - } - - ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255)); - ParallelMath::Int16CompFlag blockHasNonZeroAlpha = 
ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha); - - bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha); - - // Try RGB modes if any block has a min alpha 251 or higher - bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha)); - - // Try mode 7 if any block has alpha. - // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints - // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific - // situations, and only by at most 1 unit of error per pixel. - bool allowMode7 = anyBlockHasAlpha; - - MFloat preWeightedPixels[16][4]; - - BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); - - const int *rgbInitialEPCollapseList = NULL; - - // Get initial RGB endpoints - if (allowRGBModes) - { - const int *shapeList; - int numShapesToEvaluate; - - if (flags & Flags::BC7_EnablePartitioning) - { - if (flags & Flags::BC7_Enable3Subsets) - { - shapeList = BC7Data::g_shapeListAll; - rgbInitialEPCollapseList = BC7Data::g_shapeListAll; - numShapesToEvaluate = BC7Data::g_numShapesAll; - } - else - { - shapeList = BC7Data::g_shapeList12; - rgbInitialEPCollapseList = BC7Data::g_shapeList12Collapse; - numShapesToEvaluate = BC7Data::g_numShapes12; - } - } - else - { - shapeList = BC7Data::g_shapeList1; - rgbInitialEPCollapseList = BC7Data::g_shapeList1Collapse; - numShapesToEvaluate = BC7Data::g_numShapes1; - } - - for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) - { - int shape = shapeList[shapeIter]; - - int shapeStart = BC7Data::g_shapeRanges[shape][0]; - int shapeSize = BC7Data::g_shapeRanges[shape][1]; - - EndpointSelector<3, 8> epSelector; - - for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) - { - for (int spx = 0; spx < shapeSize; spx++) - { - int px = BC7Data::g_fragments[shapeStart + spx]; - epSelector.ContributePass(preWeightedPixels[px], epPass, 
ParallelMath::MakeFloat(1.0f)); - } - epSelector.FinishPass(epPass); - } - temps.unfinishedRGB[shapeIter] = epSelector.GetEndpoints(channelWeights); - } - } - - const int *rgbaInitialEPCollapseList = BC7Data::g_shapeList12Collapse; - - // Get initial RGBA endpoints - { - const int *shapeList = BC7Data::g_shapeList12; - int numShapesToEvaluate = BC7Data::g_numShapes12; - - for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) - { - int shape = shapeList[shapeIter]; - - if (anyBlockHasAlpha || !allowRGBModes) - { - int shapeStart = BC7Data::g_shapeRanges[shape][0]; - int shapeSize = BC7Data::g_shapeRanges[shape][1]; - - EndpointSelector<4, 8> epSelector; - - for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) - { - for (int spx = 0; spx < shapeSize; spx++) - { - int px = BC7Data::g_fragments[shapeStart + spx]; - epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); - } - epSelector.FinishPass(epPass); - } - temps.unfinishedRGBA[shapeIter] = epSelector.GetEndpoints(channelWeights); - } - else - { - temps.unfinishedRGBA[shapeIter] = temps.unfinishedRGB[rgbInitialEPCollapseList[shape]].ExpandTo<4>(255); - } - } - } - - for (uint16_t mode = 0; mode <= 7; mode++) - { - if (!(flags & Flags::BC7_EnablePartitioning) && BC7Data::g_modes[mode].m_numSubsets != 1) - continue; - - if (!(flags & Flags::BC7_Enable3Subsets) && BC7Data::g_modes[mode].m_numSubsets == 3) - continue; - - if (mode == 4 || mode == 5) - continue; - - if (mode < 4 && !allowRGBModes) - continue; - - if (mode == 7 && !allowMode7) - continue; - - bool isRGB = (mode < 4); - - unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits; - int numSubsets = BC7Data::g_modes[mode].m_numSubsets; - int indexPrec = BC7Data::g_modes[mode].m_indexBits; - - int parityBitMax = 1; - if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint) - parityBitMax = 4; - else if (BC7Data::g_modes[mode].m_pBitMode == 
BC7Data::PBitMode_PerSubset) - parityBitMax = 2; - - int numRealChannels = isRGB ? 3 : 4; - - int numShapes; - const int *shapeList; - const int *shapeCollapseList; - - if (numSubsets == 1) - { - numShapes = BC7Data::g_numShapes1; - shapeList = BC7Data::g_shapeList1; - shapeCollapseList = BC7Data::g_shapeList1Collapse; - } - else if (numSubsets == 2) - { - numShapes = BC7Data::g_numShapes2; - shapeList = BC7Data::g_shapeList2; - shapeCollapseList = BC7Data::g_shapeList2Collapse; - } - else - { - assert(numSubsets == 3); - if (numPartitions == 16) - { - numShapes = BC7Data::g_numShapes3Short; - shapeList = BC7Data::g_shapeList3Short; - shapeCollapseList = BC7Data::g_shapeList3ShortCollapse; - } - else - { - assert(numPartitions == 64); - numShapes = BC7Data::g_numShapes3; - shapeList = BC7Data::g_shapeList3; - shapeCollapseList = BC7Data::g_shapeList3Collapse; - } - } - - for (int slot = 0; slot < BC7Data::g_maxFragmentsPerMode; slot++) - temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX); - - for (int shapeIter = 0; shapeIter < numShapes; shapeIter++) - { - int shape = shapeList[shapeIter]; - int shapeStart = BC7Data::g_shapeRanges[shape][0]; - int shapeLength = BC7Data::g_shapeRanges[shape][1]; - int shapeCollapsedEvalIndex = shapeCollapseList[shape]; - - AggregatedError<1> alphaAggError; - if (isRGB && anyBlockHasAlpha) - { - MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) }; - - for (int pxi = 0; pxi < shapeLength; pxi++) - { - int px = BC7Data::g_fragments[shapeStart + pxi]; - MUInt15 original[1] = { pixels[px][3] }; - BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError); - } - } - - float alphaWeightsSq[1] = { channelWeightsSq[3] }; - MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq); - - assert(shapeCollapsedEvalIndex >= 0); - - MUInt15 tweakBaseEP[MaxTweakRounds][2][4]; - - for (int tweak = 0; tweak < numTweakRounds; tweak++) - { - if (isRGB) - { - 
temps.unfinishedRGB[rgbInitialEPCollapseList[shape]].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); - tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255); - } - else - { - temps.unfinishedRGBA[rgbaInitialEPCollapseList[shape]].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); - } - } - - ParallelMath::Int16CompFlag punchThroughInvalid[4]; - for (int pIter = 0; pIter < parityBitMax; pIter++) - { - punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false); - - if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7)) - { - // Modes 6 and 7 have parity bits that affect alpha - if (pIter == 0) - punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha); - else if (pIter == parityBitMax - 1) - punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha); - else - punchThroughInvalid[pIter] = isPunchThrough; - } - } - - for (int pIter = 0; pIter < parityBitMax; pIter++) - { - if (ParallelMath::AllSet(punchThroughInvalid[pIter])) - continue; - - bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]); - - for (int tweak = 0; tweak < numTweakRounds; tweak++) - { - uint16_t p[2]; - p[0] = (pIter & 1); - p[1] = ((pIter >> 1) & 1); - - MUInt15 ep[2][4]; - - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < 4; ch++) - ep[epi][ch] = tweakBaseEP[tweak][epi][ch]; - - for (int refine = 0; refine < numRefineRounds; refine++) - { - switch (mode) - { - case 0: - CompressEndpoints0(ep, p, rtn); - break; - case 1: - CompressEndpoints1(ep, p[0], rtn); - break; - case 2: - CompressEndpoints2(ep, rtn); - break; - case 3: - CompressEndpoints3(ep, p, rtn); - break; - case 6: - CompressEndpoints6(ep, p, rtn); - break; - case 7: - CompressEndpoints7(ep, p, rtn); - break; - default: - assert(false); - break; - }; - - MFloat shapeError = ParallelMath::MakeFloatZero(); - - IndexSelector<4> indexSelector; - 
indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec); - - EndpointRefiner<4> epRefiner; - epRefiner.Init(1 << indexPrec, channelWeights); - - MUInt15 indexes[16]; - - AggregatedError<4> aggError; - for (int pxi = 0; pxi < shapeLength; pxi++) - { - int px = BC7Data::g_fragments[shapeStart + pxi]; - - MUInt15 index; - MUInt15 reconstructed[4]; - - index = indexSelector.SelectIndexLDR(floatPixels[px], rtn); - indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels); - - if (flags & cvtt::Flags::BC7_FastIndexing) - BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); - else - { - MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); - - MUInt15 altIndexes[2]; - altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); - altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1))); - - for (int ii = 0; ii < 2; ii++) - { - indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels); - - MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); - ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error)); - error = ParallelMath::Min(error, altError); - ParallelMath::ConditionalSet(index, better, altIndexes[ii]); - } - - shapeError = shapeError + error; - } - - if (refine != numRefineRounds - 1) - epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels); - - indexes[pxi] = index; - } - - if (flags & cvtt::Flags::BC7_FastIndexing) - shapeError = aggError.Finalize(flags, channelWeightsSq); - - if (isRGB) - shapeError = shapeError + staticAlphaError; - - ParallelMath::FloatCompFlag shapeErrorBetter; - ParallelMath::Int16CompFlag shapeErrorBetter16; - - shapeErrorBetter = 
ParallelMath::Less(shapeError, temps.shapeBestError[shapeCollapsedEvalIndex]); - shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter); - - if (ParallelMath::AnySet(shapeErrorBetter16)) - { - bool punchThroughOK = true; - if (needPunchThroughCheck) - { - shapeErrorBetter16 = ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16); - shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16); - - if (!ParallelMath::AnySet(shapeErrorBetter16)) - punchThroughOK = false; - } - - if (punchThroughOK) - { - ParallelMath::ConditionalSet(temps.shapeBestError[shapeCollapsedEvalIndex], shapeErrorBetter, shapeError); - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < numRealChannels; ch++) - ParallelMath::ConditionalSet(temps.shapeBestEP[shapeCollapsedEvalIndex][epi][ch], shapeErrorBetter16, ep[epi][ch]); - - for (int pxi = 0; pxi < shapeLength; pxi++) - ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]); - } - } - - if (refine != numRefineRounds - 1) - epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn); - } // refine - } // tweak - } // p - - if (flags & cvtt::Flags::BC7_TrySingleColor) - { - MUInt15 total[4]; - for (int ch = 0; ch < 4; ch++) - total[ch] = ParallelMath::MakeUInt15(0); - - for (int pxi = 0; pxi < shapeLength; pxi++) - { - int px = BC7Data::g_fragments[shapeStart + pxi]; - for (int ch = 0; ch < 4; ch++) - total[ch] = total[ch] + pixels[pxi][ch]; - } - - MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength)); - MFloat average[4]; - for (int ch = 0; ch < 4; ch++) - average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength; - - const uint8_t *fragment = BC7Data::g_fragments + shapeStart; - MFloat &shapeBestError = temps.shapeBestError[shapeCollapsedEvalIndex]; - MUInt15(&shapeBestEP)[2][4] = temps.shapeBestEP[shapeCollapsedEvalIndex]; - MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart; - - 
const cvtt::Tables::BC7SC::Table **scTables = NULL; - int numSCTables = 0; - - switch (mode) - { - case 0: - { - const cvtt::Tables::BC7SC::Table *tables[] = - { - &cvtt::Tables::BC7SC::g_mode0_p00_i1, - &cvtt::Tables::BC7SC::g_mode0_p00_i2, - &cvtt::Tables::BC7SC::g_mode0_p00_i3, - &cvtt::Tables::BC7SC::g_mode0_p01_i1, - &cvtt::Tables::BC7SC::g_mode0_p01_i2, - &cvtt::Tables::BC7SC::g_mode0_p01_i3, - &cvtt::Tables::BC7SC::g_mode0_p10_i1, - &cvtt::Tables::BC7SC::g_mode0_p10_i2, - &cvtt::Tables::BC7SC::g_mode0_p10_i3, - &cvtt::Tables::BC7SC::g_mode0_p11_i1, - &cvtt::Tables::BC7SC::g_mode0_p11_i2, - &cvtt::Tables::BC7SC::g_mode0_p11_i3, - }; - scTables = tables; - numSCTables = sizeof(tables) / sizeof(tables[0]); - } - break; - case 1: - { - const cvtt::Tables::BC7SC::Table *tables[] = - { - &cvtt::Tables::BC7SC::g_mode1_p0_i1, - &cvtt::Tables::BC7SC::g_mode1_p0_i2, - &cvtt::Tables::BC7SC::g_mode1_p0_i3, - &cvtt::Tables::BC7SC::g_mode1_p1_i1, - &cvtt::Tables::BC7SC::g_mode1_p1_i2, - &cvtt::Tables::BC7SC::g_mode1_p1_i3, - }; - scTables = tables; - numSCTables = sizeof(tables) / sizeof(tables[0]); - } - break; - case 2: - { - const cvtt::Tables::BC7SC::Table *tables[] = - { - &cvtt::Tables::BC7SC::g_mode2, - }; - scTables = tables; - numSCTables = sizeof(tables) / sizeof(tables[0]); - } - break; - case 3: - { - const cvtt::Tables::BC7SC::Table *tables[] = - { - &cvtt::Tables::BC7SC::g_mode3_p0, - &cvtt::Tables::BC7SC::g_mode3_p1, - }; - scTables = tables; - numSCTables = sizeof(tables) / sizeof(tables[0]); - } - break; - case 6: - { - const cvtt::Tables::BC7SC::Table *tables[] = - { - &cvtt::Tables::BC7SC::g_mode6_p0_i1, - &cvtt::Tables::BC7SC::g_mode6_p0_i2, - &cvtt::Tables::BC7SC::g_mode6_p0_i3, - &cvtt::Tables::BC7SC::g_mode6_p0_i4, - &cvtt::Tables::BC7SC::g_mode6_p0_i5, - &cvtt::Tables::BC7SC::g_mode6_p0_i6, - &cvtt::Tables::BC7SC::g_mode6_p0_i7, - &cvtt::Tables::BC7SC::g_mode6_p1_i1, - &cvtt::Tables::BC7SC::g_mode6_p1_i2, - &cvtt::Tables::BC7SC::g_mode6_p1_i3, - 
&cvtt::Tables::BC7SC::g_mode6_p1_i4, - &cvtt::Tables::BC7SC::g_mode6_p1_i5, - &cvtt::Tables::BC7SC::g_mode6_p1_i6, - &cvtt::Tables::BC7SC::g_mode6_p1_i7, - }; - scTables = tables; - numSCTables = sizeof(tables) / sizeof(tables[0]); - } - break; - case 7: - { - const cvtt::Tables::BC7SC::Table *tables[] = - { - &cvtt::Tables::BC7SC::g_mode7_p00, - &cvtt::Tables::BC7SC::g_mode7_p01, - &cvtt::Tables::BC7SC::g_mode7_p10, - &cvtt::Tables::BC7SC::g_mode7_p11, - }; - scTables = tables; - numSCTables = sizeof(tables) / sizeof(tables[0]); - } - break; - default: - assert(false); - break; - } - - TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn); - } - } // shapeIter - - for (uint16_t partition = 0; partition < numPartitions; partition++) - { - const int *partitionShapes; - if (numSubsets == 1) - partitionShapes = BC7Data::g_shapes1[partition]; - else if (numSubsets == 2) - partitionShapes = BC7Data::g_shapes2[partition]; - else - { - assert(numSubsets == 3); - partitionShapes = BC7Data::g_shapes3[partition]; - } - - MFloat totalError = ParallelMath::MakeFloatZero(); - for (int subset = 0; subset < numSubsets; subset++) - totalError = totalError + temps.shapeBestError[shapeCollapseList[partitionShapes[subset]]]; - - ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error); - ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); - - if (ParallelMath::AnySet(errorBetter16)) - { - for (int subset = 0; subset < numSubsets; subset++) - { - int shape = partitionShapes[subset]; - int shapeStart = BC7Data::g_shapeRanges[shape][0]; - int shapeLength = BC7Data::g_shapeRanges[shape][1]; - int shapeCollapsedEvalIndex = shapeCollapseList[shape]; - - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < 4; ch++) - 
ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shapeCollapsedEvalIndex][epi][ch]); - - for (int pxi = 0; pxi < shapeLength; pxi++) - { - int px = BC7Data::g_fragments[shapeStart + pxi]; - ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]); - } - } - - work.m_error = ParallelMath::Min(totalError, work.m_error); - ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); - ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition)); - } - } - } - } - - static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], int numTweakRounds, int numRefineRounds, WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) - { - // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that. - // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to - // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases: - // - Separate alpha channel, then weighted RGB - // - Alpha+2 other channels, then the independent channel - - if (!(flags & Flags::BC7_EnableDualPlane)) - return; - - if (numRefineRounds < 1) - numRefineRounds = 1; - - if (numTweakRounds < 1) - numTweakRounds = 1; - else if (numTweakRounds > MaxTweakRounds) - numTweakRounds = MaxTweakRounds; - - float channelWeightsSq[4]; - for (int ch = 0; ch < 4; ch++) - channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; - - for (uint16_t mode = 4; mode <= 5; mode++) - { - for (uint16_t rotation = 0; rotation < 4; rotation++) - { - int alphaChannel = (rotation + 3) & 3; - int redChannel = (rotation == 1) ? 3 : 0; - int greenChannel = (rotation == 2) ? 3 : 1; - int blueChannel = (rotation == 3) ? 
3 : 2; - - MUInt15 rotatedRGB[16][3]; - MFloat floatRotatedRGB[16][3]; - - for (int px = 0; px < 16; px++) - { - rotatedRGB[px][0] = pixels[px][redChannel]; - rotatedRGB[px][1] = pixels[px][greenChannel]; - rotatedRGB[px][2] = pixels[px][blueChannel]; - - for (int ch = 0; ch < 3; ch++) - floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]); - } - - uint16_t maxIndexSelector = (mode == 4) ? 2 : 1; - - float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] }; - float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] }; - float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] }; - float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] }; - - float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error - - MFloat preWeightedRotatedRGB[16][3]; - BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights); - - for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++) - { - EndpointSelector<3, 8> rgbSelector; - - for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) - { - for (int px = 0; px < 16; px++) - rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f)); - - rgbSelector.FinishPass(epPass); - } - - MUInt15 alphaRange[2]; - - alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel]; - for (int px = 1; px < 16; px++) - { - alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]); - alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]); - } - - int rgbPrec = 0; - int alphaPrec = 0; - - if (mode == 4) - { - rgbPrec = indexSelector ? 3 : 2; - alphaPrec = indexSelector ? 
2 : 3; - } - else - rgbPrec = alphaPrec = 2; - - UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights); - - MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX); - MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX); - - MUInt15 bestRGBIndexes[16]; - MUInt15 bestAlphaIndexes[16]; - MUInt15 bestEP[2][4]; - - for (int px = 0; px < 16; px++) - bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0); - - for (int tweak = 0; tweak < numTweakRounds; tweak++) - { - MUInt15 rgbEP[2][3]; - MUInt15 alphaEP[2]; - - unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]); - - TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP); - - for (int refine = 0; refine < numRefineRounds; refine++) - { - if (mode == 4) - CompressEndpoints4(rgbEP, alphaEP, rtn); - else - CompressEndpoints5(rgbEP, alphaEP, rtn); - - - IndexSelector<1> alphaIndexSelector; - IndexSelector<3> rgbIndexSelector; - - { - MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } }; - alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec); - } - rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec); - - EndpointRefiner<3> rgbRefiner; - EndpointRefiner<1> alphaRefiner; - - rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights); - alphaRefiner.Init(1 << alphaPrec, uniformWeight); - - MFloat errorRGB = ParallelMath::MakeFloatZero(); - MFloat errorA = ParallelMath::MakeFloatZero(); - - MUInt15 rgbIndexes[16]; - MUInt15 alphaIndexes[16]; - - AggregatedError<3> rgbAggError; - AggregatedError<1> alphaAggError; - - for (int px = 0; px < 16; px++) - { - MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn); - MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn); - - MUInt15 reconstructedRGB[3]; - MUInt15 reconstructedAlpha[1]; - - rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB); - alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha); - 
- if (flags & cvtt::Flags::BC7_FastIndexing) - { - BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError); - BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError); - } - else - { - AggregatedError<3> baseRGBAggError; - AggregatedError<1> baseAlphaAggError; - - BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError); - BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError); - - MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq); - MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); - - MUInt15 altRGBIndexes[2]; - MUInt15 altAlphaIndexes[2]; - - altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); - altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1))); - - altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); - altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1))); - - for (int ii = 0; ii < 2; ii++) - { - rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB); - alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha); - - AggregatedError<3> altRGBAggError; - AggregatedError<1> altAlphaAggError; - - BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError); - BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError); - - MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq); - MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); - - ParallelMath::Int16CompFlag rgbBetter = 
ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError)); - ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError)); - - rgbError = ParallelMath::Min(altRGBError, rgbError); - alphaError = ParallelMath::Min(altAlphaError, alphaError); - - ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]); - ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]); - } - - errorRGB = errorRGB + rgbError; - errorA = errorA + alphaError; - } - - if (refine != numRefineRounds - 1) - { - rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex); - alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex); - } - - if (flags & Flags::BC7_FastIndexing) - { - errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq); - errorA = rgbAggError.Finalize(flags, rotatedAlphaWeightSq); - } - - rgbIndexes[px] = rgbIndex; - alphaIndexes[px] = alphaIndex; - } - - ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError); - ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError); - - ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter); - ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter); - - if (ParallelMath::AnySet(rgbBetterInt16)) - { - bestRGBError = ParallelMath::Min(errorRGB, bestRGBError); - - for (int px = 0; px < 16; px++) - ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]); - - for (int ep = 0; ep < 2; ep++) - { - for (int ch = 0; ch < 3; ch++) - ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]); - } - } - - if (ParallelMath::AnySet(alphaBetterInt16)) - { - bestAlphaError = ParallelMath::Min(errorA, bestAlphaError); - - for (int px = 0; px < 16; px++) - ParallelMath::ConditionalSet(bestAlphaIndexes[px], alphaBetterInt16, alphaIndexes[px]); - - for (int ep = 
0; ep < 2; ep++) - ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]); - } - - if (refine != numRefineRounds - 1) - { - rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn); - - MUInt15 alphaEPTemp[2][1]; - alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn); - - for (int i = 0; i < 2; i++) - alphaEP[i] = alphaEPTemp[i][0]; - } - } // refine - } // tweak - - MFloat combinedError = bestRGBError + bestAlphaError; - - ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error); - ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); - - work.m_error = ParallelMath::Min(combinedError, work.m_error); - - ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); - ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation)); - ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector)); - - for (int px = 0; px < 16; px++) - { - ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]); - ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? 
bestRGBIndexes[px] : bestAlphaIndexes[px]); - } - - for (int ep = 0; ep < 2; ep++) - for (int ch = 0; ch < 4; ch++) - ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]); - } - } - } - } - - template<class T> - static void Swap(T& a, T& b) - { - T temp = a; - a = b; - b = temp; - } - - static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], int numTweakRounds, int numRefineRounds) - { - MUInt15 pixels[16][4]; - MFloat floatPixels[16][4]; - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 4; ch++) - ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]); - } - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 4; ch++) - floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); - } - - WorkInfo work; - memset(&work, 0, sizeof(work)); - - work.m_error = ParallelMath::MakeFloat(FLT_MAX); - - { - ParallelMath::RoundTowardNearestForScope rtn; - TrySinglePlane(flags, pixels, floatPixels, channelWeights, numTweakRounds, numRefineRounds, work, &rtn); - TryDualPlane(flags, pixels, floatPixels, channelWeights, numTweakRounds, numRefineRounds, work, &rtn); - } - - for (int block = 0; block < ParallelMath::ParallelSize; block++) - { - PackingVector pv; - pv.Init(); - - ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block); - ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block); - ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block); - - const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode]; - - ParallelMath::ScalarUInt16 indexes[16]; - ParallelMath::ScalarUInt16 indexes2[16]; - ParallelMath::ScalarUInt16 endPoints[3][2][4]; - - for (int i = 0; i < 16; i++) - { - indexes[i] = ParallelMath::Extract(work.m_indexes[i], block); - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], 
block); - } - - for (int subset = 0; subset < 3; subset++) - { - for (int ep = 0; ep < 2; ep++) - { - for (int ch = 0; ch < 4; ch++) - endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block); - } - } - - int fixups[3] = { 0, 0, 0 }; - - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - { - bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0); - bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0); - - if (flipRGB) - { - uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; - for (int px = 0; px < 16; px++) - indexes[px] = highIndex - indexes[px]; - } - - if (flipAlpha) - { - uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1; - for (int px = 0; px < 16; px++) - indexes2[px] = highIndex - indexes2[px]; - } - - if (indexSelector) - Swap(flipRGB, flipAlpha); - - if (flipRGB) - { - for (int ch = 0; ch < 3; ch++) - Swap(endPoints[0][0][ch], endPoints[0][1][ch]); - } - if (flipAlpha) - Swap(endPoints[0][0][3], endPoints[0][1][3]); - - } - else - { - if (modeInfo.m_numSubsets == 2) - fixups[1] = BC7Data::g_fixupIndexes2[partition]; - else if (modeInfo.m_numSubsets == 3) - { - fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; - fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; - } - - bool flip[3] = { false, false, false }; - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0); - - if (flip[0] || flip[1] || flip[2]) - { - uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; - for (int px = 0; px < 16; px++) - { - int subset = 0; - if (modeInfo.m_numSubsets == 2) - subset = (BC7Data::g_partitionMap[partition] >> px) & 1; - else if (modeInfo.m_numSubsets == 3) - subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; - - if (flip[subset]) - indexes[px] = highIndex - indexes[px]; - } - - int maxCH = (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) ? 
4 : 3; - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - if (flip[subset]) - for (int ch = 0; ch < maxCH; ch++) - Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]); - } - } - } - - pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1); - - if (modeInfo.m_partitionBits) - pv.Pack(partition, modeInfo.m_partitionBits); - - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - { - ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block); - pv.Pack(rotation, 2); - } - - if (modeInfo.m_hasIndexSelector) - pv.Pack(indexSelector, 1); - - // Encode RGB - for (int ch = 0; ch < 3; ch++) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - { - ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch]; - epPart >>= (8 - modeInfo.m_rgbBits); - - pv.Pack(epPart, modeInfo.m_rgbBits); - } - } - } - - // Encode alpha - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - { - ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3]; - epPart >>= (8 - modeInfo.m_alphaBits); - - pv.Pack(epPart, modeInfo.m_alphaBits); - } - } - } - - // Encode parity bits - if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0]; - epPart >>= (7 - modeInfo.m_rgbBits); - epPart &= 1; - - pv.Pack(epPart, 1); - } - } - else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - { - ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0]; - epPart >>= (7 - modeInfo.m_rgbBits); - epPart &= 1; - - pv.Pack(epPart, 1); - } - } - } - - // Encode indexes - for (int px = 0; px < 16; px++) - { - int bits = modeInfo.m_indexBits; - if 
((px == 0) || (px == fixups[1]) || (px == fixups[2])) - bits--; - - pv.Pack(indexes[px], bits); - } - - // Encode secondary indexes - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - { - for (int px = 0; px < 16; px++) - { - int bits = modeInfo.m_alphaIndexBits; - if (px == 0) - bits--; - - pv.Pack(indexes2[px], bits); - } - } - - pv.Flush(packedBlocks); - - packedBlocks += 16; - } - } - - static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock) - { - UnpackingVector pv; - pv.Init(packedBlock); - - int mode = 8; - for (int i = 0; i < 8; i++) - { - if (pv.Unpack(1) == 1) - { - mode = i; - break; - } - } - - if (mode > 7) - { - for (int px = 0; px < 16; px++) - for (int ch = 0; ch < 4; ch++) - output.m_pixels[px][ch] = 0; - - return; - } - - const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode]; - - int partition = 0; - if (modeInfo.m_partitionBits) - partition = pv.Unpack(modeInfo.m_partitionBits); - - int rotation = 0; - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - rotation = pv.Unpack(2); - - int indexSelector = 0; - if (modeInfo.m_hasIndexSelector) - indexSelector = pv.Unpack(1); - - // Resolve fixups - int fixups[3] = { 0, 0, 0 }; - - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate) - { - if (modeInfo.m_numSubsets == 2) - fixups[1] = BC7Data::g_fixupIndexes2[partition]; - else if (modeInfo.m_numSubsets == 3) - { - fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; - fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; - } - } - - int endPoints[3][2][4]; - - // Decode RGB - for (int ch = 0; ch < 3; ch++) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits)); - } - } - - // Decode alpha - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - endPoints[subset][ep][3] 
= (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits)); - } - } - else - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - endPoints[subset][ep][3] = 255; - } - } - - int parityBits = 0; - - // Decode parity bits - if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - int p = pv.Unpack(1); - - for (int ep = 0; ep < 2; ep++) - { - for (int ch = 0; ch < 3; ch++) - endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); - - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) - endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); - } - } - - parityBits = 1; - } - else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) - { - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - { - int p = pv.Unpack(1); - - for (int ch = 0; ch < 3; ch++) - endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); - - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) - endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); - } - } - - parityBits = 1; - } - - // Fill endpoint bits - for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) - { - for (int ep = 0; ep < 2; ep++) - { - for (int ch = 0; ch < 3; ch++) - endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits)); - - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) - endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits)); - } - } - - int indexes[16]; - int indexes2[16]; - - // Decode indexes - for (int px = 0; px < 16; px++) - { - int bits = modeInfo.m_indexBits; - if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) - bits--; - - indexes[px] = pv.Unpack(bits); - } - - // Decode secondary indexes - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - { - for (int px = 0; px < 16; px++) - { - int bits = 
modeInfo.m_alphaIndexBits; - if (px == 0) - bits--; - - indexes2[px] = pv.Unpack(bits); - } - } - else - { - for (int px = 0; px < 16; px++) - indexes2[px] = 0; - } - - const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits]; - const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits]; - - // Decode each pixel - for (int px = 0; px < 16; px++) - { - int rgbWeight = 0; - int alphaWeight = 0; - - int rgbIndex = indexes[px]; - - rgbWeight = rgbWeights[indexes[px]]; - - if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) - alphaWeight = rgbWeight; - else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) - alphaWeight = alphaWeights[indexes2[px]]; - - if (indexSelector == 1) - { - int temp = rgbWeight; - rgbWeight = alphaWeight; - alphaWeight = temp; - } - - int pixel[4] = { 0, 0, 0, 255 }; - - int subset = 0; - - if (modeInfo.m_numSubsets == 2) - subset = (BC7Data::g_partitionMap[partition] >> px) & 1; - else if (modeInfo.m_numSubsets == 3) - subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; - - for (int ch = 0; ch < 3; ch++) - pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6; - - if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) - pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6; - - if (rotation != 0) - { - int ch = rotation - 1; - int temp = pixel[ch]; - pixel[ch] = pixel[3]; - pixel[3] = temp; - } - - for (int ch = 0; ch < 4; ch++) - output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]); - } - } - }; - - class BC6HComputer - { - public: - typedef ParallelMath::Float MFloat; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::AInt16 MAInt16; - typedef ParallelMath::SInt32 MSInt32; - typedef ParallelMath::UInt31 MUInt31; - - static const int MaxTweakRounds = 4; - static const int MaxRefineRounds = 
3; - - static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru) - { - assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744)))); - assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), elem2CL))); - - // Expand to full range - ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0)); - MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL)); - - absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision); - - MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem); - - return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16); - } - - static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru) - { - MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru); - return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision)); - } - - static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL) - { - MSInt16 zero = ParallelMath::MakeSInt16(0); - - ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero); - MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp)); - - MSInt16 unq; - MUInt15 absUnq; - - if (precision >= 16) - { - unq = comp; - absUnq = absComp; - } - else - { - MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2)); - ParallelMath::Int16CompFlag isZero = 
ParallelMath::Equal(comp, zero); - ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); - - absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1))); - ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0)); - ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff)); - - unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq)); - } - - outUnquantized = unq; - - MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5)); - - outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq)); - } - - static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished) - { - MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp); - if (precision < 15) - { - MUInt15 zero = ParallelMath::MakeUInt15(0); - MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2)); - - ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); - ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); - - unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision)); - - ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0)); - ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff)); - } - - outUnquantized = unq; - outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6)); - } - - static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 
quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn) - { - MSInt16 unquantizedEP[2][3]; - MSInt16 finishedUnquantizedEP[2][3]; - - { - ParallelMath::RoundUpForScope ru; - - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - { - MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru); - UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); - quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); - } - } - } - - indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); - indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights); - - MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); - - MUInt15 index = fastIndexing ? 
indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); - - ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); - - if (ParallelMath::AnySet(invert)) - { - ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); - - indexSelector.ConditionalInvert(invert); - - for (int ch = 0; ch < 3; ch++) - { - MAInt16 firstEP = quantizedEndPoints[0][ch]; - MAInt16 secondEP = quantizedEndPoints[1][ch]; - - quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); - quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); - } - } - - indexes[fixupIndex] = index; - } - - static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn) - { - MUInt16 unquantizedEP[2][3]; - MUInt16 finishedUnquantizedEP[2][3]; - - { - ParallelMath::RoundUpForScope ru; - - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - { - MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru); - UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); - quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); - } - } - } - - indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); - indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights); - - MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); - 
- MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); - - ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); - - if (ParallelMath::AnySet(invert)) - { - ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); - - indexSelector.ConditionalInvert(invert); - - for (int ch = 0; ch < 3; ch++) - { - MAInt16 firstEP = quantizedEndPoints[0][ch]; - MAInt16 secondEP = quantizedEndPoints[1][ch]; - - quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); - quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); - } - } - - indexes[fixupIndex] = index; - } - - static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal) - { - ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); - - MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); - - for (int ch = 0; ch < 3; ch++) - { - outEncodedEPs[0][0][ch] = ep0[0][ch]; - outEncodedEPs[0][1][ch] = ep0[1][ch]; - outEncodedEPs[1][0][ch] = ep1[0][ch]; - outEncodedEPs[1][1][ch] = ep1[1][ch]; - - if (isTransformed) - { - for (int subset = 0; subset < 2; subset++) - { - for (int epi = 0; epi < 2; epi++) - { - if (epi == 0 && subset == 0) - continue; - - MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask); - - MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]); - - outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); - - MAInt16 reconstructed = 
(ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask); - allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); - } - } - } - - if (!ParallelMath::AnySet(allLegal)) - break; - } - - outIsLegal = allLegal; - } - - static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal) - { - ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); - - MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); - - for (int ch = 0; ch < 3; ch++) - { - outEncodedEPs[0][ch] = ep[0][ch]; - outEncodedEPs[1][ch] = ep[1][ch]; - - if (isTransformed) - { - MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask); - - MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]); - - outEncodedEPs[1][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); - - MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask); - allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); - } - } - - outIsLegal = allLegal; - } - - static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds) - { - if (numTweakRounds < 1) - numTweakRounds = 1; - else if (numTweakRounds > MaxTweakRounds) - numTweakRounds = MaxTweakRounds; - - if (numRefineRounds < 1) - numRefineRounds = 1; - else if (numRefineRounds > MaxRefineRounds) - numRefineRounds = MaxRefineRounds; - - bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0); - float channelWeightsSq[3]; - - ParallelMath::RoundTowardNearestForScope rtn; - - MSInt16 pixels[16][3]; - MFloat floatPixels2CL[16][3]; - MFloat 
floatPixelsLinearWeighted[16][3]; - - MSInt16 low15Bits = ParallelMath::MakeSInt16(32767); - - for (int ch = 0; ch < 3; ch++) - channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 3; ch++) - { - MSInt16 pixelValue; - ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue); - - // Convert from sign+magnitude to 2CL - if (isSigned) - { - ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0)); - MSInt16 magnitude = (pixelValue & low15Bits); - ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude); - pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743)); - } - else - pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0)); - - pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743)); - - pixels[px][ch] = pixelValue; - floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue); - floatPixelsLinearWeighted[px][ch] = ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch]; - } - } - - MFloat preWeightedPixels[16][3]; - - BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights); - - MAInt16 bestEndPoints[2][2][3]; - MUInt15 bestIndexes[16]; - MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); - MUInt15 bestMode = ParallelMath::MakeUInt15(0); - MUInt15 bestPartition = ParallelMath::MakeUInt15(0); - - for (int px = 0; px < 16; px++) - bestIndexes[px] = ParallelMath::MakeUInt15(0); - - for (int subset = 0; subset < 2; subset++) - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < 3; ch++) - bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0); - - UnfinishedEndpoints<3> partitionedUFEP[32][2]; - UnfinishedEndpoints<3> singleUFEP; - - // Generate UFEP for partitions - for (int p = 0; p < 32; p++) - { - int partitionMask = BC7Data::g_partitionMap[p]; - - EndpointSelector<3, 8> epSelectors[2]; - - for (int pass = 0; pass 
< NumEndpointSelectorPasses; pass++) - { - for (int px = 0; px < 16; px++) - { - int subset = (partitionMask >> px) & 1; - epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); - } - - for (int subset = 0; subset < 2; subset++) - epSelectors[subset].FinishPass(pass); - } - - for (int subset = 0; subset < 2; subset++) - partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights); - } - - // Generate UFEP for single - { - EndpointSelector<3, 8> epSelector; - - for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) - { - for (int px = 0; px < 16; px++) - epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); - - epSelector.FinishPass(pass); - } - - singleUFEP = epSelector.GetEndpoints(channelWeights); - } - - for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++) - { - bool partitioned = (partitionedInt == 1); - - for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--) - { - if (!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec]) - continue; - - int numPartitions = partitioned ? 32 : 1; - int numSubsets = partitioned ? 2 : 1; - int indexBits = partitioned ? 3 : 4; - int indexRange = (1 << indexBits); - - for (int p = 0; p < numPartitions; p++) - { - int partitionMask = partitioned ? 
BC7Data::g_partitionMap[p] : 0; - - const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds; - - MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3]; - MUInt15 metaIndexes[MaxMetaRounds][16]; - MFloat metaError[MaxMetaRounds][2]; - - bool roundValid[MaxMetaRounds][2]; - - for (int r = 0; r < MaxMetaRounds; r++) - for (int subset = 0; subset < 2; subset++) - roundValid[r][subset] = true; - - for (int subset = 0; subset < numSubsets; subset++) - { - for (int tweak = 0; tweak < MaxTweakRounds; tweak++) - { - EndpointRefiner<3> refiners[2]; - - bool abortRemainingRefines = false; - for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++) - { - int metaRound = tweak * MaxRefineRounds + refinePass; - - if (tweak >= numTweakRounds || refinePass >= numRefineRounds) - abortRemainingRefines = true; - - if (abortRemainingRefines) - { - roundValid[metaRound][subset] = false; - continue; - } - - MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound]; - MUInt15(&mrIndexes)[16] = metaIndexes[metaRound]; - - MSInt16 endPointsColorSpace[2][3]; - - if (refinePass == 0) - { - UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP; - - if (isSigned) - ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); - else - ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); - } - else - refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn); - - refiners[subset].Init(indexRange, channelWeights); - - int fixupIndex = (subset == 0) ? 
0 : BC7Data::g_fixupIndexes2[p]; - - IndexSelectorHDR<3> indexSelector; - if (isSigned) - QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); - else - QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); - - if (metaRound > 0) - { - ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false); - - for (int prevRound = 0; prevRound < metaRound; prevRound++) - { - MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset]; - - ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true); - - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < 3; ch++) - same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch])); - - anySame = (anySame | same); - if (ParallelMath::AllSet(anySame)) - break; - } - - if (ParallelMath::AllSet(anySame)) - { - roundValid[metaRound][subset] = false; - continue; - } - } - - MFloat subsetError = ParallelMath::MakeFloatZero(); - - { - for (int px = 0; px < 16; px++) - { - if (subset != ((partitionMask >> px) & 1)) - continue; - - MUInt15 index; - if (px == fixupIndex) - index = mrIndexes[px]; - else - { - index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn); - mrIndexes[px] = index; - } - - MSInt16 reconstructed[3]; - if (isSigned) - indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed); - else - indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed); - - subsetError = subsetError + (fastIndexing ? 
BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq)); - - if (refinePass != numRefineRounds - 1) - refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index); - } - } - - metaError[metaRound][subset] = subsetError; - } - } - } - - // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme - int numMeta1 = partitioned ? MaxMetaRounds : 1; - for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++) - { - if (!roundValid[meta0][0]) - continue; - - for (int meta1 = 0; meta1 < numMeta1; meta1++) - { - MFloat combinedError = metaError[meta0][0]; - if (partitioned) - { - if (!roundValid[meta1][1]) - continue; - - combinedError = combinedError + metaError[meta1][1]; - } - - ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError); - if (!ParallelMath::AnySet(errorBetter)) - continue; - - ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter); - - // Figure out if this is encodable - for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++) - { - const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode]; - - if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec) - continue; - - MAInt16 encodedEPs[2][2][3]; - ParallelMath::Int16CompFlag isLegal; - if (partitioned) - EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal); - else - EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal); - - ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal); - if (!ParallelMath::AnySet(isLegalAndBetter)) - continue; - - ParallelMath::FloatCompFlag isLegalAndBetterFloat = 
ParallelMath::Int16FlagToFloat(isLegalAndBetter); - - ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError); - ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode))); - ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p))); - - for (int subset = 0; subset < numSubsets; subset++) - { - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]); - } - } - - for (int px = 0; px < 16; px++) - { - int subset = ((partitionMask >> px) & 1); - if (subset == 0) - ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]); - else - ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]); - } - - needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter); - if (!ParallelMath::AnySet(needsCommit)) - break; - } - } - } - } - } - } - - // At this point, everything should be set - for (int block = 0; block < ParallelMath::ParallelSize; block++) - { - ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block); - ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block); - int32_t eps[2][2][3]; - ParallelMath::ScalarUInt16 indexes[16]; - - const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; - - const BC6HData::ModeDescriptor* desc = BC6HData::g_modeDescriptors[mode]; - - const size_t headerBits = modeInfo.m_partitioned ? 
82 : 65; - - for (int subset = 0; subset < 2; subset++) - { - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block); - } - } - - for (int px = 0; px < 16; px++) - indexes[px] = ParallelMath::Extract(bestIndexes[px], block); - - uint16_t modeID = modeInfo.m_modeID; - - PackingVector pv; - pv.Init(); - - for (size_t i = 0; i < headerBits; i++) - { - int32_t codedValue = 0; - switch (desc[i].m_eField) - { - case BC6HData::M: codedValue = modeID; break; - case BC6HData::D: codedValue = partition; break; - case BC6HData::RW: codedValue = eps[0][0][0]; break; - case BC6HData::RX: codedValue = eps[0][1][0]; break; - case BC6HData::RY: codedValue = eps[1][0][0]; break; - case BC6HData::RZ: codedValue = eps[1][1][0]; break; - case BC6HData::GW: codedValue = eps[0][0][1]; break; - case BC6HData::GX: codedValue = eps[0][1][1]; break; - case BC6HData::GY: codedValue = eps[1][0][1]; break; - case BC6HData::GZ: codedValue = eps[1][1][1]; break; - case BC6HData::BW: codedValue = eps[0][0][2]; break; - case BC6HData::BX: codedValue = eps[0][1][2]; break; - case BC6HData::BY: codedValue = eps[1][0][2]; break; - case BC6HData::BZ: codedValue = eps[1][1][2]; break; - default: assert(false); break; - } - - pv.Pack(static_cast<uint16_t>((codedValue >> desc[i].m_uBit) & 1), 1); - } - - int fixupIndex1 = 0; - int indexBits = 4; - if (modeInfo.m_partitioned) - { - fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; - indexBits = 3; - } - - for (int px = 0; px < 16; px++) - { - ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block); - if (px == 0 || px == fixupIndex1) - pv.Pack(index, indexBits - 1); - else - pv.Pack(index, indexBits); - } - - pv.Flush(packedBlocks + 16 * block); - } - } - - static void SignExtendSingle(int &v, int bits) - { - if (v & (1 << (bits - 1))) - v |= -(1 << bits); - } - - static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, 
bool isSigned) - { - UnpackingVector pv; - pv.Init(pBC); - - int numModeBits = 2; - int modeBits = pv.Unpack(2); - if (modeBits != 0 && modeBits != 1) - { - modeBits |= pv.Unpack(3) << 2; - numModeBits += 3; - } - - int mode = -1; - for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++) - { - if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits) - { - mode = possibleMode; - break; - } - } - - if (mode < 0) - { - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 3; ch++) - output.m_pixels[px][ch] = 0; - output.m_pixels[px][3] = 0x3c00; // 1.0 - } - return; - } - - const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; - const size_t headerBits = modeInfo.m_partitioned ? 82 : 65; - const BC6HData::ModeDescriptor* desc = BC6HData::g_modeDescriptors[mode]; - - int32_t partition = 0; - int32_t eps[2][2][3]; - - for (int subset = 0; subset < 2; subset++) - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < 3; ch++) - eps[subset][epi][ch] = 0; - - for (size_t i = numModeBits; i < headerBits; i++) - { - int32_t *pCodedValue = NULL; - - switch (desc[i].m_eField) - { - case BC6HData::D: pCodedValue = &partition; break; - case BC6HData::RW: pCodedValue = &eps[0][0][0]; break; - case BC6HData::RX: pCodedValue = &eps[0][1][0]; break; - case BC6HData::RY: pCodedValue = &eps[1][0][0]; break; - case BC6HData::RZ: pCodedValue = &eps[1][1][0]; break; - case BC6HData::GW: pCodedValue = &eps[0][0][1]; break; - case BC6HData::GX: pCodedValue = &eps[0][1][1]; break; - case BC6HData::GY: pCodedValue = &eps[1][0][1]; break; - case BC6HData::GZ: pCodedValue = &eps[1][1][1]; break; - case BC6HData::BW: pCodedValue = &eps[0][0][2]; break; - case BC6HData::BX: pCodedValue = &eps[0][1][2]; break; - case BC6HData::BY: pCodedValue = &eps[1][0][2]; break; - case BC6HData::BZ: pCodedValue = &eps[1][1][2]; break; - default: assert(false); break; - } - - (*pCodedValue) |= pv.Unpack(1) << desc[i].m_uBit; - } - - - uint16_t modeID = 
modeInfo.m_modeID; - - int fixupIndex1 = 0; - int indexBits = 4; - int numSubsets = 1; - if (modeInfo.m_partitioned) - { - fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; - indexBits = 3; - numSubsets = 2; - } - - int indexes[16]; - for (int px = 0; px < 16; px++) - { - if (px == 0 || px == fixupIndex1) - indexes[px] = pv.Unpack(indexBits - 1); - else - indexes[px] = pv.Unpack(indexBits); - } - - if (modeInfo.m_partitioned) - { - for (int ch = 0; ch < 3; ch++) - { - if (isSigned) - SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); - if (modeInfo.m_transformed || isSigned) - { - SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); - SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]); - SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]); - } - } - } - else - { - for (int ch = 0; ch < 3; ch++) - { - if (isSigned) - SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); - if (modeInfo.m_transformed || isSigned) - SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); - } - } - - int aPrec = modeInfo.m_aPrec; - - if (modeInfo.m_transformed) - { - for (int ch = 0; ch < 3; ch++) - { - int wrapMask = (1 << aPrec) - 1; - - eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask); - if (isSigned) - SignExtendSingle(eps[0][1][ch], aPrec); - - if (modeInfo.m_partitioned) - { - eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask); - eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask); - - if (isSigned) - { - SignExtendSingle(eps[1][0][ch], aPrec); - SignExtendSingle(eps[1][1][ch], aPrec); - } - } - } - } - - // Unquantize endpoints - for (int subset = 0; subset < numSubsets; subset++) - { - for (int epi = 0; epi < 2; epi++) - { - for (int ch = 0; ch < 3; ch++) - { - int &v = eps[subset][epi][ch]; - - if (isSigned) - { - if (aPrec >= 16) - { - // Nothing - } - else - { - bool s = false; - int comp = v; - if (v < 0) - { - s = true; - comp = -comp; - } - - int unq = 0; - if (comp == 0) - unq = 0; - else if (comp >= ((1 << (aPrec - 1)) - 1)) - unq = 
0x7fff; - else - unq = ((comp << 15) + 0x4000) >> (aPrec - 1); - - if (s) - unq = -unq; - - v = unq; - } - } - else - { - if (aPrec >= 15) - { - // Nothing - } - else if (v == 0) - { - // Nothing - } - else if (v == ((1 << aPrec) - 1)) - v = 0xffff; - else - v = ((v << 16) + 0x8000) >> aPrec; - } - } - } - } - - const int *weights = BC7Data::g_weightTables[indexBits]; - - for (int px = 0; px < 16; px++) - { - int subset = 0; - if (modeInfo.m_partitioned) - subset = (BC7Data::g_partitionMap[partition] >> px) & 1; - - int w = weights[indexes[px]]; - for (int ch = 0; ch < 3; ch++) - { - int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6; - - if (isSigned) - { - if (comp < 0) - comp = -(((-comp) * 31) >> 5); - else - comp = (comp * 31) >> 5; - - int s = 0; - if (comp < 0) - { - s = 0x8000; - comp = -comp; - } - - output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp); - } - else - { - comp = (comp * 31) >> 6; - output.m_pixels[px][ch] = static_cast<uint16_t>(comp); - } - } - output.m_pixels[px][3] = 0x3c00; // 1.0 - } - } - }; - - namespace S3TCSingleColorTables - { - struct SingleColorTableEntry - { - uint8_t m_min; - uint8_t m_max; - uint8_t m_actualColor; - uint8_t m_span; - }; - - SingleColorTableEntry g_singleColor5_3[256] = - { - { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 8, 0, 2, 8 }, { 8, 0, 2, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 16, 8, 10, 8 }, { 33, 0, 11, 33 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 24, 16, 18, 8 }, { 41, 8, 19, 33 }, { 16, 24, 21, 8 }, { 16, 24, 21, 8 }, { 0, 33, 22, 33 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 41, 24, 29, 17 }, { 24, 33, 30, 9 }, { 24, 33, 30, 9 }, - { 16, 41, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 41, 33, 35, 8 }, { 41, 33, 35, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 }, 
{ 33, 41, 38, 8 }, - { 24, 49, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 49, 41, 43, 8 }, { 66, 33, 44, 33 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 }, - { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 57, 49, 51, 8 }, { 74, 41, 52, 33 }, { 49, 57, 54, 8 }, { 49, 57, 54, 8 }, { 33, 66, 55, 33 }, - { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 74, 57, 62, 17 }, { 57, 66, 63, 9 }, - { 57, 66, 63, 9 }, { 49, 74, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 74, 66, 68, 8 }, { 74, 66, 68, 8 }, { 66, 74, 71, 8 }, { 66, 74, 71, 8 }, - { 66, 74, 71, 8 }, { 57, 82, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 82, 74, 76, 8 }, { 99, 66, 77, 33 }, { 74, 82, 79, 8 }, { 74, 82, 79, 8 }, - { 74, 82, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 90, 82, 84, 8 }, { 107, 74, 85, 33 }, { 82, 90, 87, 8 }, { 82, 90, 87, 8 }, - { 66, 99, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 107, 90, 95, 17 }, - { 90, 99, 96, 9 }, { 90, 99, 96, 9 }, { 82, 107, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 107, 99, 101, 8 }, { 107, 99, 101, 8 }, { 99, 107, 104, 8 }, - { 99, 107, 104, 8 }, { 99, 107, 104, 8 }, { 90, 115, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 115, 107, 109, 8 }, { 132, 99, 110, 33 }, { 107, 115, 112, 8 }, - { 107, 115, 112, 8 }, { 107, 115, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 123, 115, 117, 8 }, { 140, 107, 118, 33 }, { 115, 123, 120, 8 }, - { 115, 123, 120, 8 }, { 99, 132, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 }, - { 140, 123, 128, 17 }, { 123, 132, 129, 9 }, { 123, 132, 129, 9 }, { 115, 140, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 140, 132, 134, 8 }, { 140, 132, 134, 8 }, - { 132, 140, 
137, 8 }, { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 123, 148, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 148, 140, 142, 8 }, { 165, 132, 143, 33 }, - { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 156, 148, 150, 8 }, { 173, 140, 151, 33 }, - { 148, 156, 153, 8 }, { 148, 156, 153, 8 }, { 132, 165, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 165, 156, 159, 9 }, { 165, 156, 159, 9 }, - { 165, 156, 159, 9 }, { 173, 156, 161, 17 }, { 156, 165, 162, 9 }, { 156, 165, 162, 9 }, { 148, 173, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 173, 165, 167, 8 }, - { 173, 165, 167, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 156, 181, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 181, 173, 175, 8 }, - { 198, 165, 176, 33 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 189, 181, 183, 8 }, - { 206, 173, 184, 33 }, { 181, 189, 186, 8 }, { 181, 189, 186, 8 }, { 165, 198, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 198, 189, 192, 9 }, - { 198, 189, 192, 9 }, { 198, 189, 192, 9 }, { 206, 189, 194, 17 }, { 189, 198, 195, 9 }, { 189, 198, 195, 9 }, { 181, 206, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, - { 206, 198, 200, 8 }, { 206, 198, 200, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 189, 214, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, - { 214, 206, 208, 8 }, { 231, 198, 209, 33 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, - { 222, 214, 216, 8 }, { 239, 206, 217, 33 }, { 214, 222, 219, 8 }, { 214, 222, 219, 8 }, { 198, 231, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, - { 231, 222, 225, 9 }, { 231, 222, 
225, 9 }, { 231, 222, 225, 9 }, { 239, 222, 227, 17 }, { 222, 231, 228, 9 }, { 222, 231, 228, 9 }, { 214, 239, 230, 25 }, { 231, 231, 231, 0 }, - { 231, 231, 231, 0 }, { 239, 231, 233, 8 }, { 239, 231, 233, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 222, 247, 238, 25 }, { 239, 239, 239, 0 }, - { 239, 239, 239, 0 }, { 247, 239, 241, 8 }, { 247, 239, 241, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 247, 247, 247, 0 }, { 255, 247, 249, 8 }, { 255, 247, 249, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - - SingleColorTableEntry g_singleColor6_3[256] = - { - { 0, 0, 0, 0 }, { 4, 0, 1, 4 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 8, 4, 5, 4 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 12, 8, 9, 4 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 16, 12, 13, 4 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 20, 16, 17, 4 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 24, 20, 21, 4 }, { 20, 24, 22, 4 }, { 69, 0, 23, 69 }, - { 24, 24, 24, 0 }, { 28, 24, 25, 4 }, { 24, 28, 26, 4 }, { 65, 8, 27, 57 }, { 28, 28, 28, 0 }, { 32, 28, 29, 4 }, { 28, 32, 30, 4 }, { 69, 12, 31, 57 }, - { 32, 32, 32, 0 }, { 36, 32, 33, 4 }, { 32, 36, 34, 4 }, { 65, 20, 35, 45 }, { 36, 36, 36, 0 }, { 40, 36, 37, 4 }, { 36, 40, 38, 4 }, { 69, 24, 39, 45 }, - { 40, 40, 40, 0 }, { 44, 40, 41, 4 }, { 40, 44, 42, 4 }, { 65, 32, 43, 33 }, { 44, 44, 44, 0 }, { 48, 44, 45, 4 }, { 44, 48, 46, 4 }, { 69, 36, 47, 33 }, - { 48, 48, 48, 0 }, { 52, 48, 49, 4 }, { 48, 52, 50, 4 }, { 65, 44, 51, 21 }, { 52, 52, 52, 0 }, { 56, 52, 53, 4 }, { 52, 56, 54, 4 }, { 69, 48, 55, 21 }, - { 56, 56, 56, 0 }, { 60, 56, 57, 4 }, { 56, 60, 58, 4 }, { 65, 56, 59, 9 }, { 60, 60, 60, 0 }, { 65, 60, 61, 5 }, { 56, 65, 62, 9 }, { 60, 65, 63, 5 }, - { 56, 69, 64, 13 }, { 65, 65, 65, 0 }, { 69, 65, 
66, 4 }, { 65, 69, 67, 4 }, { 60, 73, 68, 13 }, { 69, 69, 69, 0 }, { 73, 69, 70, 4 }, { 69, 73, 71, 4 }, - { 56, 81, 72, 25 }, { 73, 73, 73, 0 }, { 77, 73, 74, 4 }, { 73, 77, 75, 4 }, { 60, 85, 76, 25 }, { 77, 77, 77, 0 }, { 81, 77, 78, 4 }, { 77, 81, 79, 4 }, - { 56, 93, 80, 37 }, { 81, 81, 81, 0 }, { 85, 81, 82, 4 }, { 81, 85, 83, 4 }, { 60, 97, 84, 37 }, { 85, 85, 85, 0 }, { 89, 85, 86, 4 }, { 85, 89, 87, 4 }, - { 56, 105, 88, 49 }, { 89, 89, 89, 0 }, { 93, 89, 90, 4 }, { 89, 93, 91, 4 }, { 60, 109, 92, 49 }, { 93, 93, 93, 0 }, { 97, 93, 94, 4 }, { 93, 97, 95, 4 }, - { 134, 77, 96, 57 }, { 97, 97, 97, 0 }, { 101, 97, 98, 4 }, { 97, 101, 99, 4 }, { 130, 85, 100, 45 }, { 101, 101, 101, 0 }, { 105, 101, 102, 4 }, { 101, 105, 103, 4 }, - { 134, 89, 104, 45 }, { 105, 105, 105, 0 }, { 109, 105, 106, 4 }, { 105, 109, 107, 4 }, { 130, 97, 108, 33 }, { 109, 109, 109, 0 }, { 113, 109, 110, 4 }, { 109, 113, 111, 4 }, - { 134, 101, 112, 33 }, { 113, 113, 113, 0 }, { 117, 113, 114, 4 }, { 113, 117, 115, 4 }, { 130, 109, 116, 21 }, { 117, 117, 117, 0 }, { 121, 117, 118, 4 }, { 117, 121, 119, 4 }, - { 134, 113, 120, 21 }, { 121, 121, 121, 0 }, { 125, 121, 122, 4 }, { 121, 125, 123, 4 }, { 130, 121, 124, 9 }, { 125, 125, 125, 0 }, { 130, 125, 126, 5 }, { 121, 130, 127, 9 }, - { 125, 130, 128, 5 }, { 121, 134, 129, 13 }, { 130, 130, 130, 0 }, { 134, 130, 131, 4 }, { 130, 134, 132, 4 }, { 125, 138, 133, 13 }, { 134, 134, 134, 0 }, { 138, 134, 135, 4 }, - { 134, 138, 136, 4 }, { 121, 146, 137, 25 }, { 138, 138, 138, 0 }, { 142, 138, 139, 4 }, { 138, 142, 140, 4 }, { 125, 150, 141, 25 }, { 142, 142, 142, 0 }, { 146, 142, 143, 4 }, - { 142, 146, 144, 4 }, { 121, 158, 145, 37 }, { 146, 146, 146, 0 }, { 150, 146, 147, 4 }, { 146, 150, 148, 4 }, { 125, 162, 149, 37 }, { 150, 150, 150, 0 }, { 154, 150, 151, 4 }, - { 150, 154, 152, 4 }, { 121, 170, 153, 49 }, { 154, 154, 154, 0 }, { 158, 154, 155, 4 }, { 154, 158, 156, 4 }, { 125, 174, 157, 49 }, { 158, 158, 158, 0 }, { 162, 158, 159, 4 
}, - { 158, 162, 160, 4 }, { 199, 142, 161, 57 }, { 162, 162, 162, 0 }, { 166, 162, 163, 4 }, { 162, 166, 164, 4 }, { 195, 150, 165, 45 }, { 166, 166, 166, 0 }, { 170, 166, 167, 4 }, - { 166, 170, 168, 4 }, { 199, 154, 169, 45 }, { 170, 170, 170, 0 }, { 174, 170, 171, 4 }, { 170, 174, 172, 4 }, { 195, 162, 173, 33 }, { 174, 174, 174, 0 }, { 178, 174, 175, 4 }, - { 174, 178, 176, 4 }, { 199, 166, 177, 33 }, { 178, 178, 178, 0 }, { 182, 178, 179, 4 }, { 178, 182, 180, 4 }, { 195, 174, 181, 21 }, { 182, 182, 182, 0 }, { 186, 182, 183, 4 }, - { 182, 186, 184, 4 }, { 199, 178, 185, 21 }, { 186, 186, 186, 0 }, { 190, 186, 187, 4 }, { 186, 190, 188, 4 }, { 195, 186, 189, 9 }, { 190, 190, 190, 0 }, { 195, 190, 191, 5 }, - { 186, 195, 192, 9 }, { 190, 195, 193, 5 }, { 186, 199, 194, 13 }, { 195, 195, 195, 0 }, { 199, 195, 196, 4 }, { 195, 199, 197, 4 }, { 190, 203, 198, 13 }, { 199, 199, 199, 0 }, - { 203, 199, 200, 4 }, { 199, 203, 201, 4 }, { 186, 211, 202, 25 }, { 203, 203, 203, 0 }, { 207, 203, 204, 4 }, { 203, 207, 205, 4 }, { 190, 215, 206, 25 }, { 207, 207, 207, 0 }, - { 211, 207, 208, 4 }, { 207, 211, 209, 4 }, { 186, 223, 210, 37 }, { 211, 211, 211, 0 }, { 215, 211, 212, 4 }, { 211, 215, 213, 4 }, { 190, 227, 214, 37 }, { 215, 215, 215, 0 }, - { 219, 215, 216, 4 }, { 215, 219, 217, 4 }, { 186, 235, 218, 49 }, { 219, 219, 219, 0 }, { 223, 219, 220, 4 }, { 219, 223, 221, 4 }, { 190, 239, 222, 49 }, { 223, 223, 223, 0 }, - { 227, 223, 224, 4 }, { 223, 227, 225, 4 }, { 186, 247, 226, 61 }, { 227, 227, 227, 0 }, { 231, 227, 228, 4 }, { 227, 231, 229, 4 }, { 190, 251, 230, 61 }, { 231, 231, 231, 0 }, - { 235, 231, 232, 4 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 239, 235, 236, 4 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, - { 243, 239, 240, 4 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 247, 243, 244, 4 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 251, 247, 248, 4 
}, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 255, 251, 252, 4 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - - SingleColorTableEntry g_singleColor5_2[256] = - { - { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 }, - { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 }, - { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 }, - { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 }, - { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, - { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 }, - { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, - { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, - { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 
}, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, - { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 }, - { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 }, - { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 }, - { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, - { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 }, - { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 }, - { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 }, - { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 }, - { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 }, - { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, - { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, - 
{ 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, - { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, - { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, - { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, - { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, - { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 }, - { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 }, - { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - - SingleColorTableEntry g_singleColor6_2[256] = - { - { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 
18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 }, - { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 }, - { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 }, - { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 }, - { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 }, - { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 }, - { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 60, 97, 78, 37 }, { 77, 81, 79, 4 }, - { 60, 101, 80, 41 }, { 81, 81, 81, 0 }, { 60, 105, 82, 45 }, { 81, 85, 83, 4 }, { 60, 109, 84, 49 }, { 85, 85, 85, 0 }, { 60, 113, 86, 53 }, { 85, 89, 87, 4 }, - { 60, 117, 88, 57 }, { 89, 89, 89, 0 }, { 60, 121, 90, 61 }, { 89, 93, 91, 4 }, { 60, 125, 92, 65 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 }, - { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 }, - { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 }, - { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 
117, 117, 0 }, { 117, 121, 119, 4 }, - { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 }, - { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 }, - { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 125, 162, 143, 37 }, - { 142, 146, 144, 4 }, { 125, 166, 145, 41 }, { 146, 146, 146, 0 }, { 125, 170, 147, 45 }, { 146, 150, 148, 4 }, { 125, 174, 149, 49 }, { 150, 150, 150, 0 }, { 125, 178, 151, 53 }, - { 150, 154, 152, 4 }, { 125, 182, 153, 57 }, { 154, 154, 154, 0 }, { 125, 186, 155, 61 }, { 154, 158, 156, 4 }, { 125, 190, 157, 65 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, - { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, - { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, - { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, - { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, - { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 }, - { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 
207, 207, 0 }, - { 190, 227, 208, 37 }, { 207, 211, 209, 4 }, { 190, 231, 210, 41 }, { 211, 211, 211, 0 }, { 190, 235, 212, 45 }, { 211, 215, 213, 4 }, { 190, 239, 214, 49 }, { 215, 215, 215, 0 }, - { 190, 243, 216, 53 }, { 215, 219, 217, 4 }, { 190, 247, 218, 57 }, { 219, 219, 219, 0 }, { 190, 251, 220, 61 }, { 219, 223, 221, 4 }, { 190, 255, 222, 65 }, { 223, 223, 223, 0 }, - { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 }, - { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, - { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - - SingleColorTableEntry g_singleColor5_3_p[256] = - { - { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 8, 0, 2, 8 }, { 8, 0, 2, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 0, 8, 5, 8 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 16, 8, 10, 8 }, { 33, 0, 11, 33 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 8, 16, 13, 8 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 24, 16, 18, 8 }, { 41, 8, 19, 33 }, { 16, 24, 21, 8 }, { 16, 24, 21, 8 }, { 0, 33, 22, 33 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 33, 24, 27, 9 }, { 41, 24, 29, 17 }, { 24, 33, 30, 9 }, { 24, 33, 30, 9 }, - { 16, 41, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 41, 33, 35, 8 }, { 41, 33, 35, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 }, { 33, 41, 38, 8 }, - { 24, 49, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 49, 41, 43, 8 }, { 66, 33, 44, 33 }, { 41, 49, 
46, 8 }, { 41, 49, 46, 8 }, { 41, 49, 46, 8 }, - { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 57, 49, 51, 8 }, { 74, 41, 52, 33 }, { 49, 57, 54, 8 }, { 49, 57, 54, 8 }, { 33, 66, 55, 33 }, - { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 66, 57, 60, 9 }, { 74, 57, 62, 17 }, { 57, 66, 63, 9 }, - { 57, 66, 63, 9 }, { 49, 74, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 74, 66, 68, 8 }, { 74, 66, 68, 8 }, { 66, 74, 71, 8 }, { 66, 74, 71, 8 }, - { 66, 74, 71, 8 }, { 57, 82, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 82, 74, 76, 8 }, { 99, 66, 77, 33 }, { 74, 82, 79, 8 }, { 74, 82, 79, 8 }, - { 74, 82, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 90, 82, 84, 8 }, { 107, 74, 85, 33 }, { 82, 90, 87, 8 }, { 82, 90, 87, 8 }, - { 66, 99, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 99, 90, 93, 9 }, { 107, 90, 95, 17 }, - { 90, 99, 96, 9 }, { 90, 99, 96, 9 }, { 82, 107, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 107, 99, 101, 8 }, { 107, 99, 101, 8 }, { 99, 107, 104, 8 }, - { 99, 107, 104, 8 }, { 99, 107, 104, 8 }, { 90, 115, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 115, 107, 109, 8 }, { 132, 99, 110, 33 }, { 107, 115, 112, 8 }, - { 107, 115, 112, 8 }, { 107, 115, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 123, 115, 117, 8 }, { 140, 107, 118, 33 }, { 115, 123, 120, 8 }, - { 115, 123, 120, 8 }, { 99, 132, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 }, { 132, 123, 126, 9 }, - { 140, 123, 128, 17 }, { 123, 132, 129, 9 }, { 123, 132, 129, 9 }, { 115, 140, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 140, 132, 134, 8 }, { 140, 132, 134, 8 }, - { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 132, 140, 137, 8 }, { 123, 148, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 148, 
140, 142, 8 }, { 165, 132, 143, 33 }, - { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 140, 148, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 156, 148, 150, 8 }, { 173, 140, 151, 33 }, - { 148, 156, 153, 8 }, { 148, 156, 153, 8 }, { 132, 165, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 165, 156, 159, 9 }, { 165, 156, 159, 9 }, - { 165, 156, 159, 9 }, { 173, 156, 161, 17 }, { 156, 165, 162, 9 }, { 156, 165, 162, 9 }, { 148, 173, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 173, 165, 167, 8 }, - { 173, 165, 167, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 165, 173, 170, 8 }, { 156, 181, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 181, 173, 175, 8 }, - { 198, 165, 176, 33 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 173, 181, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 189, 181, 183, 8 }, - { 206, 173, 184, 33 }, { 181, 189, 186, 8 }, { 181, 189, 186, 8 }, { 165, 198, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 198, 189, 192, 9 }, - { 198, 189, 192, 9 }, { 198, 189, 192, 9 }, { 206, 189, 194, 17 }, { 189, 198, 195, 9 }, { 189, 198, 195, 9 }, { 181, 206, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, - { 206, 198, 200, 8 }, { 206, 198, 200, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 198, 206, 203, 8 }, { 189, 214, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, - { 214, 206, 208, 8 }, { 231, 198, 209, 33 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 206, 214, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, - { 222, 214, 216, 8 }, { 239, 206, 217, 33 }, { 214, 222, 219, 8 }, { 214, 222, 219, 8 }, { 198, 231, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, - { 231, 222, 225, 9 }, { 231, 222, 225, 9 }, { 231, 222, 225, 9 }, { 239, 222, 227, 17 }, { 222, 231, 228, 9 }, { 222, 231, 228, 9 }, { 214, 239, 230, 25 }, { 231, 
231, 231, 0 }, - { 231, 231, 231, 0 }, { 239, 231, 233, 8 }, { 239, 231, 233, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 231, 239, 236, 8 }, { 222, 247, 238, 25 }, { 239, 239, 239, 0 }, - { 239, 239, 239, 0 }, { 247, 239, 241, 8 }, { 247, 239, 241, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 239, 247, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 247, 247, 247, 0 }, { 255, 247, 249, 8 }, { 255, 247, 249, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 247, 255, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - - SingleColorTableEntry g_singleColor6_3_p[256] = - { - { 0, 0, 0, 0 }, { 4, 0, 1, 4 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 8, 4, 5, 4 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 12, 8, 9, 4 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 16, 12, 13, 4 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 20, 16, 17, 4 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 24, 20, 21, 4 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 28, 24, 25, 4 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 32, 28, 29, 4 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 }, - { 32, 32, 32, 0 }, { 36, 32, 33, 4 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 40, 36, 37, 4 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 }, - { 40, 40, 40, 0 }, { 44, 40, 41, 4 }, { 40, 44, 42, 4 }, { 65, 32, 43, 33 }, { 44, 44, 44, 0 }, { 48, 44, 45, 4 }, { 44, 48, 46, 4 }, { 69, 36, 47, 33 }, - { 48, 48, 48, 0 }, { 52, 48, 49, 4 }, { 48, 52, 50, 4 }, { 65, 44, 51, 21 }, { 52, 52, 52, 0 }, { 56, 52, 53, 4 }, { 52, 56, 54, 4 }, { 69, 48, 55, 21 }, - { 56, 56, 56, 0 }, { 60, 56, 57, 4 }, { 56, 60, 58, 4 }, { 65, 56, 59, 9 }, { 60, 60, 60, 0 }, { 65, 60, 61, 5 }, { 56, 65, 62, 9 }, { 60, 65, 63, 5 }, - { 56, 69, 64, 13 }, { 65, 65, 65, 0 }, { 69, 65, 66, 4 }, { 65, 69, 67, 4 }, { 60, 73, 68, 13 }, { 69, 69, 69, 0 }, { 73, 69, 70, 4 }, { 69, 73, 71, 4 }, - { 56, 81, 72, 25 }, { 
73, 73, 73, 0 }, { 77, 73, 74, 4 }, { 73, 77, 75, 4 }, { 60, 85, 76, 25 }, { 77, 77, 77, 0 }, { 81, 77, 78, 4 }, { 77, 81, 79, 4 }, - { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 85, 81, 82, 4 }, { 81, 85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 89, 85, 86, 4 }, { 85, 89, 87, 4 }, - { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 93, 89, 90, 4 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 97, 93, 94, 4 }, { 93, 97, 95, 4 }, - { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 101, 97, 98, 4 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 105, 101, 102, 4 }, { 101, 105, 103, 4 }, - { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 109, 105, 106, 4 }, { 105, 109, 107, 4 }, { 130, 97, 108, 33 }, { 109, 109, 109, 0 }, { 113, 109, 110, 4 }, { 109, 113, 111, 4 }, - { 134, 101, 112, 33 }, { 113, 113, 113, 0 }, { 117, 113, 114, 4 }, { 113, 117, 115, 4 }, { 130, 109, 116, 21 }, { 117, 117, 117, 0 }, { 121, 117, 118, 4 }, { 117, 121, 119, 4 }, - { 134, 113, 120, 21 }, { 121, 121, 121, 0 }, { 125, 121, 122, 4 }, { 121, 125, 123, 4 }, { 130, 121, 124, 9 }, { 125, 125, 125, 0 }, { 130, 125, 126, 5 }, { 121, 130, 127, 9 }, - { 125, 130, 128, 5 }, { 121, 134, 129, 13 }, { 130, 130, 130, 0 }, { 134, 130, 131, 4 }, { 130, 134, 132, 4 }, { 125, 138, 133, 13 }, { 134, 134, 134, 0 }, { 138, 134, 135, 4 }, - { 134, 138, 136, 4 }, { 121, 146, 137, 25 }, { 138, 138, 138, 0 }, { 142, 138, 139, 4 }, { 138, 142, 140, 4 }, { 125, 150, 141, 25 }, { 142, 142, 142, 0 }, { 146, 142, 143, 4 }, - { 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 150, 146, 147, 4 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 154, 150, 151, 4 }, - { 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 158, 154, 155, 4 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 162, 158, 159, 4 }, - { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 166, 162, 163, 4 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 
166, 166, 166, 0 }, { 170, 166, 167, 4 }, - { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 174, 170, 171, 4 }, { 170, 174, 172, 4 }, { 195, 162, 173, 33 }, { 174, 174, 174, 0 }, { 178, 174, 175, 4 }, - { 174, 178, 176, 4 }, { 199, 166, 177, 33 }, { 178, 178, 178, 0 }, { 182, 178, 179, 4 }, { 178, 182, 180, 4 }, { 195, 174, 181, 21 }, { 182, 182, 182, 0 }, { 186, 182, 183, 4 }, - { 182, 186, 184, 4 }, { 199, 178, 185, 21 }, { 186, 186, 186, 0 }, { 190, 186, 187, 4 }, { 186, 190, 188, 4 }, { 195, 186, 189, 9 }, { 190, 190, 190, 0 }, { 195, 190, 191, 5 }, - { 186, 195, 192, 9 }, { 190, 195, 193, 5 }, { 186, 199, 194, 13 }, { 195, 195, 195, 0 }, { 199, 195, 196, 4 }, { 195, 199, 197, 4 }, { 190, 203, 198, 13 }, { 199, 199, 199, 0 }, - { 203, 199, 200, 4 }, { 199, 203, 201, 4 }, { 186, 211, 202, 25 }, { 203, 203, 203, 0 }, { 207, 203, 204, 4 }, { 203, 207, 205, 4 }, { 190, 215, 206, 25 }, { 207, 207, 207, 0 }, - { 211, 207, 208, 4 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 215, 211, 212, 4 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 }, - { 219, 215, 216, 4 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 223, 219, 220, 4 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 }, - { 227, 223, 224, 4 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 231, 227, 228, 4 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 }, - { 235, 231, 232, 4 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 239, 235, 236, 4 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, - { 243, 239, 240, 4 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 247, 243, 244, 4 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 251, 247, 248, 4 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 255, 251, 252, 4 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 
255, 0 }, - }; - - SingleColorTableEntry g_singleColor5_2_p[256] = - { - { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 }, - { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 }, - { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 }, - { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 }, - { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, - { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 }, - { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, - { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, - { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, - { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 
90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 }, - { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 }, - { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 }, - { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, - { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 }, - { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 }, - { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 }, - { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 }, - { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 }, - { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, - { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, - { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 
189, 0 }, { 189, 189, 189, 0 }, - { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, - { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, - { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, - { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, - { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 }, - { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 }, - { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - - SingleColorTableEntry g_singleColor6_2_p[256] = - { - { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 }, - { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 }, - { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 }, - { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, 
{ 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 }, - { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 }, - { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 }, - { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 }, - { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 }, - { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 }, - { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 77, 77, 77, 0 }, { 77, 81, 79, 4 }, - { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 87, 4 }, - { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 }, - { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 }, - { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 }, - { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 }, - { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 
125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 }, - { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 }, - { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 142, 142, 142, 0 }, - { 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, - { 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, - { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, - { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, - { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, - { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, - { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 }, - { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 }, - { 207, 207, 207, 0 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 
}, { 215, 215, 215, 0 }, - { 215, 215, 215, 0 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 }, - { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 }, - { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, - { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, - { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, - }; - } - - class S3TCComputer - { - public: - typedef ParallelMath::Float MFloat; - typedef ParallelMath::SInt16 MSInt16; - typedef ParallelMath::UInt15 MUInt15; - typedef ParallelMath::UInt16 MUInt16; - typedef ParallelMath::SInt32 MSInt32; - - static void Init(MFloat& error) - { - error = ParallelMath::MakeFloat(FLT_MAX); - } - - static void QuantizeTo6Bits(MUInt15& v) - { - MUInt15 reduced = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(253)) + ParallelMath::MakeUInt16(512), 10)); - v = (reduced << 2) | ParallelMath::RightShift(reduced, 4); - } - - static void QuantizeTo5Bits(MUInt15& v) - { - MUInt15 reduced = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(249)) + ParallelMath::MakeUInt16(1024), 11)); - v = (reduced << 3) | ParallelMath::RightShift(reduced, 2); - } - - static void QuantizeTo565(MUInt15 endPoint[3]) - { - QuantizeTo5Bits(endPoint[0]); - QuantizeTo6Bits(endPoint[1]); - 
QuantizeTo5Bits(endPoint[2]); - } - - static MFloat ParanoidFactorForSpan(const MSInt16& span) - { - return ParallelMath::Abs(ParallelMath::ToFloat(span)) * 0.03f; - } - - static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d) - { - MFloat absDiff = ParallelMath::Abs(ParallelMath::ToFloat(ParallelMath::LosslessCast<MSInt16>::Cast(a) - ParallelMath::LosslessCast<MSInt16>::Cast(b))); - absDiff = absDiff + d; - return absDiff * absDiff; - } - - static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights, - MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn) - { - float channelWeightsSq[3]; - - for (int ch = 0; ch < 3; ch++) - channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; - - MUInt15 totals[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) }; - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 3; ch++) - totals[ch] = totals[ch] + pixels[px][ch]; - } - - MUInt15 average[3]; - for (int ch = 0; ch < 3; ch++) - average[ch] = ParallelMath::RightShift(totals[ch] + ParallelMath::MakeUInt15(8), 4); - - const S3TCSingleColorTables::SingleColorTableEntry* rbTable = NULL; - const S3TCSingleColorTables::SingleColorTableEntry* gTable = NULL; - if (flags & cvtt::Flags::S3TC_Paranoid) - { - if (range == 4) - { - rbTable = S3TCSingleColorTables::g_singleColor5_3_p; - gTable = S3TCSingleColorTables::g_singleColor6_3_p; - } - else - { - assert(range == 3); - rbTable = S3TCSingleColorTables::g_singleColor5_2_p; - gTable = S3TCSingleColorTables::g_singleColor6_2_p; - } - } - else - { - if (range == 4) - { - rbTable = S3TCSingleColorTables::g_singleColor5_3; - gTable = S3TCSingleColorTables::g_singleColor6_3; - } - else - { - assert(range == 3); - rbTable = S3TCSingleColorTables::g_singleColor5_2; - gTable = 
S3TCSingleColorTables::g_singleColor6_2; - } - } - - MUInt15 interpolated[3]; - MUInt15 eps[2][3]; - MSInt16 spans[3]; - for (int i = 0; i < ParallelMath::ParallelSize; i++) - { - for (int ch = 0; ch < 3; ch++) - { - uint16_t avg = ParallelMath::Extract(average[ch], i); - const S3TCSingleColorTables::SingleColorTableEntry& tableEntry = ((ch == 1) ? gTable[avg] : rbTable[avg]); - ParallelMath::PutUInt15(eps[0][ch], i, tableEntry.m_min); - ParallelMath::PutUInt15(eps[1][ch], i, tableEntry.m_max); - ParallelMath::PutUInt15(interpolated[ch], i, tableEntry.m_actualColor); - ParallelMath::PutSInt16(spans[ch], i, tableEntry.m_span); - } - } - - MFloat error = ParallelMath::MakeFloatZero(); - if (flags & cvtt::Flags::S3TC_Paranoid) - { - MFloat spanParanoidFactors[3]; - for (int ch = 0; ch < 3; ch++) - spanParanoidFactors[ch] = ParanoidFactorForSpan(spans[ch]); - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 3; ch++) - error = error + ParanoidDiff(interpolated[ch], pixels[px][ch], spanParanoidFactors[ch]) * channelWeightsSq[ch]; - } - } - else - { - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 3; ch++) - error = error + ParallelMath::ToFloat(ParallelMath::SqDiffUInt8(interpolated[ch], pixels[px][ch])) * channelWeightsSq[ch]; - } - } - - ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError); - ParallelMath::Int16CompFlag better16 = ParallelMath::FloatFlagToInt16(better); - - if (ParallelMath::AnySet(better16)) - { - bestError = ParallelMath::Min(bestError, error); - for (int epi = 0; epi < 2; epi++) - for (int ch = 0; ch < 3; ch++) - ParallelMath::ConditionalSet(bestEndpoints[epi][ch], better16, eps[epi][ch]); - - MUInt15 vindexes = ParallelMath::MakeUInt15(1); - for (int px = 0; px < 16; px++) - ParallelMath::ConditionalSet(bestIndexes[px], better16, vindexes); - - ParallelMath::ConditionalSet(bestRange, better16, ParallelMath::MakeUInt15(range)); - } - } - - static void TestEndpoints(uint32_t flags, const MUInt15 
pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights, - MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn) - { - float channelWeightsSq[3]; - - for (int ch = 0; ch < 3; ch++) - channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; - - MUInt15 endPoints[2][3]; - - for (int ep = 0; ep < 2; ep++) - for (int ch = 0; ch < 3; ch++) - endPoints[ep][ch] = unquantizedEndPoints[ep][ch]; - - QuantizeTo565(endPoints[0]); - QuantizeTo565(endPoints[1]); - - IndexSelector<3> selector; - selector.Init<false>(channelWeights, endPoints, range); - - MUInt15 indexes[16]; - - MFloat paranoidFactors[3]; - for (int ch = 0; ch < 3; ch++) - paranoidFactors[ch] = ParanoidFactorForSpan(ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[0][ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[1][ch])); - - MFloat error = ParallelMath::MakeFloatZero(); - AggregatedError<3> aggError; - for (int px = 0; px < 16; px++) - { - MUInt15 index = selector.SelectIndexLDR(floatPixels[px], rtn); - indexes[px] = index; - - if (refiner) - refiner->ContributeUnweightedPW(preWeightedPixels[px], index); - - MUInt15 reconstructed[3]; - selector.ReconstructLDRPrecise(index, reconstructed); - - if (flags & Flags::S3TC_Paranoid) - { - for (int ch = 0; ch < 3; ch++) - error = error + ParanoidDiff(reconstructed[ch], pixels[px][ch], paranoidFactors[ch]) * channelWeightsSq[ch]; - } - else - BCCommon::ComputeErrorLDR<3>(flags, reconstructed, pixels[px], aggError); - } - - if (!(flags & Flags::S3TC_Paranoid)) - error = aggError.Finalize(flags, channelWeightsSq); - - ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError); - - if (ParallelMath::AnySet(better)) - { - ParallelMath::Int16CompFlag betterInt16 = ParallelMath::FloatFlagToInt16(better); - - 
ParallelMath::ConditionalSet(bestError, better, error); - - for (int ep = 0; ep < 2; ep++) - for (int ch = 0; ch < 3; ch++) - ParallelMath::ConditionalSet(bestEndpoints[ep][ch], betterInt16, endPoints[ep][ch]); - - for (int px = 0; px < 16; px++) - ParallelMath::ConditionalSet(bestIndexes[px], betterInt16, indexes[px]); - - ParallelMath::ConditionalSet(bestRange, betterInt16, ParallelMath::MakeUInt15(static_cast<uint16_t>(range))); - } - } - - static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest, - const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, - const ParallelMath::RoundTowardNearestForScope* rtn) - { - UNREFERENCED_PARAMETER(alphaTest); - UNREFERENCED_PARAMETER(flags); - - EndpointRefiner<3> refiner; - - refiner.Init(nCounts, channelWeights); - - bool escape = false; - int e = 0; - for (int i = 0; i < nCounts; i++) - { - for (int n = 0; n < counts[i]; n++) - { - ParallelMath::Int16CompFlag valid = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>(n)), numElements); - if (!ParallelMath::AnySet(valid)) - { - escape = true; - break; - } - - if (ParallelMath::AllSet(valid)) - refiner.ContributeUnweightedPW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i))); - else - { - MFloat weight = ParallelMath::Select(ParallelMath::Int16FlagToFloat(valid), ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloat(0.0f)); - refiner.ContributePW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), weight); - } - } - - if (escape) - break; - } - - MUInt15 endPoints[2][3]; - refiner.GetRefinedEndpointsLDR(endPoints, rtn); - - TestEndpoints(flags, pixels, floatPixels, 
preWeightedPixels, endPoints, nCounts, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, NULL, rtn); - } - - static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride) - { - UNREFERENCED_PARAMETER(flags); - ParallelMath::RoundTowardNearestForScope rtn; - - float weights[1] = { 1.0f }; - - MUInt15 pixels[16]; - MFloat floatPixels[16]; - - for (int px = 0; px < 16; px++) - { - ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]); - floatPixels[px] = ParallelMath::ToFloat(pixels[px]); - } - - MUInt15 ep[2][1] = { { ParallelMath::MakeUInt15(0) },{ ParallelMath::MakeUInt15(255) } }; - - IndexSelector<1> selector; - selector.Init<false>(weights, ep, 16); - - MUInt15 indexes[16]; - - for (int px = 0; px < 16; px++) - indexes[px] = selector.SelectIndexLDR(&floatPixels[px], &rtn); - - for (int block = 0; block < ParallelMath::ParallelSize; block++) - { - for (int px = 0; px < 16; px += 8) - { - int index0 = ParallelMath::Extract(indexes[px], block); - int index1 = ParallelMath::Extract(indexes[px], block); - - packedBlocks[px / 2] = static_cast<uint8_t>(index0 | (index1 << 4)); - } - - packedBlocks += packedBlockStride; - } - } - - static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds) - { - if (maxTweakRounds < 1) - maxTweakRounds = 1; - - if (numRefineRounds < 1) - numRefineRounds = 1; - - ParallelMath::RoundTowardNearestForScope rtn; - - float oneWeight[1] = { 1.0f }; - - MUInt15 pixels[16]; - MFloat floatPixels[16]; - - MUInt15 highTerminal = isSigned ? 
ParallelMath::MakeUInt15(254) : ParallelMath::MakeUInt15(255); - MUInt15 highTerminalMinusOne = highTerminal - ParallelMath::MakeUInt15(1); - - for (int px = 0; px < 16; px++) - { - ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]); - - if (isSigned) - pixels[px] = ParallelMath::Min(pixels[px], highTerminal); - - floatPixels[px] = ParallelMath::ToFloat(pixels[px]); - } - - MUInt15 sortedPixels[16]; - for (int px = 0; px < 16; px++) - sortedPixels[px] = pixels[px]; - - for (int sortEnd = 15; sortEnd > 0; sortEnd--) - { - for (int sortOffset = 0; sortOffset < sortEnd; sortOffset++) - { - MUInt15 a = sortedPixels[sortOffset]; - MUInt15 b = sortedPixels[sortOffset + 1]; - - sortedPixels[sortOffset] = ParallelMath::Min(a, b); - sortedPixels[sortOffset + 1] = ParallelMath::Max(a, b); - } - } - - MUInt15 zero = ParallelMath::MakeUInt15(0); - MUInt15 one = ParallelMath::MakeUInt15(1); - - MUInt15 bestIsFullRange = zero; - MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); - MUInt15 bestEP[2] = { zero, zero }; - MUInt15 bestIndexes[16] = { - zero, zero, zero, zero, - zero, zero, zero, zero, - zero, zero, zero, zero, - zero, zero, zero, zero - }; - - // Full-precision - { - MUInt15 minEP = sortedPixels[0]; - MUInt15 maxEP = sortedPixels[15]; - - MFloat base[1] = { ParallelMath::ToFloat(minEP) }; - MFloat offset[1] = { ParallelMath::ToFloat(maxEP - minEP) }; - - UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset); - - int numTweakRounds = BCCommon::TweakRoundsForRange(8); - if (numTweakRounds > maxTweakRounds) - numTweakRounds = maxTweakRounds; - - for (int tweak = 0; tweak < numTweakRounds; tweak++) - { - MUInt15 ep[2][1]; - - ufep.FinishLDR(tweak, 8, ep[0], ep[1]); - - for (int refinePass = 0; refinePass < numRefineRounds; refinePass++) - { - EndpointRefiner<1> refiner; - refiner.Init(8, oneWeight); - - if (isSigned) - for (int epi = 0; epi < 2; epi++) - ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal); - - IndexSelector<1> 
indexSelector; - indexSelector.Init<false>(oneWeight, ep, 8); - - MUInt15 indexes[16]; - - AggregatedError<1> aggError; - for (int px = 0; px < 16; px++) - { - MUInt15 index = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn); - - MUInt15 reconstructedPixel; - - indexSelector.ReconstructLDRPrecise(index, &reconstructedPixel); - BCCommon::ComputeErrorLDR<1>(flags, &reconstructedPixel, &pixels[px], aggError); - - if (refinePass != numRefineRounds - 1) - refiner.ContributeUnweightedPW(&floatPixels[px], index); - - indexes[px] = index; - } - MFloat error = aggError.Finalize(flags | Flags::Uniform, oneWeight); - - ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); - ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); - - if (ParallelMath::AnySet(errorBetter16)) - { - bestError = ParallelMath::Min(error, bestError); - ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, one); - for (int px = 0; px < 16; px++) - ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]); - - for (int epi = 0; epi < 2; epi++) - ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]); - } - - if (refinePass != numRefineRounds - 1) - refiner.GetRefinedEndpointsLDR(ep, &rtn); - } - } - } - - // Reduced precision with special endpoints - { - MUInt15 bestHeuristicMin = sortedPixels[0]; - MUInt15 bestHeuristicMax = sortedPixels[15]; - - ParallelMath::Int16CompFlag canTryClipping; - - // In reduced precision, we want try putting endpoints at the reserved indexes at the ends. - // The heuristic we use is to assign indexes to the end as long as they aren't off by more than half of the index range. - // This will usually not find anything, but it's cheap to check. 
- - { - MUInt15 largestPossibleRange = bestHeuristicMax - bestHeuristicMin; // Max: 255 - MUInt15 lowestPossibleClearance = ParallelMath::Min(bestHeuristicMin, static_cast<MUInt15>(highTerminal - bestHeuristicMax)); - - MUInt15 lowestPossibleClearanceTimes10 = (lowestPossibleClearance << 2) + (lowestPossibleClearance << 4); - canTryClipping = ParallelMath::LessOrEqual(lowestPossibleClearanceTimes10, largestPossibleRange); - } - - if (ParallelMath::AnySet(canTryClipping)) - { - MUInt15 lowClearances[16]; - MUInt15 highClearances[16]; - MUInt15 bestSkipCount = ParallelMath::MakeUInt15(0); - - lowClearances[0] = highClearances[0] = ParallelMath::MakeUInt15(0); - - for (int px = 1; px < 16; px++) - { - lowClearances[px] = sortedPixels[px - 1]; - highClearances[px] = highTerminal - sortedPixels[16 - px]; - } - - for (uint16_t firstIndex = 0; firstIndex < 16; firstIndex++) - { - uint16_t numSkippedLow = firstIndex; - - MUInt15 lowClearance = lowClearances[firstIndex]; - - for (uint16_t lastIndex = firstIndex; lastIndex < 16; lastIndex++) - { - uint16_t numSkippedHigh = 15 - lastIndex; - uint16_t numSkipped = numSkippedLow + numSkippedHigh; - - MUInt15 numSkippedV = ParallelMath::MakeUInt15(numSkipped); - - ParallelMath::Int16CompFlag areMoreSkipped = ParallelMath::Less(bestSkipCount, numSkippedV); - - if (!ParallelMath::AnySet(areMoreSkipped)) - continue; - - MUInt15 clearance = ParallelMath::Max(highClearances[numSkippedHigh], lowClearance); - MUInt15 clearanceTimes10 = (clearance << 2) + (clearance << 4); - - MUInt15 range = sortedPixels[lastIndex] - sortedPixels[firstIndex]; - - ParallelMath::Int16CompFlag isBetter = (areMoreSkipped & ParallelMath::LessOrEqual(clearanceTimes10, range)); - ParallelMath::ConditionalSet(bestHeuristicMin, isBetter, sortedPixels[firstIndex]); - ParallelMath::ConditionalSet(bestHeuristicMax, isBetter, sortedPixels[lastIndex]); - } - } - } - - MUInt15 bestSimpleMin = one; - MUInt15 bestSimpleMax = highTerminalMinusOne; - - for (int px = 0; 
px < 16; px++) - { - ParallelMath::ConditionalSet(bestSimpleMin, ParallelMath::Less(zero, sortedPixels[15 - px]), sortedPixels[15 - px]); - ParallelMath::ConditionalSet(bestSimpleMax, ParallelMath::Less(sortedPixels[px], highTerminal), sortedPixels[px]); - } - - MUInt15 minEPs[2] = { bestSimpleMin, bestHeuristicMin }; - MUInt15 maxEPs[2] = { bestSimpleMax, bestHeuristicMax }; - - int minEPRange = 2; - if (ParallelMath::AllSet(ParallelMath::Equal(minEPs[0], minEPs[1]))) - minEPRange = 1; - - int maxEPRange = 2; - if (ParallelMath::AllSet(ParallelMath::Equal(maxEPs[0], maxEPs[1]))) - maxEPRange = 1; - - for (int minEPIndex = 0; minEPIndex < minEPRange; minEPIndex++) - { - for (int maxEPIndex = 0; maxEPIndex < maxEPRange; maxEPIndex++) - { - MFloat base[1] = { ParallelMath::ToFloat(minEPs[minEPIndex]) }; - MFloat offset[1] = { ParallelMath::ToFloat(maxEPs[maxEPIndex] - minEPs[minEPIndex]) }; - - UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset); - - int numTweakRounds = BCCommon::TweakRoundsForRange(6); - if (numTweakRounds > maxTweakRounds) - numTweakRounds = maxTweakRounds; - - for (int tweak = 0; tweak < numTweakRounds; tweak++) - { - MUInt15 ep[2][1]; - - ufep.FinishLDR(tweak, 8, ep[0], ep[1]); - - for (int refinePass = 0; refinePass < numRefineRounds; refinePass++) - { - EndpointRefiner<1> refiner; - refiner.Init(6, oneWeight); - - if (isSigned) - for (int epi = 0; epi < 2; epi++) - ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal); - - IndexSelector<1> indexSelector; - indexSelector.Init<false>(oneWeight, ep, 6); - - MUInt15 indexes[16]; - MFloat error = ParallelMath::MakeFloatZero(); - - for (int px = 0; px < 16; px++) - { - MUInt15 selectedIndex = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn); - - MUInt15 reconstructedPixel; - - indexSelector.ReconstructLDRPrecise(selectedIndex, &reconstructedPixel); - - MFloat zeroError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &zero, &pixels[px], 1, oneWeight); - MFloat 
highTerminalError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &highTerminal, &pixels[px], 1, oneWeight); - MFloat selectedIndexError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &reconstructedPixel, &pixels[px], 1, oneWeight); - - MFloat bestPixelError = zeroError; - MUInt15 index = ParallelMath::MakeUInt15(6); - - ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(ParallelMath::Less(highTerminalError, bestPixelError)), ParallelMath::MakeUInt15(7)); - bestPixelError = ParallelMath::Min(bestPixelError, highTerminalError); - - ParallelMath::FloatCompFlag selectedIndexBetter = ParallelMath::Less(selectedIndexError, bestPixelError); - - if (ParallelMath::AllSet(selectedIndexBetter)) - { - if (refinePass != numRefineRounds - 1) - refiner.ContributeUnweightedPW(&floatPixels[px], selectedIndex); - } - else - { - MFloat refineWeight = ParallelMath::Select(selectedIndexBetter, ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloatZero()); - - if (refinePass != numRefineRounds - 1) - refiner.ContributePW(&floatPixels[px], selectedIndex, refineWeight); - } - - ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(selectedIndexBetter), selectedIndex); - bestPixelError = ParallelMath::Min(bestPixelError, selectedIndexError); - - error = error + bestPixelError; - - indexes[px] = index; - } - - ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); - ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); - - if (ParallelMath::AnySet(errorBetter16)) - { - bestError = ParallelMath::Min(error, bestError); - ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, zero); - for (int px = 0; px < 16; px++) - ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]); - - for (int epi = 0; epi < 2; epi++) - ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]); - } - - if (refinePass != numRefineRounds - 1) - 
refiner.GetRefinedEndpointsLDR(ep, &rtn); - } - } - } - } - } - - for (int block = 0; block < ParallelMath::ParallelSize; block++) - { - int ep0 = ParallelMath::Extract(bestEP[0], block); - int ep1 = ParallelMath::Extract(bestEP[1], block); - int isFullRange = ParallelMath::Extract(bestIsFullRange, block); - - if (isSigned) - { - ep0 -= 127; - ep1 -= 127; - - assert(ep0 >= -127 && ep0 <= 127); - assert(ep1 >= -127 && ep1 <= 127); - } - - - bool swapEndpoints = (isFullRange != 0) != (ep0 > ep1); - - if (swapEndpoints) - std::swap(ep0, ep1); - - uint16_t dumpBits = 0; - int dumpBitsOffset = 0; - int dumpByteOffset = 2; - packedBlocks[0] = static_cast<uint8_t>(ep0 & 0xff); - packedBlocks[1] = static_cast<uint8_t>(ep1 & 0xff); - - int maxValue = (isFullRange != 0) ? 7 : 5; - - for (int px = 0; px < 16; px++) - { - int index = ParallelMath::Extract(bestIndexes[px], block); - - if (swapEndpoints && index <= maxValue) - index = maxValue - index; - - if (index != 0) - { - if (index == maxValue) - index = 1; - else if (index < maxValue) - index++; - } - - assert(index >= 0 && index < 8); - - dumpBits |= static_cast<uint16_t>(index << dumpBitsOffset); - dumpBitsOffset += 3; - - if (dumpBitsOffset >= 8) - { - assert(dumpByteOffset < 8); - packedBlocks[dumpByteOffset] = static_cast<uint8_t>(dumpBits & 0xff); - dumpBits >>= 8; - dumpBitsOffset -= 8; - dumpByteOffset++; - } - } - - assert(dumpBitsOffset == 0); - assert(dumpByteOffset == 8); - - packedBlocks += packedBlockStride; - } - } - - static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds) - { - ParallelMath::RoundTowardNearestForScope rtn; - - if (numRefineRounds < 1) - numRefineRounds = 1; - - if (maxTweakRounds < 1) - maxTweakRounds = 1; - - EndpointSelector<3, 8> endpointSelector; - - MUInt15 pixels[16][4]; - MFloat floatPixels[16][4]; - 
- MFloat preWeightedPixels[16][4]; - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 4; ch++) - ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]); - } - - for (int px = 0; px < 16; px++) - { - for (int ch = 0; ch < 4; ch++) - floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); - } - - if (alphaTest) - { - MUInt15 threshold = ParallelMath::MakeUInt15(static_cast<uint16_t>(floor(alphaThreshold * 255.0f + 0.5f))); - - for (int px = 0; px < 16; px++) - { - ParallelMath::Int16CompFlag belowThreshold = ParallelMath::Less(pixels[px][3], threshold); - pixels[px][3] = ParallelMath::Select(belowThreshold, ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(255)); - } - } - - BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); - - MUInt15 minAlpha = ParallelMath::MakeUInt15(255); - - for (int px = 0; px < 16; px++) - minAlpha = ParallelMath::Min(minAlpha, pixels[px][3]); - - MFloat pixelWeights[16]; - for (int px = 0; px < 16; px++) - { - pixelWeights[px] = ParallelMath::MakeFloat(1.0f); - if (alphaTest) - { - ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255)); - - ParallelMath::ConditionalSet(pixelWeights[px], ParallelMath::Int16FlagToFloat(isTransparent), ParallelMath::MakeFloatZero()); - } - } - - for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) - { - for (int px = 0; px < 16; px++) - endpointSelector.ContributePass(preWeightedPixels[px], pass, pixelWeights[px]); - - endpointSelector.FinishPass(pass); - } - - UnfinishedEndpoints<3> ufep = endpointSelector.GetEndpoints(channelWeights); - - MUInt15 bestEndpoints[2][3]; - MUInt15 bestIndexes[16]; - MUInt15 bestRange = ParallelMath::MakeUInt15(0); - MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); - - for (int px = 0; px < 16; px++) - bestIndexes[px] = ParallelMath::MakeUInt15(0); - - for (int ep = 0; ep < 2; ep++) - for (int ch = 0; ch < 3; ch++) - bestEndpoints[ep][ch] = 
ParallelMath::MakeUInt15(0); - - if (exhaustive) - { - MSInt16 sortBins[16]; - - { - // Compute an 11-bit index, change it to signed, stuff it in the high bits of the sort bins, - // and pack the original indexes into the low bits. - - MUInt15 sortEP[2][3]; - ufep.FinishLDR(0, 11, sortEP[0], sortEP[1]); - - IndexSelector<3> sortSelector; - sortSelector.Init<false>(channelWeights, sortEP, 1 << 11); - - for (int16_t px = 0; px < 16; px++) - { - MSInt16 sortBin = ParallelMath::LosslessCast<MSInt16>::Cast(sortSelector.SelectIndexLDR(floatPixels[px], &rtn) << 4); - - if (alphaTest) - { - ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255)); - - ParallelMath::ConditionalSet(sortBin, isTransparent, ParallelMath::MakeSInt16(-16)); // 0xfff0 - } - - sortBin = sortBin + ParallelMath::MakeSInt16(px); - - sortBins[px] = sortBin; - } - } - - // Sort bins - for (int sortEnd = 1; sortEnd < 16; sortEnd++) - { - for (int sortLoc = sortEnd; sortLoc > 0; sortLoc--) - { - MSInt16 a = sortBins[sortLoc]; - MSInt16 b = sortBins[sortLoc - 1]; - - sortBins[sortLoc] = ParallelMath::Max(a, b); - sortBins[sortLoc - 1] = ParallelMath::Min(a, b); - } - } - - MUInt15 firstElement = ParallelMath::MakeUInt15(0); - for (uint16_t e = 0; e < 16; e++) - { - ParallelMath::Int16CompFlag isInvalid = ParallelMath::Less(sortBins[e], ParallelMath::MakeSInt16(0)); - ParallelMath::ConditionalSet(firstElement, isInvalid, ParallelMath::MakeUInt15(e + 1)); - if (!ParallelMath::AnySet(isInvalid)) - break; - } - - MUInt15 numElements = ParallelMath::MakeUInt15(16) - firstElement; - - MUInt15 sortedInputs[16][4]; - MFloat floatSortedInputs[16][4]; - MFloat pwFloatSortedInputs[16][4]; - - for (int e = 0; e < 16; e++) - { - for (int ch = 0; ch < 4; ch++) - sortedInputs[e][ch] = ParallelMath::MakeUInt15(0); - } - - for (int block = 0; block < ParallelMath::ParallelSize; block++) - { - for (int e = ParallelMath::Extract(firstElement, block); e < 16; e++) - { - 
ParallelMath::ScalarUInt16 sortBin = ParallelMath::Extract(sortBins[e], block); - int originalIndex = (sortBin & 15); - - for (int ch = 0; ch < 4; ch++) - ParallelMath::PutUInt15(sortedInputs[15 - e][ch], block, ParallelMath::Extract(pixels[originalIndex][ch], block)); - } - } - - for (int e = 0; e < 16; e++) - { - for (int ch = 0; ch < 4; ch++) - { - MFloat f = ParallelMath::ToFloat(sortedInputs[e][ch]); - floatSortedInputs[e][ch] = f; - pwFloatSortedInputs[e][ch] = f * channelWeights[ch]; - } - } - - for (int n0 = 0; n0 <= 15; n0++) - { - int remainingFor1 = 16 - n0; - if (remainingFor1 == 16) - remainingFor1 = 15; - - for (int n1 = 0; n1 <= remainingFor1; n1++) - { - int remainingFor2 = 16 - n1 - n0; - if (remainingFor2 == 16) - remainingFor2 = 15; - - for (int n2 = 0; n2 <= remainingFor2; n2++) - { - int n3 = 16 - n2 - n1 - n0; - - if (n3 == 16) - continue; - - int counts[4] = { n0, n1, n2, n3 }; - - TestCounts(flags, counts, 4, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); - } - } - } - - TestSingleColor(flags, pixels, floatPixels, 4, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); - - if (alphaTest) - { - for (int n0 = 0; n0 <= 15; n0++) - { - int remainingFor1 = 16 - n0; - if (remainingFor1 == 16) - remainingFor1 = 15; - - for (int n1 = 0; n1 <= remainingFor1; n1++) - { - int n2 = 16 - n1 - n0; - - if (n2 == 16) - continue; - - int counts[3] = { n0, n1, n2 }; - - TestCounts(flags, counts, 3, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); - } - } - - TestSingleColor(flags, pixels, floatPixels, 3, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); - } - } - else - { - int minRange = alphaTest ? 
3 : 4; - - for (int range = minRange; range <= 4; range++) - { - int tweakRounds = BCCommon::TweakRoundsForRange(range); - if (tweakRounds > maxTweakRounds) - tweakRounds = maxTweakRounds; - - for (int tweak = 0; tweak < tweakRounds; tweak++) - { - MUInt15 endPoints[2][3]; - - ufep.FinishLDR(tweak, range, endPoints[0], endPoints[1]); - - for (int refine = 0; refine < numRefineRounds; refine++) - { - EndpointRefiner<3> refiner; - refiner.Init(range, channelWeights); - - TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, range, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &refiner, &rtn); - - if (refine != numRefineRounds - 1) - refiner.GetRefinedEndpointsLDR(endPoints, &rtn); - } - } - } - } - - for (int block = 0; block < ParallelMath::ParallelSize; block++) - { - ParallelMath::ScalarUInt16 range = ParallelMath::Extract(bestRange, block); - assert(range == 3 || range == 4); - - ParallelMath::ScalarUInt16 compressedEP[2]; - for (int ep = 0; ep < 2; ep++) - { - ParallelMath::ScalarUInt16 endPoint[3]; - for (int ch = 0; ch < 3; ch++) - endPoint[ch] = ParallelMath::Extract(bestEndpoints[ep][ch], block); - - int compressed = (endPoint[0] & 0xf8) << 8; - compressed |= (endPoint[1] & 0xfc) << 3; - compressed |= (endPoint[2] & 0xf8) >> 3; - - compressedEP[ep] = static_cast<ParallelMath::ScalarUInt16>(compressed); - } - - int indexOrder[4]; - - if (range == 4) - { - if (compressedEP[0] == compressedEP[1]) - { - indexOrder[0] = 0; - indexOrder[1] = 0; - indexOrder[2] = 0; - indexOrder[3] = 0; - } - else if (compressedEP[0] < compressedEP[1]) - { - std::swap(compressedEP[0], compressedEP[1]); - indexOrder[0] = 1; - indexOrder[1] = 3; - indexOrder[2] = 2; - indexOrder[3] = 0; - } - else - { - indexOrder[0] = 0; - indexOrder[1] = 2; - indexOrder[2] = 3; - indexOrder[3] = 1; - } - } - else - { - assert(range == 3); - - if (compressedEP[0] > compressedEP[1]) - { - std::swap(compressedEP[0], compressedEP[1]); - indexOrder[0] = 1; - 
indexOrder[1] = 2; - indexOrder[2] = 0; - } - else - { - indexOrder[0] = 0; - indexOrder[1] = 2; - indexOrder[2] = 1; - } - indexOrder[3] = 3; - } - - packedBlocks[0] = static_cast<uint8_t>(compressedEP[0] & 0xff); - packedBlocks[1] = static_cast<uint8_t>((compressedEP[0] >> 8) & 0xff); - packedBlocks[2] = static_cast<uint8_t>(compressedEP[1] & 0xff); - packedBlocks[3] = static_cast<uint8_t>((compressedEP[1] >> 8) & 0xff); - - for (int i = 0; i < 16; i += 4) - { - int packedIndexes = 0; - for (int subi = 0; subi < 4; subi++) - { - ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[i + subi], block); - packedIndexes |= (indexOrder[index] << (subi * 2)); - } - - packedBlocks[4 + i / 4] = static_cast<uint8_t>(packedIndexes); - } - - packedBlocks += packedBlockStride; - } - } - }; - - // Signed input blocks are converted into unsigned space, with the maximum value being 254 - void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]) - { - for (size_t block = 0; block < ParallelMath::ParallelSize; block++) - { - const PixelBlockS8& inputSignedBlock = inputSigned[block]; - PixelBlockU8& inputNormalizedBlock = inputNormalized[block]; - - for (size_t px = 0; px < 16; px++) - { - for (size_t ch = 0; ch < 4; ch++) - inputNormalizedBlock.m_pixels[px][ch] = static_cast<uint8_t>(std::max<int>(inputSignedBlock.m_pixels[px][ch], -127) + 127); - } - } - } - - void FillWeights(const Options &options, float channelWeights[4]) - { - if (options.flags & Flags::Uniform) - channelWeights[0] = channelWeights[1] = channelWeights[2] = channelWeights[3] = 1.0f; - else - { - channelWeights[0] = options.redWeight; - channelWeights[1] = options.greenWeight; - channelWeights[2] = options.blueWeight; - channelWeights[3] = options.alphaWeight; - } - } - } - - namespace Kernels - { - void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) - { - assert(pBlocks); - 
assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, options.seedPoints, options.refineRoundsBC7); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, true, options.seedPoints, options.refineRoundsBC6H); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); - pBC += ParallelMath::ParallelSize * 8; - 
} - } - - void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); - Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC); - pBC += ParallelMath::ParallelSize * 8; - } - } - - void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) - { - assert(pBlocks); - 
assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; - Internal::BiasSignedInput(inputBlocks, pBlocks + blockBase); - - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC); - pBC += ParallelMath::ParallelSize * 8; - } - } - - void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) - { - assert(pBlocks); - assert(pBC); - - float channelWeights[4]; - Internal::FillWeights(options, channelWeights); - - for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) - { - PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; - Internal::BiasSignedInput(inputBlocks, pBlocks + blockBase); - - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC); - Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC); - pBC += ParallelMath::ParallelSize * 16; - } - } - - void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC) 
- { - assert(pBlocks); - assert(pBC); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) - { - Internal::BC7Computer::UnpackOne(pBlocks[blockBase], pBC); - pBC += 16; - } - } - - void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC) - { - assert(pBlocks); - assert(pBC); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) - { - Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, false); - pBC += 16; - } - } - - void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC) - { - assert(pBlocks); - assert(pBC); - - for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) - { - Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, true); - pBC += 16; - } - } - } -} diff --git a/thirdparty/cvtt/ConvectionKernels.h b/thirdparty/cvtt/ConvectionKernels.h index fb5ca130f9..3da48405ff 100644 --- a/thirdparty/cvtt/ConvectionKernels.h +++ b/thirdparty/cvtt/ConvectionKernels.h @@ -25,21 +25,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#ifndef __CVTT_CONVECTION_KERNELS__ #define __CVTT_CONVECTION_KERNELS__ +#include <stddef.h> #include <stdint.h> namespace cvtt { namespace Flags { - // Enable partitioned modes in BC7 encoding (slower, better quality) - const uint32_t BC7_EnablePartitioning = 0x001; - - // Enable 3-partition modes in BC7 encoding (slower, better quality, requires BC7_EnablePartitioning) - const uint32_t BC7_Enable3Subsets = 0x002; - - // Enable dual-plane modes in BC7 encoding (slower, better quality) - const uint32_t BC7_EnableDualPlane = 0x004; - // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality) const uint32_t BC7_FastIndexing = 0x008; @@ -61,13 +53,19 @@ namespace cvtt // Uniform color channel importance const uint32_t Uniform = 0x200; + // Use fake BT.709 color space for etc2comp compatibility (slower) + const uint32_t ETC_UseFakeBT709 = 0x400; + + // Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks) + const uint32_t ETC_FakeBT709Accurate = 0x800; + // Misc useful default flag combinations - const uint32_t Fastest = (BC6H_FastIndexing | S3TC_Paranoid); - const uint32_t Faster = (BC7_EnableDualPlane | BC6H_FastIndexing | S3TC_Paranoid); - const uint32_t Fast = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_FastIndexing | S3TC_Paranoid); - const uint32_t Default = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | BC7_FastIndexing | S3TC_Paranoid); - const uint32_t Better = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | S3TC_Paranoid | S3TC_Exhaustive); - const uint32_t Ultra = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive); + const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid); + const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid); + const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid); + const uint32_t 
Default = (BC7_FastIndexing | S3TC_Paranoid); + const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive); + const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate); } const unsigned int NumParallelBlocks = 8; @@ -81,7 +79,7 @@ namespace cvtt float blueWeight; // Blue channel importance float alphaWeight; // Alpha channel importance - int refineRoundsBC7; // Number of refine rounds for BC7 + int refineRoundsBC7; // Number of refine rounds for BC7 int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3) int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5) int refineRoundsS3TC; // Number of refine rounds for S3TC RGB @@ -104,6 +102,102 @@ namespace cvtt } }; + struct BC7FineTuningParams + { + // Seed point counts for each mode+configuration combination + uint8_t mode0SP[16]; + uint8_t mode1SP[64]; + uint8_t mode2SP[64]; + uint8_t mode3SP[64]; + uint8_t mode4SP[4][2]; + uint8_t mode5SP[4]; + uint8_t mode6SP; + uint8_t mode7SP[64]; + + BC7FineTuningParams() + { + for (int i = 0; i < 16; i++) + this->mode0SP[i] = 4; + + for (int i = 0; i < 64; i++) + { + this->mode1SP[i] = 4; + this->mode2SP[i] = 4; + this->mode3SP[i] = 4; + this->mode7SP[i] = 4; + } + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 2; j++) + this->mode4SP[i][j] = 4; + + this->mode5SP[i] = 4; + } + + this->mode6SP = 4; + } + }; + + struct BC7EncodingPlan + { + static const int kNumRGBAShapes = 129; + static const int kNumRGBShapes = 243; + + uint64_t mode1PartitionEnabled; + uint64_t mode2PartitionEnabled; + uint64_t mode3PartitionEnabled; + uint16_t mode0PartitionEnabled; + uint64_t mode7RGBAPartitionEnabled; + uint64_t mode7RGBPartitionEnabled; + uint8_t mode4SP[4][2]; + uint8_t mode5SP[4]; + bool mode6Enabled; + + uint8_t seedPointsForShapeRGB[kNumRGBShapes]; + uint8_t seedPointsForShapeRGBA[kNumRGBAShapes]; + + uint8_t rgbaShapeList[kNumRGBAShapes]; + uint8_t rgbaNumShapesToEvaluate; 
+ + uint8_t rgbShapeList[kNumRGBShapes]; + uint8_t rgbNumShapesToEvaluate; + + BC7EncodingPlan() + { + for (int i = 0; i < kNumRGBShapes; i++) + { + this->rgbShapeList[i] = i; + this->seedPointsForShapeRGB[i] = 4; + } + this->rgbNumShapesToEvaluate = kNumRGBShapes; + + for (int i = 0; i < kNumRGBAShapes; i++) + { + this->rgbaShapeList[i] = i; + this->seedPointsForShapeRGBA[i] = 4; + } + this->rgbaNumShapesToEvaluate = kNumRGBAShapes; + + + this->mode0PartitionEnabled = 0xffff; + this->mode1PartitionEnabled = 0xffffffffffffffffULL; + this->mode2PartitionEnabled = 0xffffffffffffffffULL; + this->mode3PartitionEnabled = 0xffffffffffffffffULL; + this->mode6Enabled = true; + this->mode7RGBPartitionEnabled = 0xffffffffffffffffULL; + this->mode7RGBAPartitionEnabled = 0xffffffffffffffffULL; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 2; j++) + this->mode4SP[i][j] = 4; + + this->mode5SP[i] = 4; + } + } + }; + // RGBA input block for unsigned 8-bit formats struct PixelBlockU8 { @@ -116,14 +210,34 @@ namespace cvtt int8_t m_pixels[16][4]; }; + struct PixelBlockScalarS16 + { + int16_t m_pixels[16]; + }; + // RGBA input block for half-precision float formats (bit-cast to int16_t) struct PixelBlockF16 { int16_t m_pixels[16][4]; }; + class ETC2CompressionData + { + protected: + ETC2CompressionData() {} + }; + + class ETC1CompressionData + { + protected: + ETC1CompressionData() {} + }; + namespace Kernels { + typedef void* allocFunc_t(void *context, size_t size); + typedef void freeFunc_t(void *context, void* ptr, size_t size); + // NOTE: All functions accept and output NumParallelBlocks blocks at once void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); @@ -134,7 +248,28 @@ namespace cvtt void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options); void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options); 
void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options); - void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); + void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan); + void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData); + void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData); + void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData); + void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData); + + void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options); + void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options); + + // Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best) + void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality); + + // Generates a BC7 encoding plan from fine-tuning parameters. + bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params); + + // ETC compression requires temporary storage that normally consumes a large amount of stack space. + // To allocate and release it, use one of these functions. 
+ ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options); + void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc); + + ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context); + void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc); void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC); void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC); diff --git a/thirdparty/cvtt/ConvectionKernels_API.cpp b/thirdparty/cvtt/ConvectionKernels_API.cpp new file mode 100644 index 0000000000..707e71d474 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_API.cpp @@ -0,0 +1,346 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include <stdint.h> +#include "ConvectionKernels.h" +#include "ConvectionKernels_Util.h" +#include "ConvectionKernels_BC67.h" +#include "ConvectionKernels_ETC.h" +#include "ConvectionKernels_S3TC.h" + +#include <assert.h> + +namespace cvtt +{ + namespace Kernels + { + void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, const BC7EncodingPlan &encodingPlan) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::BC7Computer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, encodingPlan, options.refineRoundsBC7); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, false, options.seedPoints, options.refineRoundsBC6H); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::BC6HComputer::Pack(options.flags, pBlocks + blockBase, pBC, channelWeights, true, options.seedPoints, options.refineRoundsBC6H); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, 
const cvtt::Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC, 8, channelWeights, true, options.threshold, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); + Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC); + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) + { + assert(pBlocks); + 
assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + PixelBlockU8 inputBlocks[ParallelMath::ParallelSize]; + Util::BiasSignedInput(inputBlocks, pBlocks + blockBase); + + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC); + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + PixelBlockU8 
inputBlocks[ParallelMath::ParallelSize]; + Util::BiasSignedInput(inputBlocks, pBlocks + blockBase); + + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC); + Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC); + pBC += ParallelMath::ParallelSize * 16; + } + } + + void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC1CompressionData *compressionData) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::ETCComputer::CompressETC1Block(pBC, pBlocks + blockBase, compressionData, options); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, false); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) + { + assert(pBlocks); + assert(pBC); + + float channelWeights[4]; + Util::FillWeights(options, channelWeights); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, true); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void 
EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options) + { + assert(pBlocks); + assert(pBC); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::ETCComputer::CompressETC2AlphaBlock(pBC, pBlocks + blockBase, options); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options) + { + assert(pBlocks); + assert(pBC); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize) + { + Internal::ETCComputer::CompressEACBlock(pBC, pBlocks + blockBase, isSigned, options); + pBC += ParallelMath::ParallelSize * 8; + } + } + + void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData) + { + uint8_t alphaBlockData[cvtt::NumParallelBlocks * 8]; + uint8_t colorBlockData[cvtt::NumParallelBlocks * 8]; + + EncodeETC2(colorBlockData, pBlocks, options, compressionData); + EncodeETC2Alpha(alphaBlockData, pBlocks, options); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) + { + for (size_t blockData = 0; blockData < 8; blockData++) + pBC[blockBase * 16 + blockData] = alphaBlockData[blockBase * 8 + blockData]; + + for (size_t blockData = 0; blockData < 8; blockData++) + pBC[blockBase * 16 + 8 + blockData] = colorBlockData[blockBase * 8 + blockData]; + } + } + + void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC) + { + assert(pBlocks); + assert(pBC); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) + { + Internal::BC7Computer::UnpackOne(pBlocks[blockBase], pBC); + pBC += 16; + } + } + + void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC) + { + assert(pBlocks); + assert(pBC); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) + { + 
Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, false); + pBC += 16; + } + } + + void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC) + { + assert(pBlocks); + assert(pBC); + + for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++) + { + Internal::BC6HComputer::UnpackOne(pBlocks[blockBase], pBC, true); + pBC += 16; + } + } + + ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context) + { + return cvtt::Internal::ETCComputer::AllocETC1Data(allocFunc, context); + } + + void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc) + { + cvtt::Internal::ETCComputer::ReleaseETC1Data(compressionData, freeFunc); + } + + ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options) + { + return cvtt::Internal::ETCComputer::AllocETC2Data(allocFunc, context, options); + } + + void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc) + { + cvtt::Internal::ETCComputer::ReleaseETC2Data(compressionData, freeFunc); + } + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_AggregatedError.h b/thirdparty/cvtt/ConvectionKernels_AggregatedError.h new file mode 100644 index 0000000000..9f9356a345 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_AggregatedError.h @@ -0,0 +1,55 @@ +#pragma once +#ifndef __CVTT_AGGREGATEDERROR_H__ +#define __CVTT_AGGREGATEDERROR_H__ + +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + namespace Internal + { + template<int TVectorSize> + class AggregatedError + { + public: + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt31 MUInt31; + typedef ParallelMath::Float MFloat; + + AggregatedError() + { + for (int ch = 0; ch < TVectorSize; ch++) + m_errorUnweighted[ch] = ParallelMath::MakeUInt31(0); + } + + void Add(const MUInt16 &channelErrorUnweighted, int ch) + { + m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted); + } + + MFloat 
Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const + { + if (flags & cvtt::Flags::Uniform) + { + MUInt31 total = m_errorUnweighted[0]; + for (int ch = 1; ch < TVectorSize; ch++) + total = total + m_errorUnweighted[ch]; + return ParallelMath::ToFloat(total); + } + else + { + MFloat total = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0]; + for (int ch = 1; ch < TVectorSize; ch++) + total = total + ParallelMath::ToFloat(m_errorUnweighted[ch]) * channelWeightsSq[ch]; + return total; + } + } + + private: + MUInt31 m_errorUnweighted[TVectorSize]; + }; + } +} + +#endif + diff --git a/thirdparty/cvtt/ConvectionKernels_BC67.cpp b/thirdparty/cvtt/ConvectionKernels_BC67.cpp new file mode 100644 index 0000000000..791859b232 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BC67.cpp @@ -0,0 +1,3485 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. + +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels_BC67.h" + +#include "ConvectionKernels_AggregatedError.h" +#include "ConvectionKernels_BCCommon.h" +#include "ConvectionKernels_BC7_Prio.h" +#include "ConvectionKernels_BC7_SingleColor.h" +#include "ConvectionKernels_BC6H_IO.h" +#include "ConvectionKernels_EndpointRefiner.h" +#include "ConvectionKernels_EndpointSelector.h" +#include "ConvectionKernels_IndexSelectorHDR.h" +#include "ConvectionKernels_ParallelMath.h" +#include "ConvectionKernels_UnfinishedEndpoints.h" + +namespace cvtt +{ + namespace Internal + { + namespace BC67 + { + typedef ParallelMath::Float MFloat; + typedef ParallelMath::UInt15 MUInt15; + + struct WorkInfo + { + MUInt15 m_mode; + MFloat m_error; + MUInt15 m_ep[3][2][4]; + MUInt15 m_indexes[16]; + MUInt15 m_indexes2[16]; + + union + { + MUInt15 m_partition; + struct IndexSelectorAndRotation + { + MUInt15 m_indexSelector; + MUInt15 m_rotation; + } m_isr; + } m_u; + }; + } + + namespace BC7Data + { + enum AlphaMode + { + AlphaMode_Combined, + AlphaMode_Separate, + AlphaMode_None, + }; + + enum PBitMode + { + PBitMode_PerEndpoint, + PBitMode_PerSubset, + PBitMode_None + }; + + struct BC7ModeInfo + { + PBitMode m_pBitMode; + AlphaMode m_alphaMode; + int m_rgbBits; + int m_alphaBits; + int m_partitionBits; + int m_numSubsets; + int m_indexBits; + int m_alphaIndexBits; + bool m_hasIndexSelector; + }; + + BC7ModeInfo g_modes[] = + { + { PBitMode_PerEndpoint, AlphaMode_None, 4, 0, 4, 3, 3, 0, false }, // 0 + { PBitMode_PerSubset, AlphaMode_None, 6, 0, 6, 2, 3, 0, false }, // 1 + { PBitMode_None, AlphaMode_None, 5, 0, 6, 3, 2, 0, 
false }, // 2 + { PBitMode_PerEndpoint, AlphaMode_None, 7, 0, 6, 2, 2, 0, false }, // 3 (Mode reference has an error, P-bit is really per-endpoint) + + { PBitMode_None, AlphaMode_Separate, 5, 6, 0, 1, 2, 3, true }, // 4 + { PBitMode_None, AlphaMode_Separate, 7, 8, 0, 1, 2, 2, false }, // 5 + { PBitMode_PerEndpoint, AlphaMode_Combined, 7, 7, 0, 1, 4, 0, false }, // 6 + { PBitMode_PerEndpoint, AlphaMode_Combined, 5, 5, 6, 2, 2, 0, false } // 7 + }; + + const int g_weight2[] = { 0, 21, 43, 64 }; + const int g_weight3[] = { 0, 9, 18, 27, 37, 46, 55, 64 }; + const int g_weight4[] = { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 }; + + const int *g_weightTables[] = + { + NULL, + NULL, + g_weight2, + g_weight3, + g_weight4 + }; + + struct BC6HModeInfo + { + uint16_t m_modeID; + bool m_partitioned; + bool m_transformed; + int m_aPrec; + int m_bPrec[3]; + }; + + // [partitioned][precision] + bool g_hdrModesExistForPrecision[2][17] = + { + //0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 + { false, false, false, false, false, false, false, false, false, false, true, true, true, false, false, false, true }, + { false, false, false, false, false, false, true, true, true, true, true, true, false, false, false, false, false }, + }; + + BC6HModeInfo g_hdrModes[] = + { + { 0x00, true, true, 10,{ 5, 5, 5 } }, + { 0x01, true, true, 7,{ 6, 6, 6 } }, + { 0x02, true, true, 11,{ 5, 4, 4 } }, + { 0x06, true, true, 11,{ 4, 5, 4 } }, + { 0x0a, true, true, 11,{ 4, 4, 5 } }, + { 0x0e, true, true, 9,{ 5, 5, 5 } }, + { 0x12, true, true, 8,{ 6, 5, 5 } }, + { 0x16, true, true, 8,{ 5, 6, 5 } }, + { 0x1a, true, true, 8,{ 5, 5, 6 } }, + { 0x1e, true, false, 6,{ 6, 6, 6 } }, + { 0x03, false, false, 10,{ 10, 10, 10 } }, + { 0x07, false, true, 11,{ 9, 9, 9 } }, + { 0x0b, false, true, 12,{ 8, 8, 8 } }, + { 0x0f, false, true, 16,{ 4, 4, 4 } }, + }; + + const int g_maxHDRPrecision = 16; + + static const size_t g_numHDRModes = sizeof(g_hdrModes) / sizeof(g_hdrModes[0]); + + static uint16_t 
g_partitionMap[64] = + { + 0xCCCC, 0x8888, 0xEEEE, 0xECC8, + 0xC880, 0xFEEC, 0xFEC8, 0xEC80, + 0xC800, 0xFFEC, 0xFE80, 0xE800, + 0xFFE8, 0xFF00, 0xFFF0, 0xF000, + 0xF710, 0x008E, 0x7100, 0x08CE, + 0x008C, 0x7310, 0x3100, 0x8CCE, + 0x088C, 0x3110, 0x6666, 0x366C, + 0x17E8, 0x0FF0, 0x718E, 0x399C, + 0xaaaa, 0xf0f0, 0x5a5a, 0x33cc, + 0x3c3c, 0x55aa, 0x9696, 0xa55a, + 0x73ce, 0x13c8, 0x324c, 0x3bdc, + 0x6996, 0xc33c, 0x9966, 0x660, + 0x272, 0x4e4, 0x4e40, 0x2720, + 0xc936, 0x936c, 0x39c6, 0x639c, + 0x9336, 0x9cc6, 0x817e, 0xe718, + 0xccf0, 0xfcc, 0x7744, 0xee22, + }; + + static uint32_t g_partitionMap2[64] = + { + 0xaa685050, 0x6a5a5040, 0x5a5a4200, 0x5450a0a8, + 0xa5a50000, 0xa0a05050, 0x5555a0a0, 0x5a5a5050, + 0xaa550000, 0xaa555500, 0xaaaa5500, 0x90909090, + 0x94949494, 0xa4a4a4a4, 0xa9a59450, 0x2a0a4250, + 0xa5945040, 0x0a425054, 0xa5a5a500, 0x55a0a0a0, + 0xa8a85454, 0x6a6a4040, 0xa4a45000, 0x1a1a0500, + 0x0050a4a4, 0xaaa59090, 0x14696914, 0x69691400, + 0xa08585a0, 0xaa821414, 0x50a4a450, 0x6a5a0200, + 0xa9a58000, 0x5090a0a8, 0xa8a09050, 0x24242424, + 0x00aa5500, 0x24924924, 0x24499224, 0x50a50a50, + 0x500aa550, 0xaaaa4444, 0x66660000, 0xa5a0a5a0, + 0x50a050a0, 0x69286928, 0x44aaaa44, 0x66666600, + 0xaa444444, 0x54a854a8, 0x95809580, 0x96969600, + 0xa85454a8, 0x80959580, 0xaa141414, 0x96960000, + 0xaaaa1414, 0xa05050a0, 0xa0a5a5a0, 0x96000000, + 0x40804080, 0xa9a8a9a8, 0xaaaaaa44, 0x2a4a5254, + }; + + static int g_fixupIndexes2[64] = + { + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15,15,15,15, + 15, 2, 8, 2, + 2, 8, 8,15, + 2, 8, 2, 2, + 8, 8, 2, 2, + + 15,15, 6, 8, + 2, 8,15,15, + 2, 8, 2, 2, + 2,15,15, 6, + 6, 2, 6, 8, + 15,15, 2, 2, + 15,15,15,15, + 15, 2, 2,15, + }; + + static int g_fixupIndexes3[64][2] = + { + { 3,15 },{ 3, 8 },{ 15, 8 },{ 15, 3 }, + { 8,15 },{ 3,15 },{ 15, 3 },{ 15, 8 }, + { 8,15 },{ 8,15 },{ 6,15 },{ 6,15 }, + { 6,15 },{ 5,15 },{ 3,15 },{ 3, 8 }, + { 3,15 },{ 3, 8 },{ 8,15 },{ 15, 3 }, + { 3,15 },{ 3, 8 },{ 6,15 },{ 10, 8 }, + { 5, 3 },{ 
8,15 },{ 8, 6 },{ 6,10 }, + { 8,15 },{ 5,15 },{ 15,10 },{ 15, 8 }, + + { 8,15 },{ 15, 3 },{ 3,15 },{ 5,10 }, + { 6,10 },{ 10, 8 },{ 8, 9 },{ 15,10 }, + { 15, 6 },{ 3,15 },{ 15, 8 },{ 5,15 }, + { 15, 3 },{ 15, 6 },{ 15, 6 },{ 15, 8 }, + { 3,15 },{ 15, 3 },{ 5,15 },{ 5,15 }, + { 5,15 },{ 8,15 },{ 5,15 },{ 10,15 }, + { 5,15 },{ 10,15 },{ 8,15 },{ 13,15 }, + { 15, 3 },{ 12,15 },{ 3,15 },{ 3, 8 }, + }; + + static const unsigned char g_fragments[] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 0, 16 + 0, 1, 2, 3, // 16, 4 + 0, 1, 4, // 20, 3 + 0, 1, 2, 4, // 23, 4 + 2, 3, 7, // 27, 3 + 1, 2, 3, 7, // 30, 4 + 0, 1, 2, 3, 4, 5, 6, 7, // 34, 8 + 0, 1, 4, 8, // 42, 4 + 0, 1, 2, 4, 5, 8, // 46, 6 + 0, 1, 2, 3, 4, 5, 6, 8, // 52, 8 + 1, 4, 5, 6, 9, // 60, 5 + 2, 5, 6, 7, 10, // 65, 5 + 5, 6, 9, 10, // 70, 4 + 2, 3, 7, 11, // 74, 4 + 1, 2, 3, 6, 7, 11, // 78, 6 + 0, 1, 2, 3, 5, 6, 7, 11, // 84, 8 + 0, 1, 2, 3, 8, 9, 10, 11, // 92, 8 + 2, 3, 6, 7, 8, 9, 10, 11, // 100, 8 + 4, 5, 6, 7, 8, 9, 10, 11, // 108, 8 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, // 116, 12 + 0, 4, 8, 12, // 128, 4 + 0, 2, 3, 4, 6, 7, 8, 12, // 132, 8 + 0, 1, 2, 4, 5, 8, 9, 12, // 140, 8 + 0, 1, 2, 3, 4, 5, 6, 8, 9, 12, // 148, 10 + 3, 6, 7, 8, 9, 12, // 158, 6 + 3, 5, 6, 7, 8, 9, 10, 12, // 164, 8 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, // 172, 12 + 0, 1, 2, 5, 6, 7, 11, 12, // 184, 8 + 5, 8, 9, 10, 13, // 192, 5 + 8, 12, 13, // 197, 3 + 4, 8, 12, 13, // 200, 4 + 2, 3, 6, 9, 12, 13, // 204, 6 + 0, 1, 2, 3, 8, 9, 12, 13, // 210, 8 + 0, 1, 4, 5, 8, 9, 12, 13, // 218, 8 + 2, 3, 6, 7, 8, 9, 12, 13, // 226, 8 + 2, 3, 5, 6, 9, 10, 12, 13, // 234, 8 + 0, 3, 6, 7, 9, 10, 12, 13, // 242, 8 + 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, // 250, 12 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, // 262, 13 + 2, 3, 4, 7, 8, 11, 12, 13, // 275, 8 + 1, 2, 6, 7, 8, 11, 12, 13, // 283, 8 + 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, // 291, 10 + 2, 3, 4, 5, 10, 11, 12, 13, // 301, 8 + 0, 1, 6, 7, 10, 11, 12, 13, // 309, 8 + 6, 
9, 10, 11, 14, // 317, 5 + 0, 2, 4, 6, 8, 10, 12, 14, // 322, 8 + 1, 3, 5, 7, 8, 10, 12, 14, // 330, 8 + 1, 3, 4, 6, 9, 11, 12, 14, // 338, 8 + 0, 2, 5, 7, 9, 11, 12, 14, // 346, 8 + 0, 3, 4, 5, 8, 9, 13, 14, // 354, 8 + 2, 3, 4, 7, 8, 9, 13, 14, // 362, 8 + 1, 2, 5, 6, 9, 10, 13, 14, // 370, 8 + 0, 3, 4, 7, 9, 10, 13, 14, // 378, 8 + 0, 3, 5, 6, 8, 11, 13, 14, // 386, 8 + 1, 2, 4, 7, 8, 11, 13, 14, // 394, 8 + 0, 1, 4, 7, 10, 11, 13, 14, // 402, 8 + 0, 3, 6, 7, 10, 11, 13, 14, // 410, 8 + 8, 12, 13, 14, // 418, 4 + 1, 2, 3, 7, 8, 12, 13, 14, // 422, 8 + 4, 8, 9, 12, 13, 14, // 430, 6 + 0, 4, 5, 8, 9, 12, 13, 14, // 436, 8 + 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, // 444, 10 + 2, 6, 8, 9, 10, 12, 13, 14, // 454, 8 + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, // 462, 12 + 0, 7, 9, 10, 11, 12, 13, 14, // 474, 8 + 1, 2, 3, 4, 5, 6, 8, 15, // 482, 8 + 3, 7, 11, 15, // 490, 4 + 0, 1, 3, 4, 5, 7, 11, 15, // 494, 8 + 0, 4, 5, 10, 11, 15, // 502, 6 + 1, 2, 3, 6, 7, 10, 11, 15, // 508, 8 + 0, 1, 2, 3, 5, 6, 7, 10, 11, 15, // 516, 10 + 0, 4, 5, 6, 9, 10, 11, 15, // 526, 8 + 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 15, // 534, 12 + 1, 2, 4, 5, 8, 9, 12, 15, // 546, 8 + 2, 3, 5, 6, 8, 9, 12, 15, // 554, 8 + 0, 3, 5, 6, 9, 10, 12, 15, // 562, 8 + 1, 2, 4, 7, 9, 10, 12, 15, // 570, 8 + 1, 2, 5, 6, 8, 11, 12, 15, // 578, 8 + 0, 3, 4, 7, 8, 11, 12, 15, // 586, 8 + 0, 1, 5, 6, 10, 11, 12, 15, // 594, 8 + 1, 2, 6, 7, 10, 11, 12, 15, // 602, 8 + 1, 3, 4, 6, 8, 10, 13, 15, // 610, 8 + 0, 2, 5, 7, 8, 10, 13, 15, // 618, 8 + 0, 2, 4, 6, 9, 11, 13, 15, // 626, 8 + 1, 3, 5, 7, 9, 11, 13, 15, // 634, 8 + 0, 1, 2, 3, 4, 5, 7, 8, 12, 13, 15, // 642, 11 + 2, 3, 4, 5, 8, 9, 14, 15, // 653, 8 + 0, 1, 6, 7, 8, 9, 14, 15, // 661, 8 + 0, 1, 5, 10, 14, 15, // 669, 6 + 0, 3, 4, 5, 9, 10, 14, 15, // 675, 8 + 0, 1, 5, 6, 9, 10, 14, 15, // 683, 8 + 11, 14, 15, // 691, 3 + 7, 11, 14, 15, // 694, 4 + 1, 2, 4, 5, 8, 11, 14, 15, // 698, 8 + 0, 1, 4, 7, 8, 11, 14, 15, // 706, 8 + 0, 1, 4, 5, 10, 11, 14, 15, // 714, 8 + 2, 
3, 6, 7, 10, 11, 14, 15, // 722, 8 + 4, 5, 6, 7, 10, 11, 14, 15, // 730, 8 + 0, 1, 4, 5, 7, 8, 10, 11, 14, 15, // 738, 10 + 0, 1, 2, 3, 5, 6, 7, 9, 10, 11, 14, 15, // 748, 12 + 0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 15, // 760, 13 + 0, 1, 2, 3, 4, 6, 7, 11, 12, 14, 15, // 773, 11 + 3, 4, 8, 9, 10, 13, 14, 15, // 784, 8 + 11, 13, 14, 15, // 792, 4 + 0, 1, 2, 4, 11, 13, 14, 15, // 796, 8 + 0, 1, 2, 4, 5, 10, 11, 13, 14, 15, // 804, 10 + 7, 10, 11, 13, 14, 15, // 814, 6 + 3, 6, 7, 10, 11, 13, 14, 15, // 820, 8 + 1, 5, 9, 10, 11, 13, 14, 15, // 828, 8 + 1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, // 836, 12 + 12, 13, 14, 15, // 848, 4 + 0, 1, 2, 3, 12, 13, 14, 15, // 852, 8 + 0, 1, 4, 5, 12, 13, 14, 15, // 860, 8 + 4, 5, 6, 7, 12, 13, 14, 15, // 868, 8 + 4, 8, 9, 10, 12, 13, 14, 15, // 876, 8 + 0, 4, 5, 8, 9, 10, 12, 13, 14, 15, // 884, 10 + 0, 1, 4, 5, 6, 8, 9, 10, 12, 13, 14, 15, // 894, 12 + 0, 1, 2, 3, 4, 7, 8, 11, 12, 13, 14, 15, // 906, 12 + 0, 1, 3, 4, 8, 9, 11, 12, 13, 14, 15, // 918, 11 + 0, 2, 3, 7, 8, 10, 11, 12, 13, 14, 15, // 929, 11 + 7, 9, 10, 11, 12, 13, 14, 15, // 940, 8 + 3, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 948, 10 + 2, 3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, // 958, 12 + 8, 9, 10, 11, 12, 13, 14, 15, // 970, 8 + 0, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 978, 12 + 0, 1, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, // 990, 13 + 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1003, 12 + 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1015, 13 + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // 1028, 12 + 0, 2, // 1040, 2 + 1, 3, // 1042, 2 + 0, 1, 4, 5, // 1044, 4 + 0, 1, 2, 4, 5, // 1048, 5 + 2, 3, 6, // 1053, 3 + 0, 2, 4, 6, // 1056, 4 + 1, 2, 5, 6, // 1060, 4 + 0, 1, 2, 3, 5, 6, // 1064, 6 + 0, 1, 2, 4, 5, 6, // 1070, 6 + 0, 1, 2, 3, 4, 5, 6, // 1076, 7 + 0, 3, 4, 7, // 1083, 4 + 0, 1, 2, 3, 4, 7, // 1087, 6 + 1, 3, 5, 7, // 1093, 4 + 2, 3, 6, 7, // 1097, 4 + 1, 2, 3, 6, 7, // 1101, 5 + 1, 2, 3, 5, 6, 7, // 1106, 6 + 0, 1, 2, 3, 5, 6, 7, // 1112, 7 + 4, 5, 6, 7, 
// 1119, 4 + 0, 8, // 1123, 2 + 0, 1, 4, 5, 8, // 1125, 5 + 0, 1, 8, 9, // 1130, 4 + 4, 5, 8, 9, // 1134, 4 + 0, 1, 4, 5, 8, 9, // 1138, 6 + 2, 6, 8, 9, // 1144, 4 + 6, 7, 8, 9, // 1148, 4 + 0, 2, 4, 6, 8, 10, // 1152, 6 + 1, 2, 5, 6, 9, 10, // 1158, 6 + 0, 3, 4, 7, 9, 10, // 1164, 6 + 0, 1, 2, 8, 9, 10, // 1170, 6 + 4, 5, 6, 8, 9, 10, // 1176, 6 + 3, 11, // 1182, 2 + 2, 3, 6, 7, 11, // 1184, 5 + 0, 3, 8, 11, // 1189, 4 + 0, 3, 4, 7, 8, 11, // 1193, 6 + 1, 3, 5, 7, 9, 11, // 1199, 6 + 2, 3, 10, 11, // 1205, 4 + 1, 5, 10, 11, // 1209, 4 + 4, 5, 10, 11, // 1213, 4 + 6, 7, 10, 11, // 1217, 4 + 2, 3, 6, 7, 10, 11, // 1221, 6 + 1, 2, 3, 9, 10, 11, // 1227, 6 + 5, 6, 7, 9, 10, 11, // 1233, 6 + 8, 9, 10, 11, // 1239, 4 + 4, 12, // 1243, 2 + 0, 1, 2, 3, 4, 5, 8, 12, // 1245, 8 + 8, 9, 12, // 1253, 3 + 0, 4, 5, 8, 9, 12, // 1256, 6 + 0, 1, 4, 5, 8, 9, 12, // 1262, 7 + 2, 3, 5, 6, 8, 9, 12, // 1269, 7 + 1, 5, 9, 13, // 1276, 4 + 6, 7, 9, 13, // 1280, 4 + 1, 4, 7, 10, 13, // 1284, 5 + 1, 6, 8, 11, 13, // 1289, 5 + 0, 1, 12, 13, // 1294, 4 + 4, 5, 12, 13, // 1298, 4 + 0, 1, 6, 7, 12, 13, // 1302, 6 + 0, 1, 4, 8, 12, 13, // 1308, 6 + 8, 9, 12, 13, // 1314, 4 + 4, 8, 9, 12, 13, // 1318, 5 + 4, 5, 8, 9, 12, 13, // 1323, 6 + 0, 4, 5, 8, 9, 12, 13, // 1329, 7 + 0, 1, 6, 10, 12, 13, // 1336, 6 + 3, 6, 7, 9, 10, 12, 13, // 1342, 7 + 0, 1, 10, 11, 12, 13, // 1349, 6 + 2, 4, 7, 9, 14, // 1355, 5 + 4, 5, 10, 14, // 1360, 4 + 2, 6, 10, 14, // 1364, 4 + 2, 5, 8, 11, 14, // 1368, 5 + 0, 2, 12, 14, // 1373, 4 + 8, 10, 12, 14, // 1377, 4 + 4, 6, 8, 10, 12, 14, // 1381, 6 + 13, 14, // 1387, 2 + 9, 10, 13, 14, // 1389, 4 + 5, 6, 9, 10, 13, 14, // 1393, 6 + 0, 1, 2, 12, 13, 14, // 1399, 6 + 4, 5, 6, 12, 13, 14, // 1405, 6 + 8, 9, 12, 13, 14, // 1411, 5 + 8, 9, 10, 12, 13, 14, // 1416, 6 + 7, 15, // 1422, 2 + 0, 5, 10, 15, // 1424, 4 + 0, 1, 2, 3, 6, 7, 11, 15, // 1428, 8 + 10, 11, 15, // 1436, 3 + 0, 1, 5, 6, 10, 11, 15, // 1439, 7 + 3, 6, 7, 10, 11, 15, // 1446, 6 + 12, 15, // 1452, 2 + 0, 3, 
12, 15, // 1454, 4 + 4, 7, 12, 15, // 1458, 4 + 0, 3, 6, 9, 12, 15, // 1462, 6 + 0, 3, 5, 10, 12, 15, // 1468, 6 + 8, 11, 12, 15, // 1474, 4 + 5, 6, 8, 11, 12, 15, // 1478, 6 + 4, 7, 8, 11, 12, 15, // 1484, 6 + 1, 3, 13, 15, // 1490, 4 + 9, 11, 13, 15, // 1494, 4 + 5, 7, 9, 11, 13, 15, // 1498, 6 + 2, 3, 14, 15, // 1504, 4 + 2, 3, 4, 5, 14, 15, // 1508, 6 + 6, 7, 14, 15, // 1514, 4 + 2, 3, 5, 9, 14, 15, // 1518, 6 + 2, 3, 8, 9, 14, 15, // 1524, 6 + 10, 14, 15, // 1530, 3 + 0, 4, 5, 9, 10, 14, 15, // 1533, 7 + 2, 3, 7, 11, 14, 15, // 1540, 6 + 10, 11, 14, 15, // 1546, 4 + 7, 10, 11, 14, 15, // 1550, 5 + 6, 7, 10, 11, 14, 15, // 1555, 6 + 1, 2, 3, 13, 14, 15, // 1561, 6 + 5, 6, 7, 13, 14, 15, // 1567, 6 + 10, 11, 13, 14, 15, // 1573, 5 + 9, 10, 11, 13, 14, 15, // 1578, 6 + 0, 4, 8, 9, 12, 13, 14, 15, // 1584, 8 + 9, 10, 12, 13, 14, 15, // 1592, 6 + 8, 11, 12, 13, 14, 15, // 1598, 6 + 3, 7, 10, 11, 12, 13, 14, 15, // 1604, 8 + }; + static const int g_shapeRanges[][2] = + { + { 0, 16 },{ 16, 4 },{ 20, 3 },{ 23, 4 },{ 27, 3 },{ 30, 4 },{ 34, 8 },{ 42, 4 },{ 46, 6 },{ 52, 8 },{ 60, 5 }, + { 65, 5 },{ 70, 4 },{ 74, 4 },{ 78, 6 },{ 84, 8 },{ 92, 8 },{ 100, 8 },{ 108, 8 },{ 116, 12 },{ 128, 4 },{ 132, 8 }, + { 140, 8 },{ 148, 10 },{ 158, 6 },{ 164, 8 },{ 172, 12 },{ 184, 8 },{ 192, 5 },{ 197, 3 },{ 200, 4 },{ 204, 6 },{ 210, 8 }, + { 218, 8 },{ 226, 8 },{ 234, 8 },{ 242, 8 },{ 250, 12 },{ 262, 13 },{ 275, 8 },{ 283, 8 },{ 291, 10 },{ 301, 8 },{ 309, 8 }, + { 317, 5 },{ 322, 8 },{ 330, 8 },{ 338, 8 },{ 346, 8 },{ 354, 8 },{ 362, 8 },{ 370, 8 },{ 378, 8 },{ 386, 8 },{ 394, 8 }, + { 402, 8 },{ 410, 8 },{ 418, 4 },{ 422, 8 },{ 430, 6 },{ 436, 8 },{ 444, 10 },{ 454, 8 },{ 462, 12 },{ 474, 8 },{ 482, 8 }, + { 490, 4 },{ 494, 8 },{ 502, 6 },{ 508, 8 },{ 516, 10 },{ 526, 8 },{ 534, 12 },{ 546, 8 },{ 554, 8 },{ 562, 8 },{ 570, 8 }, + { 578, 8 },{ 586, 8 },{ 594, 8 },{ 602, 8 },{ 610, 8 },{ 618, 8 },{ 626, 8 },{ 634, 8 },{ 642, 11 },{ 653, 8 },{ 661, 8 }, + { 669, 6 },{ 675, 8 },{ 
683, 8 },{ 691, 3 },{ 694, 4 },{ 698, 8 },{ 706, 8 },{ 714, 8 },{ 722, 8 },{ 730, 8 },{ 738, 10 }, + { 748, 12 },{ 760, 13 },{ 773, 11 },{ 784, 8 },{ 792, 4 },{ 796, 8 },{ 804, 10 },{ 814, 6 },{ 820, 8 },{ 828, 8 },{ 836, 12 }, + { 848, 4 },{ 852, 8 },{ 860, 8 },{ 868, 8 },{ 876, 8 },{ 884, 10 },{ 894, 12 },{ 906, 12 },{ 918, 11 },{ 929, 11 },{ 940, 8 }, + { 948, 10 },{ 958, 12 },{ 970, 8 },{ 978, 12 },{ 990, 13 },{ 1003, 12 },{ 1015, 13 },{ 1028, 12 },{ 1040, 2 },{ 1042, 2 },{ 1044, 4 }, + { 1048, 5 },{ 1053, 3 },{ 1056, 4 },{ 1060, 4 },{ 1064, 6 },{ 1070, 6 },{ 1076, 7 },{ 1083, 4 },{ 1087, 6 },{ 1093, 4 },{ 1097, 4 }, + { 1101, 5 },{ 1106, 6 },{ 1112, 7 },{ 1119, 4 },{ 1123, 2 },{ 1125, 5 },{ 1130, 4 },{ 1134, 4 },{ 1138, 6 },{ 1144, 4 },{ 1148, 4 }, + { 1152, 6 },{ 1158, 6 },{ 1164, 6 },{ 1170, 6 },{ 1176, 6 },{ 1182, 2 },{ 1184, 5 },{ 1189, 4 },{ 1193, 6 },{ 1199, 6 },{ 1205, 4 }, + { 1209, 4 },{ 1213, 4 },{ 1217, 4 },{ 1221, 6 },{ 1227, 6 },{ 1233, 6 },{ 1239, 4 },{ 1243, 2 },{ 1245, 8 },{ 1253, 3 },{ 1256, 6 }, + { 1262, 7 },{ 1269, 7 },{ 1276, 4 },{ 1280, 4 },{ 1284, 5 },{ 1289, 5 },{ 1294, 4 },{ 1298, 4 },{ 1302, 6 },{ 1308, 6 },{ 1314, 4 }, + { 1318, 5 },{ 1323, 6 },{ 1329, 7 },{ 1336, 6 },{ 1342, 7 },{ 1349, 6 },{ 1355, 5 },{ 1360, 4 },{ 1364, 4 },{ 1368, 5 },{ 1373, 4 }, + { 1377, 4 },{ 1381, 6 },{ 1387, 2 },{ 1389, 4 },{ 1393, 6 },{ 1399, 6 },{ 1405, 6 },{ 1411, 5 },{ 1416, 6 },{ 1422, 2 },{ 1424, 4 }, + { 1428, 8 },{ 1436, 3 },{ 1439, 7 },{ 1446, 6 },{ 1452, 2 },{ 1454, 4 },{ 1458, 4 },{ 1462, 6 },{ 1468, 6 },{ 1474, 4 },{ 1478, 6 }, + { 1484, 6 },{ 1490, 4 },{ 1494, 4 },{ 1498, 6 },{ 1504, 4 },{ 1508, 6 },{ 1514, 4 },{ 1518, 6 },{ 1524, 6 },{ 1530, 3 },{ 1533, 7 }, + { 1540, 6 },{ 1546, 4 },{ 1550, 5 },{ 1555, 6 },{ 1561, 6 },{ 1567, 6 },{ 1573, 5 },{ 1578, 6 },{ 1584, 8 },{ 1592, 6 },{ 1598, 6 }, + { 1604, 8 }, + }; + static const int g_shapes1[][2] = + { + { 0, 16 } + }; + static const int g_shapes2[64][2] = + { + { 33, 96 },{ 63, 66 },{ 20, 109 
},{ 22, 107 },{ 37, 92 },{ 7, 122 },{ 8, 121 },{ 23, 106 }, + { 38, 91 },{ 2, 127 },{ 9, 120 },{ 26, 103 },{ 3, 126 },{ 6, 123 },{ 1, 128 },{ 19, 110 }, + { 15, 114 },{ 124, 5 },{ 72, 57 },{ 115, 14 },{ 125, 4 },{ 70, 59 },{ 100, 29 },{ 60, 69 }, + { 116, 13 },{ 99, 30 },{ 78, 51 },{ 94, 35 },{ 104, 25 },{ 111, 18 },{ 71, 58 },{ 90, 39 }, + { 45, 84 },{ 16, 113 },{ 82, 47 },{ 95, 34 },{ 87, 42 },{ 83, 46 },{ 53, 76 },{ 48, 81 }, + { 68, 61 },{ 105, 24 },{ 98, 31 },{ 88, 41 },{ 75, 54 },{ 43, 86 },{ 52, 77 },{ 117, 12 }, + { 119, 10 },{ 118, 11 },{ 85, 44 },{ 101, 28 },{ 36, 93 },{ 55, 74 },{ 89, 40 },{ 79, 50 }, + { 56, 73 },{ 49, 80 },{ 64, 65 },{ 27, 102 },{ 32, 97 },{ 112, 17 },{ 67, 62 },{ 21, 108 }, + }; + static const int g_shapes3[64][3] = + { + { 148, 160, 240 },{ 132, 212, 205 },{ 136, 233, 187 },{ 175, 237, 143 },{ 6, 186, 232 },{ 33, 142, 232 },{ 131, 123, 142 },{ 131, 96, 186 }, + { 6, 171, 110 },{ 1, 18, 110 },{ 1, 146, 123 },{ 33, 195, 66 },{ 20, 51, 66 },{ 20, 178, 96 },{ 2, 177, 106 },{ 211, 4, 59 }, + { 8, 191, 91 },{ 230, 14, 29 },{ 1, 188, 234 },{ 151, 110, 168 },{ 20, 144, 238 },{ 137, 66, 206 },{ 173, 179, 232 },{ 209, 194, 186 }, + { 239, 165, 142 },{ 131, 152, 242 },{ 214, 54, 12 },{ 140, 219, 201 },{ 190, 150, 231 },{ 156, 135, 241 },{ 185, 227, 167 },{ 145, 210, 59 }, + { 138, 174, 106 },{ 189, 229, 14 },{ 176, 133, 106 },{ 78, 178, 195 },{ 111, 146, 171 },{ 216, 180, 196 },{ 217, 181, 193 },{ 184, 228, 166 }, + { 192, 225, 153 },{ 134, 141, 123 },{ 6, 222, 198 },{ 149, 183, 96 },{ 33, 226, 164 },{ 161, 215, 51 },{ 197, 221, 18 },{ 1, 223, 199 }, + { 154, 163, 110 },{ 20, 236, 169 },{ 157, 204, 66 },{ 1, 202, 220 },{ 20, 170, 235 },{ 203, 158, 66 },{ 162, 155, 110 },{ 6, 201, 218 }, + { 139, 135, 123 },{ 33, 167, 224 },{ 182, 150, 96 },{ 19, 200, 213 },{ 63, 207, 159 },{ 147, 172, 109 },{ 129, 130, 128 },{ 208, 14, 59 }, + }; + + static const int g_shapeList1[] = + { + 0, + }; + + static const int g_shapeList2[] = + { + 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, + 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, + 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, + 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, + 122, 123, 124, 125, 126, 127, 128, + }; + + static const int g_shapeList12[] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, 127, 128, + }; + + static const int g_shapeList3[] = + { + 1, 2, 4, 6, 8, 12, 14, 18, 19, 20, 29, + 33, 51, 54, 59, 63, 66, 78, 91, 96, 106, 109, + 110, 111, 123, 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, + 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, + 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, + 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, + 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, + }; + + static const int g_shapeList3Short[] = + { + 1, 2, 4, 6, 18, 20, 33, 51, 59, 66, 96, + 106, 110, 123, 
131, 132, 136, 142, 143, 146, 148, 160, + 171, 175, 177, 178, 186, 187, 195, 205, 211, 212, 232, + 233, 237, 240, + }; + + static const int g_shapeListAll[] = + { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, + 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, + 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, + 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, + 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, + 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, + 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, + 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, + 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, + 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, + 242, + }; + + static const int g_numShapes1 = sizeof(g_shapeList1) / sizeof(g_shapeList1[0]); + static const int g_numShapes2 = sizeof(g_shapeList2) / sizeof(g_shapeList2[0]); + static const int g_numShapes12 = sizeof(g_shapeList12) / sizeof(g_shapeList12[0]); + static const int g_numShapes3 = sizeof(g_shapeList3) / sizeof(g_shapeList3[0]); + static const int g_numShapes3Short = sizeof(g_shapeList3Short) / sizeof(g_shapeList3Short[0]); + static const int g_numShapesAll = sizeof(g_shapeListAll) / sizeof(g_shapeListAll[0]); + static const int g_numFragments = sizeof(g_fragments) / sizeof(g_fragments[0]); + } + + struct PackingVector + { + uint32_t m_vector[4]; + int m_offset; + + 
void Init() + { + for (int i = 0; i < 4; i++) + m_vector[i] = 0; + + m_offset = 0; + } + + void InitPacked(const uint32_t *v, int bits) + { + for (int b = 0; b < bits; b += 32) + m_vector[b / 32] = v[b / 32]; + + m_offset = bits; + } + + inline void Pack(ParallelMath::ScalarUInt16 value, int bits) + { + int vOffset = m_offset >> 5; + int bitOffset = m_offset & 0x1f; + + m_vector[vOffset] |= (static_cast<uint32_t>(value) << bitOffset) & static_cast<uint32_t>(0xffffffff); + + int overflowBits = bitOffset + bits - 32; + if (overflowBits > 0) + m_vector[vOffset + 1] |= (static_cast<uint32_t>(value) >> (bits - overflowBits)); + + m_offset += bits; + } + + inline void Flush(uint8_t* output) + { + assert(m_offset == 128); + + for (int v = 0; v < 4; v++) + { + uint32_t chunk = m_vector[v]; + for (int b = 0; b < 4; b++) + output[v * 4 + b] = static_cast<uint8_t>((chunk >> (b * 8)) & 0xff); + } + } + }; + + + /* Bit reader over a 16-byte block: Init() loads the bytes into four 32-bit words (byte b goes to word b/4 at bit offset (b%4)*8), and UnpackStart()/Unpack() consume bits from the low end of m_vector[0]. */ + struct UnpackingVector + { + uint32_t m_vector[4]; + + void Init(const uint8_t *bytes) + { + for (int i = 0; i < 4; i++) + m_vector[i] = 0; + + /* Widen to uint32_t before shifting: a uint8_t promotes to signed int, and shifting a value >= 0x80 left by 24 overflows it. */ + for (int b = 0; b < 16; b++) + m_vector[b / 4] |= (static_cast<uint32_t>(bytes[b]) << ((b % 4) * 8)); + } + + inline void UnpackStart(uint32_t *v, int bits) + { + for (int b = 0; b < bits; b += 32) + v[b / 32] = m_vector[b / 32]; + + int entriesShifted = bits / 32; + int carry = bits % 32; + + for (int i = entriesShifted; i < 4; i++) + m_vector[i - entriesShifted] = m_vector[i]; + + if (carry) + { + /* Build the mask in unsigned arithmetic; carry can be as large as 31. */ + uint32_t bitMask = (static_cast<uint32_t>(1) << carry) - 1; + for (int i = 0; i < 4; i++) + { + m_vector[i] >>= carry; + if (i != 3) + m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - carry); + } + } + } + + inline ParallelMath::ScalarUInt16 Unpack(int bits) + { + uint32_t bitMask = (static_cast<uint32_t>(1) << bits) - 1; + + ParallelMath::ScalarUInt16 result = static_cast<ParallelMath::ScalarUInt16>(m_vector[0] & bitMask); + + for (int i = 0; i < 4; i++) + { + m_vector[i] >>= bits; + if (i != 3) + m_vector[i] |= (m_vector[i + 1] & bitMask) << (32 - 
bits); + } + + return result; + } + }; + + ParallelMath::Float ScaleHDRValue(const ParallelMath::Float &v, bool isSigned) + { + if (isSigned) + { + ParallelMath::Float offset = ParallelMath::Select(ParallelMath::Less(v, ParallelMath::MakeFloatZero()), ParallelMath::MakeFloat(-30.0f), ParallelMath::MakeFloat(30.0f)); + return (v * 32.0f + offset) / 31.0f; + } + else + return (v * 64.0f + 30.0f) / 31.0f; + } + + /* Scales the magnitude of each lane by 31/32 and returns it with bit 15 set for negative inputs (sign-magnitude result). */ + ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v) + { +#ifdef CVTT_ENABLE_ASSERTS + /* -32768 has no positive counterpart, so the negation below could not represent it. The trailing ';' is required: without it this function does not compile when CVTT_ENABLE_ASSERTS is defined. */ + for (int i = 0; i < ParallelMath::ParallelSize; i++) + assert(ParallelMath::Extract(v, i) != -32768); +#endif + + ParallelMath::Int16CompFlag negative = ParallelMath::Less(v, ParallelMath::MakeSInt16(0)); + ParallelMath::UInt15 absComp = ParallelMath::LosslessCast<ParallelMath::UInt15>::Cast(ParallelMath::Select(negative, ParallelMath::SInt16(ParallelMath::MakeSInt16(0) - v), v)); + + ParallelMath::UInt31 multiplied = ParallelMath::XMultiply(absComp, ParallelMath::MakeUInt15(31)); + ParallelMath::UInt31 shifted = ParallelMath::RightShift(multiplied, 5); + ParallelMath::UInt15 absCompScaled = ParallelMath::ToUInt15(shifted); + ParallelMath::SInt16 signBits = ParallelMath::SelectOrZero(negative, ParallelMath::MakeSInt16(-32768)); + + return ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(absCompScaled) | signBits; + } + + ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v) + { + return ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(v, ParallelMath::MakeUInt15(31)), 6)); + } + + void UnscaleHDREndpoints(const ParallelMath::AInt16 inEP[2][3], ParallelMath::AInt16 outEP[2][3], bool isSigned) + { + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + { + if (isSigned) + outEP[epi][ch] = ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueSigned(ParallelMath::LosslessCast<ParallelMath::SInt16>::Cast(inEP[epi][ch]))); + else + outEP[epi][ch] = 
ParallelMath::LosslessCast<ParallelMath::AInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::LosslessCast<ParallelMath::UInt16>::Cast(inEP[epi][ch]))); + } + } + } + + struct SinglePlaneTemporaries + { + UnfinishedEndpoints<3> unfinishedRGB[BC7Data::g_numShapesAll]; + UnfinishedEndpoints<4> unfinishedRGBA[BC7Data::g_numShapes12]; + + ParallelMath::UInt15 fragmentBestIndexes[BC7Data::g_numFragments]; + ParallelMath::UInt15 shapeBestEP[BC7Data::g_numShapesAll][2][4]; + ParallelMath::Float shapeBestError[BC7Data::g_numShapesAll]; + }; + } +} + +void cvtt::Internal::BC7Computer::TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]) +{ + ParallelMath::RoundTowardNearestForScope roundingMode; + + float tf[2]; + Util::ComputeTweakFactors(tweak, range, tf); + + MFloat base = ParallelMath::ToFloat(original[0]); + MFloat offs = ParallelMath::ToFloat(original[1]) - base; + + result[0] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[0], 0.0f, 255.0f), &roundingMode); + result[1] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(base + offs * tf[1], 0.0f, 255.0f), &roundingMode); +} + +void cvtt::Internal::BC7Computer::Quantize(MUInt15* color, int bits, int channels) +{ + for (int ch = 0; ch < channels; ch++) + color[ch] = ParallelMath::RightShift(((color[ch] << bits) - color[ch]) + ParallelMath::MakeUInt15(127 + (1 << (7 - bits))), 8); +} + +void cvtt::Internal::BC7Computer::QuantizeP(MUInt15* color, int bits, uint16_t p, int channels) +{ + int16_t addend; + if (p) + addend = ((1 << (8 - bits)) - 1); + else + addend = 255; + + for (int ch = 0; ch < channels; ch++) + { + MUInt16 ch16 = ParallelMath::LosslessCast<MUInt16>::Cast(color[ch]); + ch16 = ParallelMath::RightShift((ch16 << (bits + 1)) - ch16 + addend, 9); + ch16 = (ch16 << 1) | ParallelMath::MakeUInt16(p); + color[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ch16); + } +} + +void cvtt::Internal::BC7Computer::Unquantize(MUInt15* color, int bits, int channels) 
+{ + for (int ch = 0; ch < channels; ch++) + { + MUInt15 clr = color[ch]; + clr = clr << (8 - bits); + color[ch] = clr | ParallelMath::RightShift(clr, bits); + } +} + +void cvtt::Internal::BC7Computer::CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]) +{ + for (int j = 0; j < 2; j++) + { + QuantizeP(ep[j], 4, p[j], 3); + Unquantize(ep[j], 5, 3); + ep[j][3] = ParallelMath::MakeUInt15(255); + } +} + +void cvtt::Internal::BC7Computer::CompressEndpoints1(MUInt15 ep[2][4], uint16_t p) +{ + for (int j = 0; j < 2; j++) + { + QuantizeP(ep[j], 6, p, 3); + Unquantize(ep[j], 7, 3); + ep[j][3] = ParallelMath::MakeUInt15(255); + } +} + +void cvtt::Internal::BC7Computer::CompressEndpoints2(MUInt15 ep[2][4]) +{ + for (int j = 0; j < 2; j++) + { + Quantize(ep[j], 5, 3); + Unquantize(ep[j], 5, 3); + ep[j][3] = ParallelMath::MakeUInt15(255); + } +} + +void cvtt::Internal::BC7Computer::CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]) +{ + for (int j = 0; j < 2; j++) + { + QuantizeP(ep[j], 7, p[j], 3); + ep[j][3] = ParallelMath::MakeUInt15(255); + } +} + +void cvtt::Internal::BC7Computer::CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]) +{ + for (int j = 0; j < 2; j++) + { + Quantize(epRGB[j], 5, 3); + Unquantize(epRGB[j], 5, 3); + + Quantize(epA + j, 6, 1); + Unquantize(epA + j, 6, 1); + } +} + +void cvtt::Internal::BC7Computer::CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]) +{ + for (int j = 0; j < 2; j++) + { + Quantize(epRGB[j], 7, 3); + Unquantize(epRGB[j], 7, 3); + } + + // Alpha is full precision + (void)epA; +} + +void cvtt::Internal::BC7Computer::CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]) +{ + for (int j = 0; j < 2; j++) + QuantizeP(ep[j], 7, p[j], 4); +} + +void cvtt::Internal::BC7Computer::CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]) +{ + for (int j = 0; j < 2; j++) + { + QuantizeP(ep[j], 5, p[j], 4); + Unquantize(ep[j], 6, 4); + } +} + +void cvtt::Internal::BC7Computer::TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 
pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn) +{ + MFloat bestAverageError = ParallelMath::MakeFloat(FLT_MAX); + + MUInt15 intAverage[4]; + for (int ch = 0; ch < 4; ch++) + intAverage[ch] = ParallelMath::RoundAndConvertToU15(average[ch], rtn); + + MUInt15 eps[2][4]; + MUInt15 reconstructed[4]; + MUInt15 index = ParallelMath::MakeUInt15(0); + + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + eps[epi][ch] = ParallelMath::MakeUInt15(0); + eps[epi][3] = ParallelMath::MakeUInt15(255); + } + + for (int ch = 0; ch < 3; ch++) + reconstructed[ch] = ParallelMath::MakeUInt15(0); + reconstructed[3] = ParallelMath::MakeUInt15(255); + + // Depending on the target index and parity bits, there are multiple valid solid colors. + // We want to find the one closest to the actual average. 
+ MFloat epsAverageDiff = ParallelMath::MakeFloat(FLT_MAX); + for (int t = 0; t < numTables; t++) + { + const cvtt::Tables::BC7SC::Table& table = *(tables[t]); + + ParallelMath::Int16CompFlag pti = punchThroughInvalid[table.m_pBits]; + + MUInt15 candidateReconstructed[4]; + MUInt15 candidateEPs[2][4]; + + for (int i = 0; i < ParallelMath::ParallelSize; i++) + { + for (int ch = 0; ch < numRealChannels; ch++) + { + ParallelMath::ScalarUInt16 avgValue = ParallelMath::Extract(intAverage[ch], i); + assert(avgValue >= 0 && avgValue <= 255); + + const cvtt::Tables::BC7SC::TableEntry &entry = table.m_entries[avgValue]; + + ParallelMath::PutUInt15(candidateEPs[0][ch], i, entry.m_min); + ParallelMath::PutUInt15(candidateEPs[1][ch], i, entry.m_max); + ParallelMath::PutUInt15(candidateReconstructed[ch], i, entry.m_actualColor); + } + } + + MFloat avgError = ParallelMath::MakeFloatZero(); + for (int ch = 0; ch < numRealChannels; ch++) + { + MFloat delta = ParallelMath::ToFloat(candidateReconstructed[ch]) - average[ch]; + avgError = avgError + delta * delta * channelWeightsSq[ch]; + } + + ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(avgError, bestAverageError)); + better = ParallelMath::AndNot(pti, better); // Mask out punch-through invalidations + + if (ParallelMath::AnySet(better)) + { + ParallelMath::ConditionalSet(bestAverageError, ParallelMath::Int16FlagToFloat(better), avgError); + + MUInt15 candidateIndex = ParallelMath::MakeUInt15(table.m_index); + + ParallelMath::ConditionalSet(index, better, candidateIndex); + + for (int ch = 0; ch < numRealChannels; ch++) + ParallelMath::ConditionalSet(reconstructed[ch], better, candidateReconstructed[ch]); + + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < numRealChannels; ch++) + ParallelMath::ConditionalSet(eps[epi][ch], better, candidateEPs[epi][ch]); + } + } + + AggregatedError<4> aggError; + for (int pxi = 0; pxi < shapeLength; pxi++) + { + int px = fragmentStart[pxi]; + + 
BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); + } + + MFloat error = aggError.Finalize(flags, channelWeightsSq) + staticAlphaError; + + ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, shapeBestError)); + if (ParallelMath::AnySet(better)) + { + shapeBestError = ParallelMath::Min(shapeBestError, error); + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < numRealChannels; ch++) + ParallelMath::ConditionalSet(shapeBestEP[epi][ch], better, eps[epi][ch]); + } + + for (int pxi = 0; pxi < shapeLength; pxi++) + ParallelMath::ConditionalSet(fragmentBestIndexes[pxi], better, index); + } +} + +void cvtt::Internal::BC7Computer::TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) +{ + if (numRefineRounds < 1) + numRefineRounds = 1; + + float channelWeightsSq[4]; + + for (int ch = 0; ch < 4; ch++) + channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; + + SinglePlaneTemporaries temps; + + MUInt15 maxAlpha = ParallelMath::MakeUInt15(0); + MUInt15 minAlpha = ParallelMath::MakeUInt15(255); + ParallelMath::Int16CompFlag isPunchThrough = ParallelMath::MakeBoolInt16(true); + for (int px = 0; px < 16; px++) + { + MUInt15 a = pixels[px][3]; + maxAlpha = ParallelMath::Max(maxAlpha, a); + minAlpha = ParallelMath::Min(minAlpha, a); + + isPunchThrough = (isPunchThrough & (ParallelMath::Equal(a, ParallelMath::MakeUInt15(0)) | ParallelMath::Equal(a, ParallelMath::MakeUInt15(255)))); + } + + ParallelMath::Int16CompFlag blockHasNonMaxAlpha = ParallelMath::Less(minAlpha, ParallelMath::MakeUInt15(255)); + ParallelMath::Int16CompFlag blockHasNonZeroAlpha = ParallelMath::Less(ParallelMath::MakeUInt15(0), maxAlpha); + + bool anyBlockHasAlpha = ParallelMath::AnySet(blockHasNonMaxAlpha); + 
+ // Try RGB modes if any block has a min alpha 251 or higher + bool allowRGBModes = ParallelMath::AnySet(ParallelMath::Less(ParallelMath::MakeUInt15(250), minAlpha)); + + // Try mode 7 if any block has alpha. + // Mode 7 is almost never selected for RGB blocks because mode 4 has very accurate 7.7.7.1 endpoints + // and its parity bit doesn't affect alpha, meaning mode 7 can only be better in extremely specific + // situations, and only by at most 1 unit of error per pixel. + bool allowMode7 = anyBlockHasAlpha || (encodingPlan.mode7RGBPartitionEnabled != 0); + + MFloat preWeightedPixels[16][4]; + + BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); + + // Get initial RGB endpoints + if (allowRGBModes) + { + const uint8_t *shapeList = encodingPlan.rgbShapeList; + int numShapesToEvaluate = encodingPlan.rgbNumShapesToEvaluate; + + for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) + { + int shape = shapeList[shapeIter]; + + int shapeStart = BC7Data::g_shapeRanges[shape][0]; + int shapeSize = BC7Data::g_shapeRanges[shape][1]; + + EndpointSelector<3, 8> epSelector; + + for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) + { + for (int spx = 0; spx < shapeSize; spx++) + { + int px = BC7Data::g_fragments[shapeStart + spx]; + epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); + } + epSelector.FinishPass(epPass); + } + temps.unfinishedRGB[shape] = epSelector.GetEndpoints(channelWeights); + } + } + + // Get initial RGBA endpoints + { + const uint8_t *shapeList = encodingPlan.rgbaShapeList; + int numShapesToEvaluate = encodingPlan.rgbaNumShapesToEvaluate; + + for (int shapeIter = 0; shapeIter < numShapesToEvaluate; shapeIter++) + { + int shape = shapeList[shapeIter]; + + if (anyBlockHasAlpha || !allowRGBModes) + { + int shapeStart = BC7Data::g_shapeRanges[shape][0]; + int shapeSize = BC7Data::g_shapeRanges[shape][1]; + + EndpointSelector<4, 8> epSelector; + + for (int epPass = 0; 
epPass < NumEndpointSelectorPasses; epPass++) + { + for (int spx = 0; spx < shapeSize; spx++) + { + int px = BC7Data::g_fragments[shapeStart + spx]; + epSelector.ContributePass(preWeightedPixels[px], epPass, ParallelMath::MakeFloat(1.0f)); + } + epSelector.FinishPass(epPass); + } + temps.unfinishedRGBA[shape] = epSelector.GetEndpoints(channelWeights); + } + else + { + temps.unfinishedRGBA[shape] = temps.unfinishedRGB[shape].ExpandTo<4>(255); + } + } + } + + for (uint16_t mode = 0; mode <= 7; mode++) + { + if (mode == 4 || mode == 5) + continue; + + if (mode < 4 && !allowRGBModes) + continue; + + if (mode == 7 && !allowMode7) + continue; + + uint64_t partitionEnabledBits = 0; + switch (mode) + { + case 0: + partitionEnabledBits = encodingPlan.mode0PartitionEnabled; + break; + case 1: + partitionEnabledBits = encodingPlan.mode1PartitionEnabled; + break; + case 2: + partitionEnabledBits = encodingPlan.mode2PartitionEnabled; + break; + case 3: + partitionEnabledBits = encodingPlan.mode3PartitionEnabled; + break; + case 6: + partitionEnabledBits = encodingPlan.mode6Enabled ? 1 : 0; + break; + case 7: + if (anyBlockHasAlpha) + partitionEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; + else + partitionEnabledBits = encodingPlan.mode7RGBPartitionEnabled; + break; + default: + break; + } + + bool isRGB = (mode < 4); + + unsigned int numPartitions = 1 << BC7Data::g_modes[mode].m_partitionBits; + int numSubsets = BC7Data::g_modes[mode].m_numSubsets; + int indexPrec = BC7Data::g_modes[mode].m_indexBits; + + int parityBitMax = 1; + if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerEndpoint) + parityBitMax = 4; + else if (BC7Data::g_modes[mode].m_pBitMode == BC7Data::PBitMode_PerSubset) + parityBitMax = 2; + + int numRealChannels = isRGB ? 
3 : 4; + + int numShapes; + const int *shapeList; + + if (numSubsets == 1) + { + numShapes = BC7Data::g_numShapes1; + shapeList = BC7Data::g_shapeList1; + } + else if (numSubsets == 2) + { + numShapes = BC7Data::g_numShapes2; + shapeList = BC7Data::g_shapeList2; + } + else + { + assert(numSubsets == 3); + if (numPartitions == 16) + { + numShapes = BC7Data::g_numShapes3Short; + shapeList = BC7Data::g_shapeList3Short; + } + else + { + assert(numPartitions == 64); + numShapes = BC7Data::g_numShapes3; + shapeList = BC7Data::g_shapeList3; + } + } + + for (int slot = 0; slot < BC7Data::g_numShapesAll; slot++) + temps.shapeBestError[slot] = ParallelMath::MakeFloat(FLT_MAX); + + for (int shapeIter = 0; shapeIter < numShapes; shapeIter++) + { + int shape = shapeList[shapeIter]; + + int numTweakRounds = 0; + if (isRGB) + numTweakRounds = encodingPlan.seedPointsForShapeRGB[shape]; + else + numTweakRounds = encodingPlan.seedPointsForShapeRGBA[shape]; + + if (numTweakRounds == 0) + continue; + + if (numTweakRounds > MaxTweakRounds) + numTweakRounds = MaxTweakRounds; + + int shapeStart = BC7Data::g_shapeRanges[shape][0]; + int shapeLength = BC7Data::g_shapeRanges[shape][1]; + + AggregatedError<1> alphaAggError; + if (isRGB && anyBlockHasAlpha) + { + MUInt15 filledAlpha[1] = { ParallelMath::MakeUInt15(255) }; + + for (int pxi = 0; pxi < shapeLength; pxi++) + { + int px = BC7Data::g_fragments[shapeStart + pxi]; + MUInt15 original[1] = { pixels[px][3] }; + BCCommon::ComputeErrorLDR<1>(flags, filledAlpha, original, alphaAggError); + } + } + + float alphaWeightsSq[1] = { channelWeightsSq[3] }; + MFloat staticAlphaError = alphaAggError.Finalize(flags, alphaWeightsSq); + + MUInt15 tweakBaseEP[MaxTweakRounds][2][4]; + + for (int tweak = 0; tweak < numTweakRounds; tweak++) + { + if (isRGB) + { + temps.unfinishedRGB[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); + tweakBaseEP[tweak][0][3] = tweakBaseEP[tweak][1][3] = ParallelMath::MakeUInt15(255); + 
} + else + { + temps.unfinishedRGBA[shape].FinishLDR(tweak, 1 << indexPrec, tweakBaseEP[tweak][0], tweakBaseEP[tweak][1]); + } + } + + ParallelMath::Int16CompFlag punchThroughInvalid[4]; + for (int pIter = 0; pIter < parityBitMax; pIter++) + { + punchThroughInvalid[pIter] = ParallelMath::MakeBoolInt16(false); + + if ((flags & Flags::BC7_RespectPunchThrough) && (mode == 6 || mode == 7)) + { + // Modes 6 and 7 have parity bits that affect alpha + if (pIter == 0) + punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonZeroAlpha); + else if (pIter == parityBitMax - 1) + punchThroughInvalid[pIter] = (isPunchThrough & blockHasNonMaxAlpha); + else + punchThroughInvalid[pIter] = isPunchThrough; + } + } + + for (int pIter = 0; pIter < parityBitMax; pIter++) + { + if (ParallelMath::AllSet(punchThroughInvalid[pIter])) + continue; + + bool needPunchThroughCheck = ParallelMath::AnySet(punchThroughInvalid[pIter]); + + for (int tweak = 0; tweak < numTweakRounds; tweak++) + { + uint16_t p[2]; + p[0] = (pIter & 1); + p[1] = ((pIter >> 1) & 1); + + MUInt15 ep[2][4]; + + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 4; ch++) + ep[epi][ch] = tweakBaseEP[tweak][epi][ch]; + + for (int refine = 0; refine < numRefineRounds; refine++) + { + switch (mode) + { + case 0: + CompressEndpoints0(ep, p); + break; + case 1: + CompressEndpoints1(ep, p[0]); + break; + case 2: + CompressEndpoints2(ep); + break; + case 3: + CompressEndpoints3(ep, p); + break; + case 6: + CompressEndpoints6(ep, p); + break; + case 7: + CompressEndpoints7(ep, p); + break; + default: + assert(false); + break; + }; + + MFloat shapeError = ParallelMath::MakeFloatZero(); + + IndexSelector<4> indexSelector; + indexSelector.Init<false>(channelWeights, ep, 1 << indexPrec); + + EndpointRefiner<4> epRefiner; + epRefiner.Init(1 << indexPrec, channelWeights); + + MUInt15 indexes[16]; + + AggregatedError<4> aggError; + for (int pxi = 0; pxi < shapeLength; pxi++) + { + int px = BC7Data::g_fragments[shapeStart + pxi]; 
+ + MUInt15 index; + MUInt15 reconstructed[4]; + + index = indexSelector.SelectIndexLDR(floatPixels[px], rtn); + indexSelector.ReconstructLDR_BC7(index, reconstructed, numRealChannels); + + if (flags & cvtt::Flags::BC7_FastIndexing) + BCCommon::ComputeErrorLDR<4>(flags, reconstructed, pixels[px], numRealChannels, aggError); + else + { + MFloat error = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); + + MUInt15 altIndexes[2]; + altIndexes[0] = ParallelMath::Max(index, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); + altIndexes[1] = ParallelMath::Min(index + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << indexPrec) - 1))); + + for (int ii = 0; ii < 2; ii++) + { + indexSelector.ReconstructLDR_BC7(altIndexes[ii], reconstructed, numRealChannels); + + MFloat altError = BCCommon::ComputeErrorLDRSimple<4>(flags, reconstructed, pixels[px], numRealChannels, channelWeightsSq); + ParallelMath::Int16CompFlag better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altError, error)); + error = ParallelMath::Min(error, altError); + ParallelMath::ConditionalSet(index, better, altIndexes[ii]); + } + + shapeError = shapeError + error; + } + + if (refine != numRefineRounds - 1) + epRefiner.ContributeUnweightedPW(preWeightedPixels[px], index, numRealChannels); + + indexes[pxi] = index; + } + + if (flags & cvtt::Flags::BC7_FastIndexing) + shapeError = aggError.Finalize(flags, channelWeightsSq); + + if (isRGB) + shapeError = shapeError + staticAlphaError; + + ParallelMath::FloatCompFlag shapeErrorBetter; + ParallelMath::Int16CompFlag shapeErrorBetter16; + + shapeErrorBetter = ParallelMath::Less(shapeError, temps.shapeBestError[shape]); + shapeErrorBetter16 = ParallelMath::FloatFlagToInt16(shapeErrorBetter); + + if (ParallelMath::AnySet(shapeErrorBetter16)) + { + bool punchThroughOK = true; + if (needPunchThroughCheck) + { + shapeErrorBetter16 = 
ParallelMath::AndNot(punchThroughInvalid[pIter], shapeErrorBetter16); + shapeErrorBetter = ParallelMath::Int16FlagToFloat(shapeErrorBetter16); + + if (!ParallelMath::AnySet(shapeErrorBetter16)) + punchThroughOK = false; + } + + if (punchThroughOK) + { + ParallelMath::ConditionalSet(temps.shapeBestError[shape], shapeErrorBetter, shapeError); + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < numRealChannels; ch++) + ParallelMath::ConditionalSet(temps.shapeBestEP[shape][epi][ch], shapeErrorBetter16, ep[epi][ch]); + + for (int pxi = 0; pxi < shapeLength; pxi++) + ParallelMath::ConditionalSet(temps.fragmentBestIndexes[shapeStart + pxi], shapeErrorBetter16, indexes[pxi]); + } + } + + if (refine != numRefineRounds - 1) + epRefiner.GetRefinedEndpointsLDR(ep, numRealChannels, rtn); + } // refine + } // tweak + } // p + + if (flags & cvtt::Flags::BC7_TrySingleColor) + { + MUInt15 total[4]; + for (int ch = 0; ch < 4; ch++) + total[ch] = ParallelMath::MakeUInt15(0); + + /* Sum the pixels that belong to this shape: pxi walks the shape's fragment list and px is the pixel it names. Indexing pixels by pxi would average the first shapeLength pixels of the block instead. */ + for (int pxi = 0; pxi < shapeLength; pxi++) + { + int px = BC7Data::g_fragments[shapeStart + pxi]; + for (int ch = 0; ch < 4; ch++) + total[ch] = total[ch] + pixels[px][ch]; + } + + MFloat rcpShapeLength = ParallelMath::MakeFloat(1.0f / static_cast<float>(shapeLength)); + MFloat average[4]; + for (int ch = 0; ch < 4; ch++) + average[ch] = ParallelMath::ToFloat(total[ch]) * rcpShapeLength; + + const uint8_t *fragment = BC7Data::g_fragments + shapeStart; + MFloat &shapeBestError = temps.shapeBestError[shape]; + MUInt15 (&shapeBestEP)[2][4] = temps.shapeBestEP[shape]; + MUInt15 *fragmentBestIndexes = temps.fragmentBestIndexes + shapeStart; + + const cvtt::Tables::BC7SC::Table **scTables = NULL; + int numSCTables = 0; + + const cvtt::Tables::BC7SC::Table *tables0[] = + { + &cvtt::Tables::BC7SC::g_mode0_p00_i1, + &cvtt::Tables::BC7SC::g_mode0_p00_i2, + &cvtt::Tables::BC7SC::g_mode0_p00_i3, + &cvtt::Tables::BC7SC::g_mode0_p01_i1, + &cvtt::Tables::BC7SC::g_mode0_p01_i2, + 
&cvtt::Tables::BC7SC::g_mode0_p01_i3, + &cvtt::Tables::BC7SC::g_mode0_p10_i1, + &cvtt::Tables::BC7SC::g_mode0_p10_i2, + &cvtt::Tables::BC7SC::g_mode0_p10_i3, + &cvtt::Tables::BC7SC::g_mode0_p11_i1, + &cvtt::Tables::BC7SC::g_mode0_p11_i2, + &cvtt::Tables::BC7SC::g_mode0_p11_i3, + }; + + const cvtt::Tables::BC7SC::Table *tables1[] = + { + &cvtt::Tables::BC7SC::g_mode1_p0_i1, + &cvtt::Tables::BC7SC::g_mode1_p0_i2, + &cvtt::Tables::BC7SC::g_mode1_p0_i3, + &cvtt::Tables::BC7SC::g_mode1_p1_i1, + &cvtt::Tables::BC7SC::g_mode1_p1_i2, + &cvtt::Tables::BC7SC::g_mode1_p1_i3, + }; + + const cvtt::Tables::BC7SC::Table *tables2[] = + { + &cvtt::Tables::BC7SC::g_mode2, + }; + + const cvtt::Tables::BC7SC::Table *tables3[] = + { + &cvtt::Tables::BC7SC::g_mode3_p0, + &cvtt::Tables::BC7SC::g_mode3_p1, + }; + + const cvtt::Tables::BC7SC::Table *tables6[] = + { + &cvtt::Tables::BC7SC::g_mode6_p0_i1, + &cvtt::Tables::BC7SC::g_mode6_p0_i2, + &cvtt::Tables::BC7SC::g_mode6_p0_i3, + &cvtt::Tables::BC7SC::g_mode6_p0_i4, + &cvtt::Tables::BC7SC::g_mode6_p0_i5, + &cvtt::Tables::BC7SC::g_mode6_p0_i6, + &cvtt::Tables::BC7SC::g_mode6_p0_i7, + &cvtt::Tables::BC7SC::g_mode6_p1_i1, + &cvtt::Tables::BC7SC::g_mode6_p1_i2, + &cvtt::Tables::BC7SC::g_mode6_p1_i3, + &cvtt::Tables::BC7SC::g_mode6_p1_i4, + &cvtt::Tables::BC7SC::g_mode6_p1_i5, + &cvtt::Tables::BC7SC::g_mode6_p1_i6, + &cvtt::Tables::BC7SC::g_mode6_p1_i7, + }; + + const cvtt::Tables::BC7SC::Table *tables7[] = + { + &cvtt::Tables::BC7SC::g_mode7_p00, + &cvtt::Tables::BC7SC::g_mode7_p01, + &cvtt::Tables::BC7SC::g_mode7_p10, + &cvtt::Tables::BC7SC::g_mode7_p11, + }; + + switch (mode) + { + case 0: + { + scTables = tables0; + numSCTables = sizeof(tables0) / sizeof(tables0[0]); + } + break; + case 1: + { + scTables = tables1; + numSCTables = sizeof(tables1) / sizeof(tables1[0]); + } + break; + case 2: + { + + scTables = tables2; + numSCTables = sizeof(tables2) / sizeof(tables2[0]); + } + break; + case 3: + { + scTables = tables3; + numSCTables = 
sizeof(tables3) / sizeof(tables3[0]); + } + break; + case 6: + { + scTables = tables6; + numSCTables = sizeof(tables6) / sizeof(tables6[0]); + } + break; + case 7: + { + scTables = tables7; + numSCTables = sizeof(tables7) / sizeof(tables7[0]); + } + break; + default: + assert(false); + break; + } + + TrySingleColorRGBAMultiTable(flags, pixels, average, numRealChannels, fragment, shapeLength, staticAlphaError, punchThroughInvalid, shapeBestError, shapeBestEP, fragmentBestIndexes, channelWeightsSq, scTables, numSCTables, rtn); + } + } // shapeIter + + uint64_t partitionsEnabledBits = 0xffffffffffffffffULL; + + switch (mode) + { + case 0: + partitionsEnabledBits = encodingPlan.mode0PartitionEnabled; + break; + case 1: + partitionsEnabledBits = encodingPlan.mode1PartitionEnabled; + break; + case 2: + partitionsEnabledBits = encodingPlan.mode2PartitionEnabled; + break; + case 3: + partitionsEnabledBits = encodingPlan.mode3PartitionEnabled; + break; + case 6: + partitionsEnabledBits = encodingPlan.mode6Enabled ? 
1 : 0; + break; + case 7: + /* Write the mask that the partition loop below reads (partitionsEnabledBits); assigning the similarly named partitionEnabledBits left every mode 7 partition enabled. */ + if (anyBlockHasAlpha) + partitionsEnabledBits = encodingPlan.mode7RGBAPartitionEnabled; + else + partitionsEnabledBits = encodingPlan.mode7RGBPartitionEnabled; + break; + default: + break; + }; + + for (uint16_t partition = 0; partition < numPartitions; partition++) + { + if (((partitionsEnabledBits >> partition) & 1) == 0) + continue; + + const int *partitionShapes; + if (numSubsets == 1) + partitionShapes = BC7Data::g_shapes1[partition]; + else if (numSubsets == 2) + partitionShapes = BC7Data::g_shapes2[partition]; + else + { + assert(numSubsets == 3); + partitionShapes = BC7Data::g_shapes3[partition]; + } + + MFloat totalError = ParallelMath::MakeFloatZero(); + for (int subset = 0; subset < numSubsets; subset++) + totalError = totalError + temps.shapeBestError[partitionShapes[subset]]; + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(totalError, work.m_error); + ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); + + if (mode == 7 && anyBlockHasAlpha) + { + // Some lanes could be better, but we filter them out to ensure consistency with scalar + bool isRGBAllowedForThisPartition = (((encodingPlan.mode7RGBPartitionEnabled >> partition) & 1) != 0); + + if (!isRGBAllowedForThisPartition) + { + errorBetter16 = (errorBetter16 & blockHasNonMaxAlpha); + errorBetter = ParallelMath::Int16FlagToFloat(errorBetter16); + } + } + + if (ParallelMath::AnySet(errorBetter16)) + { + for (int subset = 0; subset < numSubsets; subset++) + { + int shape = partitionShapes[subset]; + int shapeStart = BC7Data::g_shapeRanges[shape][0]; + int shapeLength = BC7Data::g_shapeRanges[shape][1]; + + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 4; ch++) + ParallelMath::ConditionalSet(work.m_ep[subset][epi][ch], errorBetter16, temps.shapeBestEP[shape][epi][ch]); + + for (int pxi = 0; pxi < shapeLength; pxi++) + { + int px = BC7Data::g_fragments[shapeStart + pxi]; + 
ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, temps.fragmentBestIndexes[shapeStart + pxi]); + } + } + + ParallelMath::ConditionalSet(work.m_error, errorBetter, totalError); + ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); + ParallelMath::ConditionalSet(work.m_u.m_partition, errorBetter16, ParallelMath::MakeUInt15(partition)); + } + } + } +} + +void cvtt::Internal::BC7Computer::TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn) +{ + // TODO: These error calculations are not optimal for weight-by-alpha, but this routine needs to be mostly rewritten for that. + // The alpha/color solutions are co-dependent in that case, but a good way to solve it would probably be to + // solve the alpha channel first, then solve the RGB channels, which in turn breaks down into two cases: + // - Separate alpha channel, then weighted RGB + // - Alpha+2 other channels, then the independent channel + if (numRefineRounds < 1) + numRefineRounds = 1; + + float channelWeightsSq[4]; + for (int ch = 0; ch < 4; ch++) + channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; + + for (uint16_t mode = 4; mode <= 5; mode++) + { + int numSP[2] = { 0, 0 }; + + for (uint16_t rotation = 0; rotation < 4; rotation++) + { + if (mode == 4) + { + numSP[0] = encodingPlan.mode4SP[rotation][0]; + numSP[1] = encodingPlan.mode4SP[rotation][1]; + } + else + numSP[0] = numSP[1] = encodingPlan.mode5SP[rotation]; + + if (numSP[0] == 0 && numSP[1] == 0) + continue; + + int alphaChannel = (rotation + 3) & 3; + int redChannel = (rotation == 1) ? 3 : 0; + int greenChannel = (rotation == 2) ? 3 : 1; + int blueChannel = (rotation == 3) ? 
3 : 2; + + MUInt15 rotatedRGB[16][3]; + MFloat floatRotatedRGB[16][3]; + + for (int px = 0; px < 16; px++) + { + rotatedRGB[px][0] = pixels[px][redChannel]; + rotatedRGB[px][1] = pixels[px][greenChannel]; + rotatedRGB[px][2] = pixels[px][blueChannel]; + + for (int ch = 0; ch < 3; ch++) + floatRotatedRGB[px][ch] = ParallelMath::ToFloat(rotatedRGB[px][ch]); + } + + uint16_t maxIndexSelector = (mode == 4) ? 2 : 1; + + float rotatedRGBWeights[3] = { channelWeights[redChannel], channelWeights[greenChannel], channelWeights[blueChannel] }; + float rotatedRGBWeightsSq[3] = { channelWeightsSq[redChannel], channelWeightsSq[greenChannel], channelWeightsSq[blueChannel] }; + float rotatedAlphaWeight[1] = { channelWeights[alphaChannel] }; + float rotatedAlphaWeightSq[1] = { channelWeightsSq[alphaChannel] }; + + float uniformWeight[1] = { 1.0f }; // Since the alpha channel is independent, there's no need to bother with weights when doing refinement or selection, only error + + MFloat preWeightedRotatedRGB[16][3]; + BCCommon::PreWeightPixelsLDR<3>(preWeightedRotatedRGB, rotatedRGB, rotatedRGBWeights); + + for (uint16_t indexSelector = 0; indexSelector < maxIndexSelector; indexSelector++) + { + int numTweakRounds = numSP[indexSelector]; + + if (numTweakRounds <= 0) + continue; + + if (numTweakRounds > MaxTweakRounds) + numTweakRounds = MaxTweakRounds; + + EndpointSelector<3, 8> rgbSelector; + + for (int epPass = 0; epPass < NumEndpointSelectorPasses; epPass++) + { + for (int px = 0; px < 16; px++) + rgbSelector.ContributePass(preWeightedRotatedRGB[px], epPass, ParallelMath::MakeFloat(1.0f)); + + rgbSelector.FinishPass(epPass); + } + + MUInt15 alphaRange[2]; + + alphaRange[0] = alphaRange[1] = pixels[0][alphaChannel]; + for (int px = 1; px < 16; px++) + { + alphaRange[0] = ParallelMath::Min(pixels[px][alphaChannel], alphaRange[0]); + alphaRange[1] = ParallelMath::Max(pixels[px][alphaChannel], alphaRange[1]); + } + + int rgbPrec = 0; + int alphaPrec = 0; + + if (mode == 4) + { + 
rgbPrec = indexSelector ? 3 : 2; + alphaPrec = indexSelector ? 2 : 3; + } + else + rgbPrec = alphaPrec = 2; + + UnfinishedEndpoints<3> unfinishedRGB = rgbSelector.GetEndpoints(rotatedRGBWeights); + + MFloat bestRGBError = ParallelMath::MakeFloat(FLT_MAX); + MFloat bestAlphaError = ParallelMath::MakeFloat(FLT_MAX); + + MUInt15 bestRGBIndexes[16]; + MUInt15 bestAlphaIndexes[16]; + MUInt15 bestEP[2][4]; + + for (int px = 0; px < 16; px++) + bestRGBIndexes[px] = bestAlphaIndexes[px] = ParallelMath::MakeUInt15(0); + + for (int tweak = 0; tweak < numTweakRounds; tweak++) + { + MUInt15 rgbEP[2][3]; + MUInt15 alphaEP[2]; + + unfinishedRGB.FinishLDR(tweak, 1 << rgbPrec, rgbEP[0], rgbEP[1]); + + TweakAlpha(alphaRange, tweak, 1 << alphaPrec, alphaEP); + + for (int refine = 0; refine < numRefineRounds; refine++) + { + if (mode == 4) + CompressEndpoints4(rgbEP, alphaEP); + else + CompressEndpoints5(rgbEP, alphaEP); + + + IndexSelector<1> alphaIndexSelector; + IndexSelector<3> rgbIndexSelector; + + { + MUInt15 alphaEPTemp[2][1] = { { alphaEP[0] },{ alphaEP[1] } }; + alphaIndexSelector.Init<false>(uniformWeight, alphaEPTemp, 1 << alphaPrec); + } + rgbIndexSelector.Init<false>(rotatedRGBWeights, rgbEP, 1 << rgbPrec); + + EndpointRefiner<3> rgbRefiner; + EndpointRefiner<1> alphaRefiner; + + rgbRefiner.Init(1 << rgbPrec, rotatedRGBWeights); + alphaRefiner.Init(1 << alphaPrec, uniformWeight); + + MFloat errorRGB = ParallelMath::MakeFloatZero(); + MFloat errorA = ParallelMath::MakeFloatZero(); + + MUInt15 rgbIndexes[16]; + MUInt15 alphaIndexes[16]; + + AggregatedError<3> rgbAggError; + AggregatedError<1> alphaAggError; + + for (int px = 0; px < 16; px++) + { + MUInt15 rgbIndex = rgbIndexSelector.SelectIndexLDR(floatRotatedRGB[px], rtn); + MUInt15 alphaIndex = alphaIndexSelector.SelectIndexLDR(floatPixels[px] + alphaChannel, rtn); + + MUInt15 reconstructedRGB[3]; + MUInt15 reconstructedAlpha[1]; + + rgbIndexSelector.ReconstructLDR_BC7(rgbIndex, reconstructedRGB); + 
alphaIndexSelector.ReconstructLDR_BC7(alphaIndex, reconstructedAlpha); + + if (flags & cvtt::Flags::BC7_FastIndexing) + { + BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], rgbAggError); + BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, alphaAggError); + } + else + { + AggregatedError<3> baseRGBAggError; + AggregatedError<1> baseAlphaAggError; + + BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], baseRGBAggError); + BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, baseAlphaAggError); + + MFloat rgbError = baseRGBAggError.Finalize(flags, rotatedRGBWeightsSq); + MFloat alphaError = baseAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); + + MUInt15 altRGBIndexes[2]; + MUInt15 altAlphaIndexes[2]; + + altRGBIndexes[0] = ParallelMath::Max(rgbIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); + altRGBIndexes[1] = ParallelMath::Min(rgbIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << rgbPrec) - 1))); + + altAlphaIndexes[0] = ParallelMath::Max(alphaIndex, ParallelMath::MakeUInt15(1)) - ParallelMath::MakeUInt15(1); + altAlphaIndexes[1] = ParallelMath::Min(alphaIndex + ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << alphaPrec) - 1))); + + for (int ii = 0; ii < 2; ii++) + { + rgbIndexSelector.ReconstructLDR_BC7(altRGBIndexes[ii], reconstructedRGB); + alphaIndexSelector.ReconstructLDR_BC7(altAlphaIndexes[ii], reconstructedAlpha); + + AggregatedError<3> altRGBAggError; + AggregatedError<1> altAlphaAggError; + + BCCommon::ComputeErrorLDR<3>(flags, reconstructedRGB, rotatedRGB[px], altRGBAggError); + BCCommon::ComputeErrorLDR<1>(flags, reconstructedAlpha, pixels[px] + alphaChannel, altAlphaAggError); + + MFloat altRGBError = altRGBAggError.Finalize(flags, rotatedRGBWeightsSq); + MFloat altAlphaError = altAlphaAggError.Finalize(flags, rotatedAlphaWeightSq); + + 
ParallelMath::Int16CompFlag rgbBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altRGBError, rgbError)); + ParallelMath::Int16CompFlag alphaBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(altAlphaError, alphaError)); + + rgbError = ParallelMath::Min(altRGBError, rgbError); + alphaError = ParallelMath::Min(altAlphaError, alphaError); + + ParallelMath::ConditionalSet(rgbIndex, rgbBetter, altRGBIndexes[ii]); + ParallelMath::ConditionalSet(alphaIndex, alphaBetter, altAlphaIndexes[ii]); + } + + errorRGB = errorRGB + rgbError; + errorA = errorA + alphaError; + } + + if (refine != numRefineRounds - 1) + { + rgbRefiner.ContributeUnweightedPW(preWeightedRotatedRGB[px], rgbIndex); + alphaRefiner.ContributeUnweightedPW(floatPixels[px] + alphaChannel, alphaIndex); + } + + if (flags & Flags::BC7_FastIndexing) + { + errorRGB = rgbAggError.Finalize(flags, rotatedRGBWeightsSq); + errorA = alphaAggError.Finalize(flags, rotatedAlphaWeightSq); + } + + rgbIndexes[px] = rgbIndex; + alphaIndexes[px] = alphaIndex; + } + + ParallelMath::FloatCompFlag rgbBetter = ParallelMath::Less(errorRGB, bestRGBError); + ParallelMath::FloatCompFlag alphaBetter = ParallelMath::Less(errorA, bestAlphaError); + + ParallelMath::Int16CompFlag rgbBetterInt16 = ParallelMath::FloatFlagToInt16(rgbBetter); + ParallelMath::Int16CompFlag alphaBetterInt16 = ParallelMath::FloatFlagToInt16(alphaBetter); + + if (ParallelMath::AnySet(rgbBetterInt16)) + { + bestRGBError = ParallelMath::Min(errorRGB, bestRGBError); + + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestRGBIndexes[px], rgbBetterInt16, rgbIndexes[px]); + + for (int ep = 0; ep < 2; ep++) + { + for (int ch = 0; ch < 3; ch++) + ParallelMath::ConditionalSet(bestEP[ep][ch], rgbBetterInt16, rgbEP[ep][ch]); + } + } + + if (ParallelMath::AnySet(alphaBetterInt16)) + { + bestAlphaError = ParallelMath::Min(errorA, bestAlphaError); + + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestAlphaIndexes[px], 
alphaBetterInt16, alphaIndexes[px]); + + for (int ep = 0; ep < 2; ep++) + ParallelMath::ConditionalSet(bestEP[ep][3], alphaBetterInt16, alphaEP[ep]); + } + + if (refine != numRefineRounds - 1) + { + rgbRefiner.GetRefinedEndpointsLDR(rgbEP, rtn); + + MUInt15 alphaEPTemp[2][1]; + alphaRefiner.GetRefinedEndpointsLDR(alphaEPTemp, rtn); + + for (int i = 0; i < 2; i++) + alphaEP[i] = alphaEPTemp[i][0]; + } + } // refine + } // tweak + + MFloat combinedError = bestRGBError + bestAlphaError; + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, work.m_error); + ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); + + work.m_error = ParallelMath::Min(combinedError, work.m_error); + + ParallelMath::ConditionalSet(work.m_mode, errorBetter16, ParallelMath::MakeUInt15(mode)); + ParallelMath::ConditionalSet(work.m_u.m_isr.m_rotation, errorBetter16, ParallelMath::MakeUInt15(rotation)); + ParallelMath::ConditionalSet(work.m_u.m_isr.m_indexSelector, errorBetter16, ParallelMath::MakeUInt15(indexSelector)); + + for (int px = 0; px < 16; px++) + { + ParallelMath::ConditionalSet(work.m_indexes[px], errorBetter16, indexSelector ? bestAlphaIndexes[px] : bestRGBIndexes[px]); + ParallelMath::ConditionalSet(work.m_indexes2[px], errorBetter16, indexSelector ? 
bestRGBIndexes[px] : bestAlphaIndexes[px]); + } + + for (int ep = 0; ep < 2; ep++) + for (int ch = 0; ch < 4; ch++) + ParallelMath::ConditionalSet(work.m_ep[0][ep][ch], errorBetter16, bestEP[ep][ch]); + } + } + } +} + +template<class T> +void cvtt::Internal::BC7Computer::Swap(T& a, T& b) +{ + T temp = a; + a = b; + b = temp; +} + +void cvtt::Internal::BC7Computer::Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds) +{ + MUInt15 pixels[16][4]; + MFloat floatPixels[16][4]; + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 4; ch++) + ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]); + } + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 4; ch++) + floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); + } + + BC67::WorkInfo work; + memset(&work, 0, sizeof(work)); + + work.m_error = ParallelMath::MakeFloat(FLT_MAX); + + { + ParallelMath::RoundTowardNearestForScope rtn; + TrySinglePlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn); + TryDualPlane(flags, pixels, floatPixels, channelWeights, encodingPlan, numRefineRounds, work, &rtn); + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + PackingVector pv; + pv.Init(); + + ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(work.m_mode, block); + ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(work.m_u.m_partition, block); + ParallelMath::ScalarUInt16 indexSelector = ParallelMath::Extract(work.m_u.m_isr.m_indexSelector, block); + + const BC7Data::BC7ModeInfo& modeInfo = BC7Data::g_modes[mode]; + + ParallelMath::ScalarUInt16 indexes[16]; + ParallelMath::ScalarUInt16 indexes2[16]; + ParallelMath::ScalarUInt16 endPoints[3][2][4]; + + for (int i = 0; i < 16; i++) + { + indexes[i] = ParallelMath::Extract(work.m_indexes[i], block); + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + 
indexes2[i] = ParallelMath::Extract(work.m_indexes2[i], block); + } + + for (int subset = 0; subset < 3; subset++) + { + for (int ep = 0; ep < 2; ep++) + { + for (int ch = 0; ch < 4; ch++) + endPoints[subset][ep][ch] = ParallelMath::Extract(work.m_ep[subset][ep][ch], block); + } + } + + int fixups[3] = { 0, 0, 0 }; + + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + { + bool flipRGB = ((indexes[0] & (1 << (modeInfo.m_indexBits - 1))) != 0); + bool flipAlpha = ((indexes2[0] & (1 << (modeInfo.m_alphaIndexBits - 1))) != 0); + + if (flipRGB) + { + uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; + for (int px = 0; px < 16; px++) + indexes[px] = highIndex - indexes[px]; + } + + if (flipAlpha) + { + uint16_t highIndex = (1 << modeInfo.m_alphaIndexBits) - 1; + for (int px = 0; px < 16; px++) + indexes2[px] = highIndex - indexes2[px]; + } + + if (indexSelector) + Swap(flipRGB, flipAlpha); + + if (flipRGB) + { + for (int ch = 0; ch < 3; ch++) + Swap(endPoints[0][0][ch], endPoints[0][1][ch]); + } + if (flipAlpha) + Swap(endPoints[0][0][3], endPoints[0][1][3]); + + } + else + { + if (modeInfo.m_numSubsets == 2) + fixups[1] = BC7Data::g_fixupIndexes2[partition]; + else if (modeInfo.m_numSubsets == 3) + { + fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; + fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; + } + + bool flip[3] = { false, false, false }; + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + flip[subset] = ((indexes[fixups[subset]] & (1 << (modeInfo.m_indexBits - 1))) != 0); + + if (flip[0] || flip[1] || flip[2]) + { + uint16_t highIndex = (1 << modeInfo.m_indexBits) - 1; + for (int px = 0; px < 16; px++) + { + int subset = 0; + if (modeInfo.m_numSubsets == 2) + subset = (BC7Data::g_partitionMap[partition] >> px) & 1; + else if (modeInfo.m_numSubsets == 3) + subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; + + if (flip[subset]) + indexes[px] = highIndex - indexes[px]; + } + + int maxCH = (modeInfo.m_alphaMode == 
BC7Data::AlphaMode_Combined) ? 4 : 3; + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + if (flip[subset]) + for (int ch = 0; ch < maxCH; ch++) + Swap(endPoints[subset][0][ch], endPoints[subset][1][ch]); + } + } + } + + pv.Pack(static_cast<uint8_t>(1 << mode), mode + 1); + + if (modeInfo.m_partitionBits) + pv.Pack(partition, modeInfo.m_partitionBits); + + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + { + ParallelMath::ScalarUInt16 rotation = ParallelMath::Extract(work.m_u.m_isr.m_rotation, block); + pv.Pack(rotation, 2); + } + + if (modeInfo.m_hasIndexSelector) + pv.Pack(indexSelector, 1); + + // Encode RGB + for (int ch = 0; ch < 3; ch++) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + { + ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][ch]; + epPart >>= (8 - modeInfo.m_rgbBits); + + pv.Pack(epPart, modeInfo.m_rgbBits); + } + } + } + + // Encode alpha + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + { + ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][3]; + epPart >>= (8 - modeInfo.m_alphaBits); + + pv.Pack(epPart, modeInfo.m_alphaBits); + } + } + } + + // Encode parity bits + if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + ParallelMath::ScalarUInt16 epPart = endPoints[subset][0][0]; + epPart >>= (7 - modeInfo.m_rgbBits); + epPart &= 1; + + pv.Pack(epPart, 1); + } + } + else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + { + ParallelMath::ScalarUInt16 epPart = endPoints[subset][ep][0]; + epPart >>= (7 - modeInfo.m_rgbBits); + epPart &= 1; + + pv.Pack(epPart, 1); + } + } + } + + // Encode indexes + for (int px = 0; px < 16; px++) + { + int bits 
= modeInfo.m_indexBits; + if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) + bits--; + + pv.Pack(indexes[px], bits); + } + + // Encode secondary indexes + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + { + for (int px = 0; px < 16; px++) + { + int bits = modeInfo.m_alphaIndexBits; + if (px == 0) + bits--; + + pv.Pack(indexes2[px], bits); + } + } + + pv.Flush(packedBlocks); + + packedBlocks += 16; + } +} + +void cvtt::Internal::BC7Computer::UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock) +{ + UnpackingVector pv; + pv.Init(packedBlock); + + int mode = 8; + for (int i = 0; i < 8; i++) + { + if (pv.Unpack(1) == 1) + { + mode = i; + break; + } + } + + if (mode > 7) + { + for (int px = 0; px < 16; px++) + for (int ch = 0; ch < 4; ch++) + output.m_pixels[px][ch] = 0; + + return; + } + + const BC7Data::BC7ModeInfo &modeInfo = BC7Data::g_modes[mode]; + + int partition = 0; + if (modeInfo.m_partitionBits) + partition = pv.Unpack(modeInfo.m_partitionBits); + + int rotation = 0; + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + rotation = pv.Unpack(2); + + int indexSelector = 0; + if (modeInfo.m_hasIndexSelector) + indexSelector = pv.Unpack(1); + + // Resolve fixups + int fixups[3] = { 0, 0, 0 }; + + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_Separate) + { + if (modeInfo.m_numSubsets == 2) + fixups[1] = BC7Data::g_fixupIndexes2[partition]; + else if (modeInfo.m_numSubsets == 3) + { + fixups[1] = BC7Data::g_fixupIndexes3[partition][0]; + fixups[2] = BC7Data::g_fixupIndexes3[partition][1]; + } + } + + int endPoints[3][2][4]; + + // Decode RGB + for (int ch = 0; ch < 3; ch++) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + endPoints[subset][ep][ch] = (pv.Unpack(modeInfo.m_rgbBits) << (8 - modeInfo.m_rgbBits)); + } + } + + // Decode alpha + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int 
ep = 0; ep < 2; ep++) + endPoints[subset][ep][3] = (pv.Unpack(modeInfo.m_alphaBits) << (8 - modeInfo.m_alphaBits)); + } + } + else + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + endPoints[subset][ep][3] = 255; + } + } + + int parityBits = 0; + + // Decode parity bits + if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerSubset) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + int p = pv.Unpack(1); + + for (int ep = 0; ep < 2; ep++) + { + for (int ch = 0; ch < 3; ch++) + endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); + + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) + endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); + } + } + + parityBits = 1; + } + else if (modeInfo.m_pBitMode == BC7Data::PBitMode_PerEndpoint) + { + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + { + int p = pv.Unpack(1); + + for (int ch = 0; ch < 3; ch++) + endPoints[subset][ep][ch] |= p << (7 - modeInfo.m_rgbBits); + + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) + endPoints[subset][ep][3] |= p << (7 - modeInfo.m_alphaBits); + } + } + + parityBits = 1; + } + + // Fill endpoint bits + for (int subset = 0; subset < modeInfo.m_numSubsets; subset++) + { + for (int ep = 0; ep < 2; ep++) + { + for (int ch = 0; ch < 3; ch++) + endPoints[subset][ep][ch] |= (endPoints[subset][ep][ch] >> (modeInfo.m_rgbBits + parityBits)); + + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) + endPoints[subset][ep][3] |= (endPoints[subset][ep][3] >> (modeInfo.m_alphaBits + parityBits)); + } + } + + int indexes[16]; + int indexes2[16]; + + // Decode indexes + for (int px = 0; px < 16; px++) + { + int bits = modeInfo.m_indexBits; + if ((px == 0) || (px == fixups[1]) || (px == fixups[2])) + bits--; + + indexes[px] = pv.Unpack(bits); + } + + // Decode secondary indexes + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + { + for (int px = 0; 
px < 16; px++) + { + int bits = modeInfo.m_alphaIndexBits; + if (px == 0) + bits--; + + indexes2[px] = pv.Unpack(bits); + } + } + else + { + for (int px = 0; px < 16; px++) + indexes2[px] = 0; + } + + const int *alphaWeights = BC7Data::g_weightTables[modeInfo.m_alphaIndexBits]; + const int *rgbWeights = BC7Data::g_weightTables[modeInfo.m_indexBits]; + + // Decode each pixel + for (int px = 0; px < 16; px++) + { + int rgbWeight = 0; + int alphaWeight = 0; + + int rgbIndex = indexes[px]; + + rgbWeight = rgbWeights[indexes[px]]; + + if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Combined) + alphaWeight = rgbWeight; + else if (modeInfo.m_alphaMode == BC7Data::AlphaMode_Separate) + alphaWeight = alphaWeights[indexes2[px]]; + + if (indexSelector == 1) + { + int temp = rgbWeight; + rgbWeight = alphaWeight; + alphaWeight = temp; + } + + int pixel[4] = { 0, 0, 0, 255 }; + + int subset = 0; + + if (modeInfo.m_numSubsets == 2) + subset = (BC7Data::g_partitionMap[partition] >> px) & 1; + else if (modeInfo.m_numSubsets == 3) + subset = (BC7Data::g_partitionMap2[partition] >> (px * 2)) & 3; + + for (int ch = 0; ch < 3; ch++) + pixel[ch] = ((64 - rgbWeight) * endPoints[subset][0][ch] + rgbWeight * endPoints[subset][1][ch] + 32) >> 6; + + if (modeInfo.m_alphaMode != BC7Data::AlphaMode_None) + pixel[3] = ((64 - alphaWeight) * endPoints[subset][0][3] + alphaWeight * endPoints[subset][1][3] + 32) >> 6; + + if (rotation != 0) + { + int ch = rotation - 1; + int temp = pixel[ch]; + pixel[ch] = pixel[3]; + pixel[3] = temp; + } + + for (int ch = 0; ch < 4; ch++) + output.m_pixels[px][ch] = static_cast<uint8_t>(pixel[ch]); + } +} + +cvtt::ParallelMath::SInt16 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru) +{ + assert(ParallelMath::AllSet(ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(31744)))); + assert(ParallelMath::AllSet(ParallelMath::Less(ParallelMath::MakeSInt16(-31744), 
elem2CL))); + + // Expand to full range + ParallelMath::Int16CompFlag isNegative = ParallelMath::Less(elem2CL, ParallelMath::MakeSInt16(0)); + MUInt15 absElem = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - elem2CL, elem2CL)); + + absElem = ParallelMath::RightShift(ParallelMath::RoundAndConvertToU15(ParallelMath::ToFloat(absElem) * 32.0f / 31.0f, ru), 16 - precision); + + MSInt16 absElemS16 = ParallelMath::LosslessCast<MSInt16>::Cast(absElem); + + return ParallelMath::Select(isNegative, ParallelMath::MakeSInt16(0) - absElemS16, absElemS16); +} + +cvtt::ParallelMath::UInt15 cvtt::Internal::BC6HComputer::QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru) +{ + MUInt16 expandedElem = ParallelMath::RoundAndConvertToU16(ParallelMath::Min(ParallelMath::ToFloat(elem) * 64.0f / 31.0f, ParallelMath::MakeFloat(65535.0f)), ru); + return ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(expandedElem, 16 - precision)); +} + +void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL) +{ + MSInt16 zero = ParallelMath::MakeSInt16(0); + + ParallelMath::Int16CompFlag negative = ParallelMath::Less(comp, zero); + MUInt15 absComp = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Select(negative, MSInt16(zero - comp), comp)); + + MSInt16 unq; + MUInt15 absUnq; + + if (precision >= 16) + { + unq = comp; + absUnq = absComp; + } + else + { + MSInt16 maxCompMinusOne = ParallelMath::MakeSInt16(static_cast<int16_t>((1 << (precision - 1)) - 2)); + ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); + ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); + + absUnq = (absComp << (16 - precision)) + ParallelMath::MakeUInt15(static_cast<uint16_t>(0x4000 >> (precision - 1))); + 
ParallelMath::ConditionalSet(absUnq, isZero, ParallelMath::MakeUInt15(0)); + ParallelMath::ConditionalSet(absUnq, isMax, ParallelMath::MakeUInt15(0x7fff)); + + unq = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(absUnq)); + } + + outUnquantized = unq; + + MUInt15 funq = ParallelMath::ToUInt15(ParallelMath::RightShift(ParallelMath::XMultiply(absUnq, ParallelMath::MakeUInt15(31)), 5)); + + outUnquantizedFinished2CL = ParallelMath::ConditionalNegate(negative, ParallelMath::LosslessCast<MSInt16>::Cast(funq)); +} + +void cvtt::Internal::BC6HComputer::UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished) +{ + MUInt16 unq = ParallelMath::LosslessCast<MUInt16>::Cast(comp); + if (precision < 15) + { + MUInt15 zero = ParallelMath::MakeUInt15(0); + MUInt15 maxCompMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>((1 << precision) - 2)); + + ParallelMath::Int16CompFlag isZero = ParallelMath::Equal(comp, zero); + ParallelMath::Int16CompFlag isMax = ParallelMath::Less(maxCompMinusOne, comp); + + unq = (ParallelMath::LosslessCast<MUInt16>::Cast(comp) << (16 - precision)) + ParallelMath::MakeUInt16(static_cast<uint16_t>(0x8000 >> precision)); + + ParallelMath::ConditionalSet(unq, isZero, ParallelMath::MakeUInt16(0)); + ParallelMath::ConditionalSet(unq, isMax, ParallelMath::MakeUInt16(0xffff)); + } + + outUnquantized = unq; + outUnquantizedFinished = ParallelMath::ToUInt16(ParallelMath::RightShift(ParallelMath::XMultiply(unq, ParallelMath::MakeUInt15(31)), 6)); +} + +void cvtt::Internal::BC6HComputer::QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const 
ParallelMath::RoundTowardNearestForScope *rtn) +{ + MSInt16 unquantizedEP[2][3]; + MSInt16 finishedUnquantizedEP[2][3]; + + { + ParallelMath::RoundUpForScope ru; + + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + { + MSInt16 qee = QuantizeSingleEndpointElementSigned(endPoints[epi][ch], precision, &ru); + UnquantizeSingleEndpointElementSigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); + quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); + } + } + } + + indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); + indexSelector.InitHDR(indexRange, true, fastIndexing, channelWeights); + + MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); + + MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); + + ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); + + if (ParallelMath::AnySet(invert)) + { + ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); + + indexSelector.ConditionalInvert(invert); + + for (int ch = 0; ch < 3; ch++) + { + MAInt16 firstEP = quantizedEndPoints[0][ch]; + MAInt16 secondEP = quantizedEndPoints[1][ch]; + + quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); + quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); + } + } + + indexes[fixupIndex] = index; +} + +void cvtt::Internal::BC6HComputer::QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, 
const ParallelMath::RoundTowardNearestForScope *rtn) +{ + MUInt16 unquantizedEP[2][3]; + MUInt16 finishedUnquantizedEP[2][3]; + + { + ParallelMath::RoundUpForScope ru; + + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + { + MUInt15 qee = QuantizeSingleEndpointElementUnsigned(ParallelMath::LosslessCast<MUInt15>::Cast(endPoints[epi][ch]), precision, &ru); + UnquantizeSingleEndpointElementUnsigned(qee, precision, unquantizedEP[epi][ch], finishedUnquantizedEP[epi][ch]); + quantizedEndPoints[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(qee); + } + } + } + + indexSelector.Init(channelWeights, unquantizedEP, finishedUnquantizedEP, indexRange); + indexSelector.InitHDR(indexRange, false, fastIndexing, channelWeights); + + MUInt15 halfRangeMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange / 2) - 1); + + MUInt15 index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixelsColorSpace[fixupIndex], rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[fixupIndex], rtn); + + ParallelMath::Int16CompFlag invert = ParallelMath::Less(halfRangeMinusOne, index); + + if (ParallelMath::AnySet(invert)) + { + ParallelMath::ConditionalSet(index, invert, MUInt15(ParallelMath::MakeUInt15(static_cast<uint16_t>(indexRange - 1)) - index)); + + indexSelector.ConditionalInvert(invert); + + for (int ch = 0; ch < 3; ch++) + { + MAInt16 firstEP = quantizedEndPoints[0][ch]; + MAInt16 secondEP = quantizedEndPoints[1][ch]; + + quantizedEndPoints[0][ch] = ParallelMath::Select(invert, secondEP, firstEP); + quantizedEndPoints[1][ch] = ParallelMath::Select(invert, firstEP, secondEP); + } + } + + indexes[fixupIndex] = index; +} + +void cvtt::Internal::BC6HComputer::EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal) +{ + ParallelMath::Int16CompFlag allLegal = 
ParallelMath::MakeBoolInt16(true); + + MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); + + for (int ch = 0; ch < 3; ch++) + { + outEncodedEPs[0][0][ch] = ep0[0][ch]; + outEncodedEPs[0][1][ch] = ep0[1][ch]; + outEncodedEPs[1][0][ch] = ep1[0][ch]; + outEncodedEPs[1][1][ch] = ep1[1][ch]; + + if (isTransformed) + { + for (int subset = 0; subset < 2; subset++) + { + for (int epi = 0; epi < 2; epi++) + { + if (epi == 0 && subset == 0) + continue; + + MAInt16 bReduced = (outEncodedEPs[subset][epi][ch] & aSignificantMask); + + MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch])), bPrec[ch]); + + outEncodedEPs[subset][epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(delta); + + MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[subset][epi][ch], outEncodedEPs[0][0][ch]) & aSignificantMask); + allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); + } + } + } + + if (!ParallelMath::AnySet(allLegal)) + break; + } + + outIsLegal = allLegal; +} + +void cvtt::Internal::BC6HComputer::EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal) +{ + ParallelMath::Int16CompFlag allLegal = ParallelMath::MakeBoolInt16(true); + + MAInt16 aSignificantMask = ParallelMath::MakeAInt16(static_cast<int16_t>((1 << aPrec) - 1)); + + for (int ch = 0; ch < 3; ch++) + { + outEncodedEPs[0][ch] = ep[0][ch]; + outEncodedEPs[1][ch] = ep[1][ch]; + + if (isTransformed) + { + MAInt16 bReduced = (outEncodedEPs[1][ch] & aSignificantMask); + + MSInt16 delta = ParallelMath::TruncateToPrecisionSigned(ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::AbstractSubtract(outEncodedEPs[1][ch], outEncodedEPs[0][ch])), bPrec[ch]); + + outEncodedEPs[1][ch] = 
ParallelMath::LosslessCast<MAInt16>::Cast(delta); + + MAInt16 reconstructed = (ParallelMath::AbstractAdd(outEncodedEPs[1][ch], outEncodedEPs[0][ch]) & aSignificantMask); + allLegal = allLegal & ParallelMath::Equal(reconstructed, bReduced); + } + } + + outIsLegal = allLegal; +} + +void cvtt::Internal::BC6HComputer::Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds) +{ + if (numTweakRounds < 1) + numTweakRounds = 1; + else if (numTweakRounds > MaxTweakRounds) + numTweakRounds = MaxTweakRounds; + + if (numRefineRounds < 1) + numRefineRounds = 1; + else if (numRefineRounds > MaxRefineRounds) + numRefineRounds = MaxRefineRounds; + + bool fastIndexing = ((flags & cvtt::Flags::BC6H_FastIndexing) != 0); + float channelWeightsSq[3]; + + ParallelMath::RoundTowardNearestForScope rtn; + + MSInt16 pixels[16][3]; + MFloat floatPixels2CL[16][3]; + MFloat floatPixelsLinearWeighted[16][3]; + + MSInt16 low15Bits = ParallelMath::MakeSInt16(32767); + + for (int ch = 0; ch < 3; ch++) + channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + { + MSInt16 pixelValue; + ParallelMath::ConvertHDRInputs(inputs, px, ch, pixelValue); + + // Convert from sign+magnitude to 2CL + if (isSigned) + { + ParallelMath::Int16CompFlag negative = ParallelMath::Less(pixelValue, ParallelMath::MakeSInt16(0)); + MSInt16 magnitude = (pixelValue & low15Bits); + ParallelMath::ConditionalSet(pixelValue, negative, ParallelMath::MakeSInt16(0) - magnitude); + pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(-31743)); + } + else + pixelValue = ParallelMath::Max(pixelValue, ParallelMath::MakeSInt16(0)); + + pixelValue = ParallelMath::Min(pixelValue, ParallelMath::MakeSInt16(31743)); + + pixels[px][ch] = pixelValue; + floatPixels2CL[px][ch] = ParallelMath::ToFloat(pixelValue); + floatPixelsLinearWeighted[px][ch] 
= ParallelMath::TwosCLHalfToFloat(pixelValue) * channelWeights[ch]; + } + } + + MFloat preWeightedPixels[16][3]; + + BCCommon::PreWeightPixelsHDR<3>(preWeightedPixels, pixels, channelWeights); + + MAInt16 bestEndPoints[2][2][3]; + MUInt15 bestIndexes[16]; + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + MUInt15 bestMode = ParallelMath::MakeUInt15(0); + MUInt15 bestPartition = ParallelMath::MakeUInt15(0); + + for (int px = 0; px < 16; px++) + bestIndexes[px] = ParallelMath::MakeUInt15(0); + + for (int subset = 0; subset < 2; subset++) + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 3; ch++) + bestEndPoints[subset][epi][ch] = ParallelMath::MakeAInt16(0); + + UnfinishedEndpoints<3> partitionedUFEP[32][2]; + UnfinishedEndpoints<3> singleUFEP; + + // Generate UFEP for partitions + for (int p = 0; p < 32; p++) + { + int partitionMask = BC7Data::g_partitionMap[p]; + + EndpointSelector<3, 8> epSelectors[2]; + + for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) + { + for (int px = 0; px < 16; px++) + { + int subset = (partitionMask >> px) & 1; + epSelectors[subset].ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); + } + + for (int subset = 0; subset < 2; subset++) + epSelectors[subset].FinishPass(pass); + } + + for (int subset = 0; subset < 2; subset++) + partitionedUFEP[p][subset] = epSelectors[subset].GetEndpoints(channelWeights); + } + + // Generate UFEP for single + { + EndpointSelector<3, 8> epSelector; + + for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) + { + for (int px = 0; px < 16; px++) + epSelector.ContributePass(preWeightedPixels[px], pass, ParallelMath::MakeFloat(1.0f)); + + epSelector.FinishPass(pass); + } + + singleUFEP = epSelector.GetEndpoints(channelWeights); + } + + for (int partitionedInt = 0; partitionedInt < 2; partitionedInt++) + { + bool partitioned = (partitionedInt == 1); + + for (int aPrec = BC7Data::g_maxHDRPrecision; aPrec >= 0; aPrec--) + { + if 
(!BC7Data::g_hdrModesExistForPrecision[partitionedInt][aPrec]) + continue; + + int numPartitions = partitioned ? 32 : 1; + int numSubsets = partitioned ? 2 : 1; + int indexBits = partitioned ? 3 : 4; + int indexRange = (1 << indexBits); + + for (int p = 0; p < numPartitions; p++) + { + int partitionMask = partitioned ? BC7Data::g_partitionMap[p] : 0; + + const int MaxMetaRounds = MaxTweakRounds * MaxRefineRounds; + + MAInt16 metaEndPointsQuantized[MaxMetaRounds][2][2][3]; + MUInt15 metaIndexes[MaxMetaRounds][16]; + MFloat metaError[MaxMetaRounds][2]; + + bool roundValid[MaxMetaRounds][2]; + + for (int r = 0; r < MaxMetaRounds; r++) + for (int subset = 0; subset < 2; subset++) + roundValid[r][subset] = true; + + for (int subset = 0; subset < numSubsets; subset++) + { + for (int tweak = 0; tweak < MaxTweakRounds; tweak++) + { + EndpointRefiner<3> refiners[2]; + + bool abortRemainingRefines = false; + for (int refinePass = 0; refinePass < MaxRefineRounds; refinePass++) + { + int metaRound = tweak * MaxRefineRounds + refinePass; + + if (tweak >= numTweakRounds || refinePass >= numRefineRounds) + abortRemainingRefines = true; + + if (abortRemainingRefines) + { + roundValid[metaRound][subset] = false; + continue; + } + + MAInt16(&mrQuantizedEndPoints)[2][2][3] = metaEndPointsQuantized[metaRound]; + MUInt15(&mrIndexes)[16] = metaIndexes[metaRound]; + + MSInt16 endPointsColorSpace[2][3]; + + if (refinePass == 0) + { + UnfinishedEndpoints<3> ufep = partitioned ? partitionedUFEP[p][subset] : singleUFEP; + + if (isSigned) + ufep.FinishHDRSigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); + else + ufep.FinishHDRUnsigned(tweak, indexRange, endPointsColorSpace[0], endPointsColorSpace[1], &rtn); + } + else + refiners[subset].GetRefinedEndpointsHDR(endPointsColorSpace, isSigned, &rtn); + + refiners[subset].Init(indexRange, channelWeights); + + int fixupIndex = (subset == 0) ? 
0 : BC7Data::g_fixupIndexes2[p]; + + IndexSelectorHDR<3> indexSelector; + if (isSigned) + QuantizeEndpointsSigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); + else + QuantizeEndpointsUnsigned(endPointsColorSpace, floatPixels2CL, floatPixelsLinearWeighted, mrQuantizedEndPoints[subset], mrIndexes, indexSelector, fixupIndex, aPrec, indexRange, channelWeights, fastIndexing, &rtn); + + if (metaRound > 0) + { + ParallelMath::Int16CompFlag anySame = ParallelMath::MakeBoolInt16(false); + + for (int prevRound = 0; prevRound < metaRound; prevRound++) + { + MAInt16(&prevRoundEPs)[2][3] = metaEndPointsQuantized[prevRound][subset]; + + ParallelMath::Int16CompFlag same = ParallelMath::MakeBoolInt16(true); + + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 3; ch++) + same = (same & ParallelMath::Equal(prevRoundEPs[epi][ch], mrQuantizedEndPoints[subset][epi][ch])); + + anySame = (anySame | same); + if (ParallelMath::AllSet(anySame)) + break; + } + + if (ParallelMath::AllSet(anySame)) + { + roundValid[metaRound][subset] = false; + continue; + } + } + + MFloat subsetError = ParallelMath::MakeFloatZero(); + + { + for (int px = 0; px < 16; px++) + { + if (subset != ((partitionMask >> px) & 1)) + continue; + + MUInt15 index; + if (px == fixupIndex) + index = mrIndexes[px]; + else + { + index = fastIndexing ? indexSelector.SelectIndexHDRFast(floatPixels2CL[px], &rtn) : indexSelector.SelectIndexHDRSlow(floatPixelsLinearWeighted[px], &rtn); + mrIndexes[px] = index; + } + + MSInt16 reconstructed[3]; + if (isSigned) + indexSelector.ReconstructHDRSigned(mrIndexes[px], reconstructed); + else + indexSelector.ReconstructHDRUnsigned(mrIndexes[px], reconstructed); + + subsetError = subsetError + (fastIndexing ? 
BCCommon::ComputeErrorHDRFast<3>(flags, reconstructed, pixels[px], channelWeightsSq) : BCCommon::ComputeErrorHDRSlow<3>(flags, reconstructed, pixels[px], channelWeightsSq)); + + if (refinePass != numRefineRounds - 1) + refiners[subset].ContributeUnweightedPW(preWeightedPixels[px], index); + } + } + + metaError[metaRound][subset] = subsetError; + } + } + } + + // Now we have a bunch of attempts, but not all of them will fit in the delta coding scheme + int numMeta1 = partitioned ? MaxMetaRounds : 1; + for (int meta0 = 0; meta0 < MaxMetaRounds; meta0++) + { + if (!roundValid[meta0][0]) + continue; + + for (int meta1 = 0; meta1 < numMeta1; meta1++) + { + MFloat combinedError = metaError[meta0][0]; + if (partitioned) + { + if (!roundValid[meta1][1]) + continue; + + combinedError = combinedError + metaError[meta1][1]; + } + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(combinedError, bestError); + if (!ParallelMath::AnySet(errorBetter)) + continue; + + ParallelMath::Int16CompFlag needsCommit = ParallelMath::FloatFlagToInt16(errorBetter); + + // Figure out if this is encodable + for (int mode = 0; mode < BC7Data::g_numHDRModes; mode++) + { + const BC7Data::BC6HModeInfo &modeInfo = BC7Data::g_hdrModes[mode]; + + if (modeInfo.m_partitioned != partitioned || modeInfo.m_aPrec != aPrec) + continue; + + MAInt16 encodedEPs[2][2][3]; + ParallelMath::Int16CompFlag isLegal; + if (partitioned) + EvaluatePartitionedLegality(metaEndPointsQuantized[meta0][0], metaEndPointsQuantized[meta1][1], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs, isLegal); + else + EvaluateSingleLegality(metaEndPointsQuantized[meta0][0], modeInfo.m_aPrec, modeInfo.m_bPrec, modeInfo.m_transformed, encodedEPs[0], isLegal); + + ParallelMath::Int16CompFlag isLegalAndBetter = (ParallelMath::FloatFlagToInt16(errorBetter) & isLegal); + if (!ParallelMath::AnySet(isLegalAndBetter)) + continue; + + ParallelMath::FloatCompFlag isLegalAndBetterFloat = 
ParallelMath::Int16FlagToFloat(isLegalAndBetter); + + ParallelMath::ConditionalSet(bestError, isLegalAndBetterFloat, combinedError); + ParallelMath::ConditionalSet(bestMode, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(mode))); + ParallelMath::ConditionalSet(bestPartition, isLegalAndBetter, ParallelMath::MakeUInt15(static_cast<uint16_t>(p))); + + for (int subset = 0; subset < numSubsets; subset++) + { + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + ParallelMath::ConditionalSet(bestEndPoints[subset][epi][ch], isLegalAndBetter, encodedEPs[subset][epi][ch]); + } + } + + for (int px = 0; px < 16; px++) + { + int subset = ((partitionMask >> px) & 1); + if (subset == 0) + ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta0][px]); + else + ParallelMath::ConditionalSet(bestIndexes[px], isLegalAndBetter, metaIndexes[meta1][px]); + } + + needsCommit = ParallelMath::AndNot(needsCommit, isLegalAndBetter); + if (!ParallelMath::AnySet(needsCommit)) + break; + } + } + } + } + } + } + + // At this point, everything should be set + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + ParallelMath::ScalarUInt16 mode = ParallelMath::Extract(bestMode, block); + ParallelMath::ScalarUInt16 partition = ParallelMath::Extract(bestPartition, block); + int32_t eps[2][2][3]; + ParallelMath::ScalarUInt16 indexes[16]; + + const BC7Data::BC6HModeInfo& modeInfo = BC7Data::g_hdrModes[mode]; + + BC6H_IO::WriteFunc_t writeFunc = BC6H_IO::g_writeFuncs[mode]; + + const int headerBits = modeInfo.m_partitioned ? 
82 : 65; + + for (int subset = 0; subset < 2; subset++) + { + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + eps[subset][epi][ch] = ParallelMath::Extract(bestEndPoints[subset][epi][ch], block); + } + } + + for (int px = 0; px < 16; px++) + indexes[px] = ParallelMath::Extract(bestIndexes[px], block); + + uint16_t modeID = modeInfo.m_modeID; + + PackingVector pv; + + { + uint32_t header[3]; + writeFunc(header, modeID, partition, + eps[0][0][0], eps[0][1][0], eps[1][0][0], eps[1][1][0], + eps[0][0][1], eps[0][1][1], eps[1][0][1], eps[1][1][1], + eps[0][0][2], eps[0][1][2], eps[1][0][2], eps[1][1][2] + ); + + pv.InitPacked(header, headerBits); + } + + int fixupIndex1 = 0; + int indexBits = 4; + if (modeInfo.m_partitioned) + { + fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; + indexBits = 3; + } + + for (int px = 0; px < 16; px++) + { + ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[px], block); + if (px == 0 || px == fixupIndex1) + pv.Pack(index, indexBits - 1); + else + pv.Pack(index, indexBits); + } + + pv.Flush(packedBlocks + 16 * block); + } +} + +void cvtt::Internal::BC6HComputer::SignExtendSingle(int &v, int bits) +{ + if (v & (1 << (bits - 1))) + v |= -(1 << bits); +} + +void cvtt::Internal::BC6HComputer::UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned) +{ + UnpackingVector pv; + pv.Init(pBC); + + int numModeBits = 2; + int modeBits = pv.Unpack(2); + if (modeBits != 0 && modeBits != 1) + { + modeBits |= pv.Unpack(3) << 2; + numModeBits += 3; + } + + int mode = -1; + for (int possibleMode = 0; possibleMode < BC7Data::g_numHDRModes; possibleMode++) + { + if (BC7Data::g_hdrModes[possibleMode].m_modeID == modeBits) + { + mode = possibleMode; + break; + } + } + + if (mode < 0) + { + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + output.m_pixels[px][ch] = 0; + output.m_pixels[px][3] = 0x3c00; // 1.0 + } + return; + } + + const BC7Data::BC6HModeInfo& modeInfo = 
BC7Data::g_hdrModes[mode]; + const int headerBits = modeInfo.m_partitioned ? 82 : 65; + const BC6H_IO::ReadFunc_t readFunc = BC6H_IO::g_readFuncs[mode]; + + uint16_t partition = 0; + int32_t eps[2][2][3]; + + for (int subset = 0; subset < 2; subset++) + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 3; ch++) + eps[subset][epi][ch] = 0; + + { + uint32_t header[3]; + uint16_t codedEPs[2][2][3]; + pv.UnpackStart(header, headerBits); + + readFunc(header, partition, + codedEPs[0][0][0], codedEPs[0][1][0], codedEPs[1][0][0], codedEPs[1][1][0], + codedEPs[0][0][1], codedEPs[0][1][1], codedEPs[1][0][1], codedEPs[1][1][1], + codedEPs[0][0][2], codedEPs[0][1][2], codedEPs[1][0][2], codedEPs[1][1][2] + ); + + for (int subset = 0; subset < 2; subset++) + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 3; ch++) + eps[subset][epi][ch] = codedEPs[subset][epi][ch]; + } + + uint16_t modeID = modeInfo.m_modeID; + + int fixupIndex1 = 0; + int indexBits = 4; + int numSubsets = 1; + if (modeInfo.m_partitioned) + { + fixupIndex1 = BC7Data::g_fixupIndexes2[partition]; + indexBits = 3; + numSubsets = 2; + } + + int indexes[16]; + for (int px = 0; px < 16; px++) + { + if (px == 0 || px == fixupIndex1) + indexes[px] = pv.Unpack(indexBits - 1); + else + indexes[px] = pv.Unpack(indexBits); + } + + if (modeInfo.m_partitioned) + { + for (int ch = 0; ch < 3; ch++) + { + if (isSigned) + SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); + if (modeInfo.m_transformed || isSigned) + { + SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); + SignExtendSingle(eps[1][0][ch], modeInfo.m_bPrec[ch]); + SignExtendSingle(eps[1][1][ch], modeInfo.m_bPrec[ch]); + } + } + } + else + { + for (int ch = 0; ch < 3; ch++) + { + if (isSigned) + SignExtendSingle(eps[0][0][ch], modeInfo.m_aPrec); + if (modeInfo.m_transformed || isSigned) + SignExtendSingle(eps[0][1][ch], modeInfo.m_bPrec[ch]); + } + } + + int aPrec = modeInfo.m_aPrec; + + if (modeInfo.m_transformed) + { + for (int ch = 0; ch < 3; 
ch++) + { + int wrapMask = (1 << aPrec) - 1; + + eps[0][1][ch] = ((eps[0][0][ch] + eps[0][1][ch]) & wrapMask); + if (isSigned) + SignExtendSingle(eps[0][1][ch], aPrec); + + if (modeInfo.m_partitioned) + { + eps[1][0][ch] = ((eps[0][0][ch] + eps[1][0][ch]) & wrapMask); + eps[1][1][ch] = ((eps[0][0][ch] + eps[1][1][ch]) & wrapMask); + + if (isSigned) + { + SignExtendSingle(eps[1][0][ch], aPrec); + SignExtendSingle(eps[1][1][ch], aPrec); + } + } + } + } + + // Unquantize endpoints + for (int subset = 0; subset < numSubsets; subset++) + { + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < 3; ch++) + { + int &v = eps[subset][epi][ch]; + + if (isSigned) + { + if (aPrec >= 16) + { + // Nothing + } + else + { + bool s = false; + int comp = v; + if (v < 0) + { + s = true; + comp = -comp; + } + + int unq = 0; + if (comp == 0) + unq = 0; + else if (comp >= ((1 << (aPrec - 1)) - 1)) + unq = 0x7fff; + else + unq = ((comp << 15) + 0x4000) >> (aPrec - 1); + + if (s) + unq = -unq; + + v = unq; + } + } + else + { + if (aPrec >= 15) + { + // Nothing + } + else if (v == 0) + { + // Nothing + } + else if (v == ((1 << aPrec) - 1)) + v = 0xffff; + else + v = ((v << 16) + 0x8000) >> aPrec; + } + } + } + } + + const int *weights = BC7Data::g_weightTables[indexBits]; + + for (int px = 0; px < 16; px++) + { + int subset = 0; + if (modeInfo.m_partitioned) + subset = (BC7Data::g_partitionMap[partition] >> px) & 1; + + int w = weights[indexes[px]]; + for (int ch = 0; ch < 3; ch++) + { + int comp = ((64 - w) * eps[subset][0][ch] + w * eps[subset][1][ch] + 32) >> 6; + + if (isSigned) + { + if (comp < 0) + comp = -(((-comp) * 31) >> 5); + else + comp = (comp * 31) >> 5; + + int s = 0; + if (comp < 0) + { + s = 0x8000; + comp = -comp; + } + + output.m_pixels[px][ch] = static_cast<uint16_t>(s | comp); + } + else + { + comp = (comp * 31) >> 6; + output.m_pixels[px][ch] = static_cast<uint16_t>(comp); + } + } + output.m_pixels[px][3] = 0x3c00; // 1.0 + } +} + +void 
cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality) +{ + static const int kMaxQuality = 100; + + if (quality < 1) + quality = 1; + else if (quality > kMaxQuality) + quality = kMaxQuality; + + const int numRGBModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGB * quality / kMaxQuality; + const int numRGBAModes = cvtt::Tables::BC7Prio::g_bc7NumPrioCodesRGBA * quality / kMaxQuality; + + const uint16_t *prioLists[] = { cvtt::Tables::BC7Prio::g_bc7PrioCodesRGB, cvtt::Tables::BC7Prio::g_bc7PrioCodesRGBA }; + const int prioListSizes[] = { numRGBModes, numRGBAModes }; + + BC7FineTuningParams ftParams; + memset(&ftParams, 0, sizeof(ftParams)); + + for (int listIndex = 0; listIndex < 2; listIndex++) + { + int prioListSize = prioListSizes[listIndex]; + const uint16_t *prioList = prioLists[listIndex]; + + for (int prioIndex = 0; prioIndex < prioListSize; prioIndex++) + { + const uint16_t packedMode = prioList[prioIndex]; + + uint8_t seedPoints = static_cast<uint8_t>(cvtt::Tables::BC7Prio::UnpackSeedPointCount(packedMode)); + int mode = cvtt::Tables::BC7Prio::UnpackMode(packedMode); + + switch (mode) + { + case 0: + ftParams.mode0SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; + break; + case 1: + ftParams.mode1SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; + break; + case 2: + ftParams.mode2SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; + break; + case 3: + ftParams.mode3SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; + break; + case 4: + ftParams.mode4SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)][cvtt::Tables::BC7Prio::UnpackIndexSelector(packedMode)] = seedPoints; + break; + case 5: + ftParams.mode5SP[cvtt::Tables::BC7Prio::UnpackRotation(packedMode)] = seedPoints; + break; + case 6: + ftParams.mode6SP = seedPoints; + break; + case 7: + ftParams.mode7SP[cvtt::Tables::BC7Prio::UnpackPartition(packedMode)] = seedPoints; + break; + } + } + } + 
+ ConfigureBC7EncodingPlanFromFineTuningParams(encodingPlan, ftParams); +} + +// Generates a BC7 encoding plan from fine-tuning parameters. +bool cvtt::Kernels::ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params) +{ + memset(&encodingPlan, 0, sizeof(encodingPlan)); + + // Mode 0 + for (int partition = 0; partition < 16; partition++) + { + uint8_t sp = params.mode0SP[partition]; + if (sp == 0) + continue; + + encodingPlan.mode0PartitionEnabled |= static_cast<uint16_t>(1) << partition; + + for (int subset = 0; subset < 3; subset++) + { + int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset]; + encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); + } + } + + // Mode 1 + for (int partition = 0; partition < 64; partition++) + { + uint8_t sp = params.mode1SP[partition]; + if (sp == 0) + continue; + + encodingPlan.mode1PartitionEnabled |= static_cast<uint64_t>(1) << partition; + + for (int subset = 0; subset < 2; subset++) + { + int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; + encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); + } + } + + // Mode 2 + for (int partition = 0; partition < 64; partition++) + { + uint8_t sp = params.mode2SP[partition]; + if (sp == 0) + continue; + + encodingPlan.mode2PartitionEnabled |= static_cast<uint64_t>(1) << partition; + + for (int subset = 0; subset < 3; subset++) + { + int shape = cvtt::Internal::BC7Data::g_shapes3[partition][subset]; + encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); + } + } + + // Mode 3 + for (int partition = 0; partition < 64; partition++) + { + uint8_t sp = params.mode3SP[partition]; + if (sp == 0) + continue; + + encodingPlan.mode3PartitionEnabled |= static_cast<uint64_t>(1) << partition; + + for (int subset = 0; subset < 2; subset++) + { + int shape = 
cvtt::Internal::BC7Data::g_shapes2[partition][subset]; + encodingPlan.seedPointsForShapeRGB[shape] = std::max(encodingPlan.seedPointsForShapeRGB[shape], sp); + } + } + + // Mode 4 + for (int rotation = 0; rotation < 4; rotation++) + { + for (int indexMode = 0; indexMode < 2; indexMode++) + encodingPlan.mode4SP[rotation][indexMode] = params.mode4SP[rotation][indexMode]; + } + + // Mode 5 + for (int rotation = 0; rotation < 4; rotation++) + encodingPlan.mode5SP[rotation] = params.mode5SP[rotation]; + + // Mode 6 + { + uint8_t sp = params.mode6SP; + if (sp != 0) + { + encodingPlan.mode6Enabled = true; + + int shape = cvtt::Internal::BC7Data::g_shapes1[0][0]; + encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp); + } + } + + // Mode 7 + for (int partition = 0; partition < 64; partition++) + { + uint8_t sp = params.mode7SP[partition]; + if (sp == 0) + continue; + + encodingPlan.mode7RGBAPartitionEnabled |= static_cast<uint64_t>(1) << partition; + + for (int subset = 0; subset < 2; subset++) + { + int shape = cvtt::Internal::BC7Data::g_shapes2[partition][subset]; + encodingPlan.seedPointsForShapeRGBA[shape] = std::max(encodingPlan.seedPointsForShapeRGBA[shape], sp); + } + } + + for (int i = 0; i < BC7EncodingPlan::kNumRGBShapes; i++) + { + if (encodingPlan.seedPointsForShapeRGB[i] > 0) + { + encodingPlan.rgbShapeList[encodingPlan.rgbNumShapesToEvaluate] = i; + encodingPlan.rgbNumShapesToEvaluate++; + } + } + + for (int i = 0; i < BC7EncodingPlan::kNumRGBAShapes; i++) + { + if (encodingPlan.seedPointsForShapeRGBA[i] > 0) + { + encodingPlan.rgbaShapeList[encodingPlan.rgbaNumShapesToEvaluate] = i; + encodingPlan.rgbaNumShapesToEvaluate++; + } + } + + encodingPlan.mode7RGBPartitionEnabled = (encodingPlan.mode7RGBAPartitionEnabled & ~encodingPlan.mode3PartitionEnabled); + + return true; +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_BC67.h b/thirdparty/cvtt/ConvectionKernels_BC67.h new file mode 100644 index 
0000000000..b929711187 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BC67.h @@ -0,0 +1,99 @@ +#pragma once + +#include "ConvectionKernels_ParallelMath.h" + + +namespace cvtt +{ + namespace Tables + { + namespace BC7SC + { + struct Table; + } + } + + namespace Internal + { + namespace BC67 + { + struct WorkInfo; + } + + template<int TVectorSize> + class IndexSelectorHDR; + } + + struct PixelBlockU8; +} + +namespace cvtt +{ + namespace Internal + { + class BC7Computer + { + public: + static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds); + static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock); + + private: + static const int MaxTweakRounds = 4; + + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::SInt32 MSInt32; + typedef ParallelMath::Float MFloat; + + static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]); + static void Quantize(MUInt15* color, int bits, int channels); + static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels); + static void Unquantize(MUInt15* color, int bits, int channels); + static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]); + static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p); + static void CompressEndpoints2(MUInt15 ep[2][4]); + static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]); + static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]); + static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]); + static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]); + static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]); + static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int 
shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn); + static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn); + static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn); + + template<class T> + static void Swap(T& a, T& b); + }; + + + class BC6HComputer + { + public: + static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds); + static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned); + + private: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::AInt16 MAInt16; + typedef ParallelMath::SInt32 MSInt32; + typedef ParallelMath::UInt31 MUInt31; + + static const int MaxTweakRounds = 4; + static const int MaxRefineRounds = 3; + + static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru); + static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru); + static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 
&outUnquantizedFinished2CL); + static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished); + static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn); + static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn); + static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal); + static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal); + static void SignExtendSingle(int &v, int bits); + }; + } +} diff --git a/thirdparty/cvtt/ConvectionKernels_BC6H_IO.cpp b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.cpp new file mode 100644 index 0000000000..753b6f9000 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.cpp @@ -0,0 +1,881 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, 
modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels_BC6H_IO.h" + +namespace cvtt +{ + namespace BC6H_IO + { + void WriteMode0(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x3u) | ((gy >> 2) & 0x4u) | ((by >> 1) & 0x8u) | (bz & 0x10u) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + void WriteMode1(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x3u) | ((gy >> 3) & 0x4u) | ((gz >> 1) & 0x18u) | ((rw << 5) & 0xfe0u) | ((bz << 12) & 0x3000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x3f8000u) | ((by << 17) & 0x400000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bz >> 3) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u); + } + + void WriteMode2(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, 
uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xf8u) | ((rw >> 2) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x1e000u) | ((gw << 7) & 0x20000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x7800000u) | ((bw << 17) & 0x8000000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + void WriteMode3(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 3) & 0x80u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((gw << 8) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x7800000u) | ((bw << 17) & 0x8000000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x1eu) | ((bz << 5) & 0x20u) | ((bz << 4) & 0x40u) | ((rz << 7) & 0x780u) | ((gy << 7) & 0x800u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + void WriteMode4(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 3) & 0x80u) | ((by << 4) & 
0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x1e000u) | ((gw << 7) & 0x20000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bw << 18) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x1eu) | ((bz << 4) & 0x60u) | ((rz << 7) & 0x780u) | ((bz << 7) & 0x800u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + void WriteMode5(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x3fe0u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0xff8000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x3u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + void WriteMode6(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((gz << 9) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x1u) | ((bz >> 2) & 0x6u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 
7) & 0x1f80u) | ((d << 13) & 0x3e000u); + } + + /* WriteMode7: assigns encoded[0], encoded[1] and encoded[2] outright (no read-modify-write). m lands in bits 0-4 of encoded[0] and d in bits 13-17 of encoded[2]; the other fields are placed by the shift/mask pairs below. NOTE(review): the uint16_t arguments are promoted to int before the shifts; casting to uint32_t first would keep the wide left shifts in unsigned arithmetic. */ void WriteMode7(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((bz << 13) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((gy << 18) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x1u) | ((gz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + /* WriteMode8: same scheme - the three output words are assigned outright, m lands in bits 0-4 of encoded[0] and d in bits 13-17 of encoded[2]; the other fields follow the shift/mask pairs below. */ void WriteMode8(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((bz << 12) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((by << 18) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u); + } + + /* WriteMode9: same scheme - the three output words are assigned outright, m lands in bits 0-4 of encoded[0] and d in bits 13-17 of encoded[2]; the other fields follow the shift/mask pairs in the body. */ void WriteMode9(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, 
uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7e0u) | ((gz << 7) & 0x800u) | ((bz << 12) & 0x3000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x1f8000u) | ((gy << 16) & 0x200000u) | ((by << 17) & 0x400000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0x7e000000u) | ((gz << 26) & 0x80000000u); + encoded[1] = ((bz >> 3) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u); + encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u); + } + + /* WriteMode10: assigns encoded[0..2] outright using only m, rw, rx, gw, gx, bw and bx; d, ry, rz, gy, gz, by and bz are accepted but never read. encoded[2] ends up holding a single bit. NOTE(review): the uint16_t arguments are promoted to int before the shifts; casting to uint32_t first would keep the wide left shifts in unsigned arithmetic. */ void WriteMode10(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x1ff8u) | ((gx << 13) & 0x7fe000u) | ((bx << 23) & 0xff800000u); + encoded[2] = ((bx >> 9) & 0x1u); + } + + /* WriteMode11: assigns encoded[0..2] outright using only m, rw, rx, gw, gx, bw and bx; d, ry, rz, gy, gz, by and bz are accepted but never read. encoded[2] ends up holding a single bit. */ void WriteMode11(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xff8u) | ((rw << 2) & 0x1000u) | ((gx << 13) & 0x3fe000u) | ((gw << 12) & 0x400000u) | ((bx << 23) & 0xff800000u); + encoded[2] = ((bw >> 10) & 0x1u); + } + + /* WriteMode12: assigns encoded[0..2] outright using only m, rw, rx, gw, gx, bw and bx; d, ry, rz, gy, gz, by and bz are accepted but never read. encoded[2] ends up holding a single bit. */ void WriteMode12(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 
0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x7f8u) | (rw & 0x800u) | ((rw << 2) & 0x1000u) | ((gx << 13) & 0x1fe000u) | ((gw << 10) & 0x200000u) | ((gw << 12) & 0x400000u) | ((bx << 23) & 0x7f800000u) | ((bw << 20) & 0x80000000u); + encoded[2] = ((bw >> 10) & 0x1u); + } + + /* WriteMode13: assigns encoded[0..2] outright using only m, rw, rx, gw, gx, bw and bx; d, ry, rz, gy, gz, by and bz are accepted but never read. Bits 10 and up of rw, gw and bw are written one bit at a time, in reversed bit order, into encoded[1]; encoded[2] ends up holding a single bit. NOTE(review): the uint16_t arguments are promoted to int before the shifts; casting to uint32_t first would keep the wide left shifts in unsigned arithmetic. */ void WriteMode13(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz) + { + encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u); + encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 8) & 0x80u) | ((rw >> 6) & 0x100u) | ((rw >> 4) & 0x200u) | ((rw >> 2) & 0x400u) | (rw & 0x800u) | ((rw << 2) & 0x1000u) | ((gx << 13) & 0x1e000u) | ((gw << 2) & 0x20000u) | ((gw << 4) & 0x40000u) | ((gw << 6) & 0x80000u) | ((gw << 8) & 0x100000u) | ((gw << 10) & 0x200000u) | ((gw << 12) & 0x400000u) | ((bx << 23) & 0x7800000u) | ((bw << 12) & 0x8000000u) | ((bw << 14) & 0x10000000u) | ((bw << 16) & 0x20000000u) | ((bw << 18) & 0x40000000u) | ((bw << 20) & 0x80000000u); + encoded[2] = ((bw >> 10) & 0x1u); + } + + /* ReadMode0: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode0(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + gy |= ((encoded[0] << 2) & 0x10u); + by |= ((encoded[0] << 1) & 0x10u); + bz |= (encoded[0] & 0x10u); + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 
0x380u); + rx |= ((encoded[1] >> 3) & 0x1fu); + gz |= ((encoded[1] >> 4) & 0x10u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x1fu); + bz |= ((encoded[1] >> 18) & 0x1u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x1fu); + bz |= ((encoded[1] >> 27) & 0x2u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0x1fu); + bz |= ((encoded[2] >> 4) & 0x4u); + rz |= ((encoded[2] >> 7) & 0x1fu); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode1: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. Bits 2-4 of encoded[0] are consumed as gy/gz data here; d is taken from bits 13-17 of encoded[2]. */ void ReadMode1(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + gy |= ((encoded[0] << 3) & 0x20u); + gz |= ((encoded[0] << 1) & 0x30u); + rw |= ((encoded[0] >> 5) & 0x7fu); + bz |= ((encoded[0] >> 12) & 0x3u); + by |= ((encoded[0] >> 10) & 0x10u); + gw |= ((encoded[0] >> 15) & 0x7fu); + by |= ((encoded[0] >> 17) & 0x20u); + bz |= ((encoded[0] >> 21) & 0x4u); + gy |= ((encoded[0] >> 20) & 0x10u); + bw |= ((encoded[0] >> 25) & 0x7fu); + bz |= ((encoded[1] << 3) & 0x8u); + bz |= ((encoded[1] << 4) & 0x20u); + bz |= ((encoded[1] << 2) & 0x10u); + rx |= ((encoded[1] >> 3) & 0x3fu); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x3fu); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x3fu); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 
0x8u); + ry |= ((encoded[2] >> 1) & 0x3fu); + rz |= ((encoded[2] >> 7) & 0x3fu); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode2: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. rw, gw and bw each receive an extra bit 10 from encoded[1]; d is taken from bits 13-17 of encoded[2]. */ void ReadMode2(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x380u); + rx |= ((encoded[1] >> 3) & 0x1fu); + rw |= ((encoded[1] << 2) & 0x400u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0xfu); + gw |= ((encoded[1] >> 7) & 0x400u); + bz |= ((encoded[1] >> 18) & 0x1u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0xfu); + bw |= ((encoded[1] >> 17) & 0x400u); + bz |= ((encoded[1] >> 27) & 0x2u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0x1fu); + bz |= ((encoded[2] >> 4) & 0x4u); + rz |= ((encoded[2] >> 7) & 0x1fu); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode3: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode3(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, 
uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x380u); + rx |= ((encoded[1] >> 3) & 0xfu); + rw |= ((encoded[1] << 3) & 0x400u); + gz |= ((encoded[1] >> 4) & 0x10u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x1fu); + gw |= ((encoded[1] >> 8) & 0x400u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0xfu); + bw |= ((encoded[1] >> 17) & 0x400u); + bz |= ((encoded[1] >> 27) & 0x2u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0xfu); + bz |= ((encoded[2] >> 5) & 0x1u); + bz |= ((encoded[2] >> 4) & 0x4u); + rz |= ((encoded[2] >> 7) & 0xfu); + gy |= ((encoded[2] >> 7) & 0x10u); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode4: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode4(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 
0x380u); + rx |= ((encoded[1] >> 3) & 0xfu); + rw |= ((encoded[1] << 3) & 0x400u); + by |= ((encoded[1] >> 4) & 0x10u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0xfu); + gw |= ((encoded[1] >> 7) & 0x400u); + bz |= ((encoded[1] >> 18) & 0x1u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x1fu); + bw |= ((encoded[1] >> 18) & 0x400u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0xfu); + bz |= ((encoded[2] >> 4) & 0x6u); + rz |= ((encoded[2] >> 7) & 0xfu); + bz |= ((encoded[2] >> 7) & 0x10u); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode5: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. The extraction masks mirror the placement masks used by WriteMode5; d is taken from bits 13-17 of encoded[2]. */ void ReadMode5(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x1ffu); + by |= ((encoded[0] >> 10) & 0x10u); + gw |= ((encoded[0] >> 15) & 0x1ffu); + gy |= ((encoded[0] >> 20) & 0x10u); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x180u); + bz |= ((encoded[1] << 2) & 0x10u); + rx |= ((encoded[1] >> 3) & 0x1fu); + gz |= ((encoded[1] >> 4) & 0x10u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x1fu); + bz |= ((encoded[1] >> 18) & 0x1u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x1fu); + bz |= ((encoded[1] >> 27) & 0x2u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 
0x8u); + ry |= ((encoded[2] >> 1) & 0x1fu); + bz |= ((encoded[2] >> 4) & 0x4u); + rz |= ((encoded[2] >> 7) & 0x1fu); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode6: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode6(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0xffu); + gz |= ((encoded[0] >> 9) & 0x10u); + by |= ((encoded[0] >> 10) & 0x10u); + gw |= ((encoded[0] >> 15) & 0xffu); + bz |= ((encoded[0] >> 21) & 0x4u); + gy |= ((encoded[0] >> 20) & 0x10u); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x80u); + bz |= ((encoded[1] << 2) & 0x18u); + rx |= ((encoded[1] >> 3) & 0x3fu); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x1fu); + bz |= ((encoded[1] >> 18) & 0x1u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x1fu); + bz |= ((encoded[1] >> 27) & 0x2u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0x3fu); + rz |= ((encoded[2] >> 7) & 0x3fu); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode7: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode7(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, 
uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0xffu); + bz |= ((encoded[0] >> 13) & 0x1u); + by |= ((encoded[0] >> 10) & 0x10u); + gw |= ((encoded[0] >> 15) & 0xffu); + gy |= ((encoded[0] >> 18) & 0x20u); + gy |= ((encoded[0] >> 20) & 0x10u); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x80u); + gz |= ((encoded[1] << 4) & 0x20u); + bz |= ((encoded[1] << 2) & 0x10u); + rx |= ((encoded[1] >> 3) & 0x1fu); + gz |= ((encoded[1] >> 4) & 0x10u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x3fu); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x1fu); + bz |= ((encoded[1] >> 27) & 0x2u); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0x1fu); + bz |= ((encoded[2] >> 4) & 0x4u); + rz |= ((encoded[2] >> 7) & 0x1fu); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode8: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode8(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0xffu); + bz 
|= ((encoded[0] >> 12) & 0x2u); + by |= ((encoded[0] >> 10) & 0x10u); + gw |= ((encoded[0] >> 15) & 0xffu); + by |= ((encoded[0] >> 18) & 0x20u); + gy |= ((encoded[0] >> 20) & 0x10u); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x80u); + bz |= ((encoded[1] << 4) & 0x20u); + bz |= ((encoded[1] << 2) & 0x10u); + rx |= ((encoded[1] >> 3) & 0x1fu); + gz |= ((encoded[1] >> 4) & 0x10u); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x1fu); + bz |= ((encoded[1] >> 18) & 0x1u); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x3fu); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0x1fu); + bz |= ((encoded[2] >> 4) & 0x4u); + rz |= ((encoded[2] >> 7) & 0x1fu); + bz |= ((encoded[2] >> 9) & 0x8u); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode9: rebuilds each field in a zero-initialised local by OR-ing shifted and masked bit groups taken from encoded[0..2], then copies the locals to the out-parameters. d is taken from bits 13-17 of encoded[2]. */ void ReadMode9(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3fu); + gz |= ((encoded[0] >> 7) & 0x10u); + bz |= ((encoded[0] >> 12) & 0x3u); + by |= ((encoded[0] >> 10) & 0x10u); + gw |= ((encoded[0] >> 15) & 0x3fu); + gy |= ((encoded[0] >> 16) & 0x20u); + by |= ((encoded[0] >> 17) & 0x20u); + bz |= ((encoded[0] >> 21) & 0x4u); + gy |= ((encoded[0] >> 20) & 0x10u); + bw |= ((encoded[0] >> 25) & 0x3fu); + gz |= ((encoded[0] >> 26) & 0x20u); + bz |= ((encoded[1] << 3) & 0x8u); + 
bz |= ((encoded[1] << 4) & 0x20u); + bz |= ((encoded[1] << 2) & 0x10u); + rx |= ((encoded[1] >> 3) & 0x3fu); + gy |= ((encoded[1] >> 9) & 0xfu); + gx |= ((encoded[1] >> 13) & 0x3fu); + gz |= ((encoded[1] >> 19) & 0xfu); + bx |= ((encoded[1] >> 23) & 0x3fu); + by |= ((encoded[1] >> 29) & 0x7u); + by |= ((encoded[2] << 3) & 0x8u); + ry |= ((encoded[2] >> 1) & 0x3fu); + rz |= ((encoded[2] >> 7) & 0x3fu); + d |= ((encoded[2] >> 13) & 0x1fu); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode10: decodes only rw, rx, gw, gx, bw and bx from encoded[0..2]; the remaining locals stay 0, so outD, outRY, outRZ, outGY, outGZ, outBY and outBZ are always written as 0. */ void ReadMode10(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x380u); + rx |= ((encoded[1] >> 3) & 0x3ffu); + gx |= ((encoded[1] >> 13) & 0x3ffu); + bx |= ((encoded[1] >> 23) & 0x1ffu); + bx |= ((encoded[2] << 9) & 0x200u); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode11: decodes only rw, rx, gw, gx, bw and bx from encoded[0..2]; the remaining locals stay 0, so outD, outRY, outRZ, outGY, outGZ, outBY and outBZ are always written as 0. */ void ReadMode11(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + 
uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x380u); + rx |= ((encoded[1] >> 3) & 0x1ffu); + rw |= ((encoded[1] >> 2) & 0x400u); + gx |= ((encoded[1] >> 13) & 0x1ffu); + gw |= ((encoded[1] >> 12) & 0x400u); + bx |= ((encoded[1] >> 23) & 0x1ffu); + bw |= ((encoded[2] << 10) & 0x400u); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode12: decodes only rw, rx, gw, gx, bw and bx from encoded[0..2]; the remaining locals stay 0, so outD, outRY, outRZ, outGY, outGZ, outBY and outBZ are always written as 0. */ void ReadMode12(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x380u); + rx |= ((encoded[1] >> 3) & 0xffu); + rw |= (encoded[1] & 0x800u); + rw |= ((encoded[1] >> 2) & 0x400u); + gx |= ((encoded[1] >> 13) & 0xffu); + gw |= ((encoded[1] >> 10) & 0x800u); + gw |= ((encoded[1] >> 12) & 0x400u); + bx |= ((encoded[1] >> 23) & 0xffu); + bw |= ((encoded[1] >> 20) & 0x800u); + bw |= ((encoded[2] << 10) & 0x400u); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* ReadMode13: decodes only rw, rx, gw, gx, bw and bx from encoded[0..2]; the remaining locals stay 0, so outD, outRY, outRZ, outGY, outGZ, outBY and outBZ are always written as 0. */ void ReadMode13(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t 
&outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ) + { + uint16_t d = 0; + uint16_t rw = 0; + uint16_t rx = 0; + uint16_t ry = 0; + uint16_t rz = 0; + uint16_t gw = 0; + uint16_t gx = 0; + uint16_t gy = 0; + uint16_t gz = 0; + uint16_t bw = 0; + uint16_t bx = 0; + uint16_t by = 0; + uint16_t bz = 0; + rw |= ((encoded[0] >> 5) & 0x3ffu); + gw |= ((encoded[0] >> 15) & 0x3ffu); + bw |= ((encoded[0] >> 25) & 0x7fu); + bw |= ((encoded[1] << 7) & 0x380u); + rx |= ((encoded[1] >> 3) & 0xfu); + rw |= ((encoded[1] << 8) & 0x8000u); + rw |= ((encoded[1] << 6) & 0x4000u); + rw |= ((encoded[1] << 4) & 0x2000u); + rw |= ((encoded[1] << 2) & 0x1000u); + rw |= (encoded[1] & 0x800u); + rw |= ((encoded[1] >> 2) & 0x400u); + gx |= ((encoded[1] >> 13) & 0xfu); + gw |= ((encoded[1] >> 2) & 0x8000u); + gw |= ((encoded[1] >> 4) & 0x4000u); + gw |= ((encoded[1] >> 6) & 0x2000u); + gw |= ((encoded[1] >> 8) & 0x1000u); + gw |= ((encoded[1] >> 10) & 0x800u); + gw |= ((encoded[1] >> 12) & 0x400u); + bx |= ((encoded[1] >> 23) & 0xfu); + bw |= ((encoded[1] >> 12) & 0x8000u); + bw |= ((encoded[1] >> 14) & 0x4000u); + bw |= ((encoded[1] >> 16) & 0x2000u); + bw |= ((encoded[1] >> 18) & 0x1000u); + bw |= ((encoded[1] >> 20) & 0x800u); + bw |= ((encoded[2] << 10) & 0x400u); + outD = d; + outRW = rw; + outRX = rx; + outRY = ry; + outRZ = rz; + outGW = gw; + outGX = gx; + outGY = gy; + outGZ = gz; + outBW = bw; + outBX = bx; + outBY = by; + outBZ = bz; + } + + /* Dispatch table of the 14 unpack routines; entry i is ReadMode<i>. */ const ReadFunc_t g_readFuncs[14] = + { + ReadMode0, + ReadMode1, + ReadMode2, + ReadMode3, + ReadMode4, + ReadMode5, + ReadMode6, + ReadMode7, + ReadMode8, + ReadMode9, + ReadMode10, + ReadMode11, + ReadMode12, + ReadMode13 + }; + + /* Dispatch table of the 14 pack routines; entry i is WriteMode<i>. */ const WriteFunc_t g_writeFuncs[14] = + { + WriteMode0, + WriteMode1, + WriteMode2, + WriteMode3, + WriteMode4, + WriteMode5, + WriteMode6, + WriteMode7, + WriteMode8, + WriteMode9, + 
WriteMode10, + WriteMode11, + WriteMode12, + WriteMode13 + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_BC6H_IO.h b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.h new file mode 100644 index 0000000000..a7bb517b54 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BC6H_IO.h @@ -0,0 +1,16 @@ +#pragma once + +#include <stdint.h> + /* NOTE(review): the next include names this very header (ConvectionKernels_BC6H_IO.h); #pragma once makes it a no-op, so it is redundant and could be dropped. */ #include "ConvectionKernels_BC6H_IO.h" + +namespace cvtt +{ + namespace BC6H_IO + { + /* ReadFunc_t: pointer to an unpack routine that takes the packed 32-bit words and writes thirteen uint16_t fields (d, rw..rz, gw..gz, bw..bz) through references. */ typedef void (*ReadFunc_t)(const uint32_t *encoded, uint16_t &d, uint16_t &rw, uint16_t &rx, uint16_t &ry, uint16_t &rz, uint16_t &gw, uint16_t &gx, uint16_t &gy, uint16_t &gz, uint16_t &bw, uint16_t &bx, uint16_t &by, uint16_t &bz); + /* WriteFunc_t: pointer to a pack routine that takes the output words, the value m and the same thirteen fields by value. */ typedef void (*WriteFunc_t)(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz); + + /* Tables of 14 routines each; only declared here (extern), the definitions live in a source file. */ extern const ReadFunc_t g_readFuncs[14]; + extern const WriteFunc_t g_writeFuncs[14]; + } +} diff --git a/thirdparty/cvtt/ConvectionKernels_BC7_Prio.h b/thirdparty/cvtt/ConvectionKernels_BC7_Prio.h new file mode 100644 index 0000000000..1880e22d0f --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BC7_Prio.h @@ -0,0 +1,17 @@ +#pragma once + +#include <stdint.h> + +namespace cvtt { namespace Tables { namespace BC7Prio { + /* Pointers to tables of packed 16-bit codes for the RGB and RGBA cases; the g_bc7NumPrioCodes* ints are presumably the matching entry counts - confirm against the definitions. */ extern const uint16_t *g_bc7PrioCodesRGB; + extern const int g_bc7NumPrioCodesRGB; + + extern const uint16_t *g_bc7PrioCodesRGBA; + extern const int g_bc7NumPrioCodesRGBA; + + /* Accessors taking one packed 16-bit code and returning an int; presumably each extracts the field named in the function - bodies are not visible here. */ int UnpackMode(uint16_t packed); + int UnpackSeedPointCount(uint16_t packed); + int UnpackPartition(uint16_t packed); + int UnpackRotation(uint16_t packed); + int UnpackIndexSelector(uint16_t packed); +}}} diff --git a/thirdparty/cvtt/ConvectionKernels_BC7_PrioData.cpp b/thirdparty/cvtt/ConvectionKernels_BC7_PrioData.cpp new file mode 100644 index 0000000000..5b3134f860 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BC7_PrioData.cpp @@ -0,0 +1,1301 @@ +/* +Convection Texture 
Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels_BC7_Prio.h" + /* Bit layout of one packed code: every field has a width (*_BITS) and a start bit (*_OFFSET_BITS). The partition field (6 bits) and the rotation field (2 bits) both start at bit 0, so they share the low bits; index mode starts at bit 2, mode at bit 6 and seed-point count at bit 9. */ +#define BC7_PARTITION_BITS 6 +#define BC7_PARTITION_OFFSET_BITS 0 + +#define BC7_ROTATION_BITS 2 +#define BC7_ROTATION_OFFSET_BITS 0 + +#define BC7_INDEX_MODE_BITS 1 +#define BC7_INDEX_MODE_OFFSET_BITS (BC7_ROTATION_OFFSET_BITS + BC7_ROTATION_BITS) + +#define BC7_MODE_BITS 3 +#define BC7_MODE_OFFSET_BITS (BC7_PARTITION_OFFSET_BITS + BC7_PARTITION_BITS) +#define BC7_SEED_POINT_COUNT_BITS 2 +#define BC7_SEED_POINT_COUNT_OFFSET_BITS (BC7_MODE_BITS + BC7_MODE_OFFSET_BITS) + + + /* BC7_MODE_PRIO_DUAL_PLANE: treats subData as a two-digit decimal number - the tens digit goes to the rotation field and the ones digit to the index-mode field. NOTE(review): the macro arguments here and in BC7_MODE_PRIO_CODE are not parenthesised, so only simple expressions such as literals are safe to pass. */ +#define BC7_MODE_PRIO_DUAL_PLANE(subData) \ + ( \ + ((subData / 10) << BC7_ROTATION_OFFSET_BITS) | \ + ((subData % 10) << BC7_INDEX_MODE_OFFSET_BITS) \ + ) + /* BC7_MODE_PRIO_CODE: builds one packed code from seedPointCount - 1, mode and subData; how subData is stored depends on whether mode is 4 or 5 (the conditional below). */ +#define BC7_MODE_PRIO_CODE(seedPointCount, mode, subData) \ + (\ + ((seedPointCount - 1) << BC7_SEED_POINT_COUNT_OFFSET_BITS) | \ + (mode << BC7_MODE_OFFSET_BITS) | \ + ((mode == 4 || mode == 5) ? 
BC7_MODE_PRIO_DUAL_PLANE(subData) : (subData << BC7_PARTITION_OFFSET_BITS)) \ + ) + /* Opens cvtt::Tables::BC7Prio, the namespace that holds the table below. */ +namespace cvtt { namespace Tables { namespace BC7Prio { + /* g_bc7PrioCodesRGBData: table of 16-bit codes, every entry built with BC7_MODE_PRIO_CODE(seedPointCount, mode, subData). The initializer list continues past this line; what the ordering of the entries encodes is not stated here - TODO confirm against the code that reads the table. */ const uint16_t g_bc7PrioCodesRGBData[] = + { + BC7_MODE_PRIO_CODE(1, 1, 13), + BC7_MODE_PRIO_CODE(1, 1, 0), + BC7_MODE_PRIO_CODE(1, 0, 3), + BC7_MODE_PRIO_CODE(1, 0, 1), + BC7_MODE_PRIO_CODE(1, 6, 0), + BC7_MODE_PRIO_CODE(1, 0, 9), + BC7_MODE_PRIO_CODE(1, 1, 6), + BC7_MODE_PRIO_CODE(1, 1, 1), + BC7_MODE_PRIO_CODE(1, 1, 2), + BC7_MODE_PRIO_CODE(1, 0, 15), + BC7_MODE_PRIO_CODE(1, 1, 7), + BC7_MODE_PRIO_CODE(1, 1, 16), + BC7_MODE_PRIO_CODE(1, 1, 15), + BC7_MODE_PRIO_CODE(1, 1, 14), + BC7_MODE_PRIO_CODE(1, 0, 13), + BC7_MODE_PRIO_CODE(1, 0, 14), + BC7_MODE_PRIO_CODE(1, 0, 11), + BC7_MODE_PRIO_CODE(1, 1, 22), + BC7_MODE_PRIO_CODE(1, 0, 8), + BC7_MODE_PRIO_CODE(1, 0, 10), + BC7_MODE_PRIO_CODE(1, 1, 8), + BC7_MODE_PRIO_CODE(1, 3, 13), + BC7_MODE_PRIO_CODE(1, 1, 19), + BC7_MODE_PRIO_CODE(1, 4, 31), + BC7_MODE_PRIO_CODE(1, 1, 10), + BC7_MODE_PRIO_CODE(1, 1, 23), + BC7_MODE_PRIO_CODE(1, 1, 3), + BC7_MODE_PRIO_CODE(2, 1, 13), + BC7_MODE_PRIO_CODE(1, 1, 9), + BC7_MODE_PRIO_CODE(2, 1, 0), + BC7_MODE_PRIO_CODE(1, 1, 20), + BC7_MODE_PRIO_CODE(1, 1, 21), + BC7_MODE_PRIO_CODE(1, 4, 11), + BC7_MODE_PRIO_CODE(1, 1, 29), + BC7_MODE_PRIO_CODE(1, 1, 26), + BC7_MODE_PRIO_CODE(1, 5, 30), + BC7_MODE_PRIO_CODE(1, 0, 4), + BC7_MODE_PRIO_CODE(2, 6, 0), + BC7_MODE_PRIO_CODE(1, 0, 0), + BC7_MODE_PRIO_CODE(2, 0, 10), + BC7_MODE_PRIO_CODE(3, 6, 0), + BC7_MODE_PRIO_CODE(1, 1, 11), + BC7_MODE_PRIO_CODE(1, 4, 10), + BC7_MODE_PRIO_CODE(2, 0, 8), + BC7_MODE_PRIO_CODE(2, 0, 11), + BC7_MODE_PRIO_CODE(2, 0, 13), + BC7_MODE_PRIO_CODE(1, 1, 4), + BC7_MODE_PRIO_CODE(3, 1, 13), + BC7_MODE_PRIO_CODE(1, 1, 12), + BC7_MODE_PRIO_CODE(1, 1, 18), + BC7_MODE_PRIO_CODE(1, 3, 0), + BC7_MODE_PRIO_CODE(1, 0, 5), + BC7_MODE_PRIO_CODE(1, 1, 17), + BC7_MODE_PRIO_CODE(1, 1, 25), + BC7_MODE_PRIO_CODE(1, 0, 7), + BC7_MODE_PRIO_CODE(3, 0, 10), + BC7_MODE_PRIO_CODE(1, 1, 5), + 
BC7_MODE_PRIO_CODE(2, 1, 10), + BC7_MODE_PRIO_CODE(1, 1, 24), + BC7_MODE_PRIO_CODE(3, 0, 8), + BC7_MODE_PRIO_CODE(3, 1, 0), + BC7_MODE_PRIO_CODE(2, 1, 15), + BC7_MODE_PRIO_CODE(2, 1, 14), + BC7_MODE_PRIO_CODE(3, 0, 13), + BC7_MODE_PRIO_CODE(3, 0, 11), + BC7_MODE_PRIO_CODE(2, 1, 16), + BC7_MODE_PRIO_CODE(2, 0, 14), + BC7_MODE_PRIO_CODE(2, 1, 3), + BC7_MODE_PRIO_CODE(4, 0, 10), + BC7_MODE_PRIO_CODE(2, 1, 1), + BC7_MODE_PRIO_CODE(1, 0, 2), + BC7_MODE_PRIO_CODE(2, 1, 2), + BC7_MODE_PRIO_CODE(4, 0, 8), + BC7_MODE_PRIO_CODE(1, 0, 12), + BC7_MODE_PRIO_CODE(4, 1, 13), + BC7_MODE_PRIO_CODE(1, 5, 10), + BC7_MODE_PRIO_CODE(2, 0, 15), + BC7_MODE_PRIO_CODE(1, 0, 6), + BC7_MODE_PRIO_CODE(1, 1, 35), + BC7_MODE_PRIO_CODE(2, 1, 23), + BC7_MODE_PRIO_CODE(4, 0, 13), + BC7_MODE_PRIO_CODE(4, 0, 11), + BC7_MODE_PRIO_CODE(1, 2, 17), + BC7_MODE_PRIO_CODE(2, 1, 6), + BC7_MODE_PRIO_CODE(2, 1, 7), + BC7_MODE_PRIO_CODE(4, 6, 0), + BC7_MODE_PRIO_CODE(1, 2, 16), + BC7_MODE_PRIO_CODE(2, 1, 19), + BC7_MODE_PRIO_CODE(1, 1, 30), + BC7_MODE_PRIO_CODE(2, 3, 13), + BC7_MODE_PRIO_CODE(3, 0, 14), + BC7_MODE_PRIO_CODE(2, 1, 29), + BC7_MODE_PRIO_CODE(2, 1, 21), + BC7_MODE_PRIO_CODE(4, 1, 0), + BC7_MODE_PRIO_CODE(3, 0, 15), + BC7_MODE_PRIO_CODE(2, 0, 3), + BC7_MODE_PRIO_CODE(1, 1, 28), + BC7_MODE_PRIO_CODE(1, 4, 30), + BC7_MODE_PRIO_CODE(2, 0, 4), + BC7_MODE_PRIO_CODE(1, 2, 63), + BC7_MODE_PRIO_CODE(4, 0, 14), + BC7_MODE_PRIO_CODE(2, 1, 26), + BC7_MODE_PRIO_CODE(2, 0, 1), + BC7_MODE_PRIO_CODE(3, 0, 3), + BC7_MODE_PRIO_CODE(1, 1, 61), + BC7_MODE_PRIO_CODE(2, 0, 7), + BC7_MODE_PRIO_CODE(2, 0, 5), + BC7_MODE_PRIO_CODE(3, 1, 10), + BC7_MODE_PRIO_CODE(2, 4, 31), + BC7_MODE_PRIO_CODE(2, 0, 9), + BC7_MODE_PRIO_CODE(2, 1, 11), + BC7_MODE_PRIO_CODE(4, 0, 15), + BC7_MODE_PRIO_CODE(3, 1, 14), + BC7_MODE_PRIO_CODE(2, 0, 0), + BC7_MODE_PRIO_CODE(3, 1, 15), + BC7_MODE_PRIO_CODE(2, 3, 0), + BC7_MODE_PRIO_CODE(3, 0, 1), + BC7_MODE_PRIO_CODE(1, 1, 60), + BC7_MODE_PRIO_CODE(2, 1, 12), + BC7_MODE_PRIO_CODE(3, 1, 1), + 
BC7_MODE_PRIO_CODE(3, 0, 5), + BC7_MODE_PRIO_CODE(1, 1, 27), + BC7_MODE_PRIO_CODE(2, 1, 18), + BC7_MODE_PRIO_CODE(3, 0, 9), + BC7_MODE_PRIO_CODE(3, 1, 3), + BC7_MODE_PRIO_CODE(2, 0, 2), + BC7_MODE_PRIO_CODE(3, 1, 16), + BC7_MODE_PRIO_CODE(3, 1, 2), + BC7_MODE_PRIO_CODE(1, 1, 31), + BC7_MODE_PRIO_CODE(3, 0, 7), + BC7_MODE_PRIO_CODE(2, 1, 17), + BC7_MODE_PRIO_CODE(1, 5, 20), + BC7_MODE_PRIO_CODE(2, 1, 4), + BC7_MODE_PRIO_CODE(1, 1, 62), + BC7_MODE_PRIO_CODE(2, 0, 12), + BC7_MODE_PRIO_CODE(3, 0, 4), + BC7_MODE_PRIO_CODE(4, 0, 4), + BC7_MODE_PRIO_CODE(1, 1, 33), + BC7_MODE_PRIO_CODE(3, 1, 23), + BC7_MODE_PRIO_CODE(2, 1, 5), + BC7_MODE_PRIO_CODE(2, 0, 6), + BC7_MODE_PRIO_CODE(2, 1, 24), + BC7_MODE_PRIO_CODE(1, 1, 59), + BC7_MODE_PRIO_CODE(1, 1, 63), + BC7_MODE_PRIO_CODE(3, 0, 0), + BC7_MODE_PRIO_CODE(1, 1, 52), + BC7_MODE_PRIO_CODE(4, 0, 7), + BC7_MODE_PRIO_CODE(2, 1, 22), + BC7_MODE_PRIO_CODE(4, 0, 3), + BC7_MODE_PRIO_CODE(1, 2, 10), + BC7_MODE_PRIO_CODE(3, 1, 7), + BC7_MODE_PRIO_CODE(4, 0, 9), + BC7_MODE_PRIO_CODE(2, 1, 8), + BC7_MODE_PRIO_CODE(4, 0, 1), + BC7_MODE_PRIO_CODE(3, 0, 12), + BC7_MODE_PRIO_CODE(4, 0, 5), + BC7_MODE_PRIO_CODE(3, 1, 6), + BC7_MODE_PRIO_CODE(4, 1, 14), + BC7_MODE_PRIO_CODE(1, 3, 15), + BC7_MODE_PRIO_CODE(1, 1, 56), + BC7_MODE_PRIO_CODE(3, 0, 6), + BC7_MODE_PRIO_CODE(3, 0, 2), + BC7_MODE_PRIO_CODE(1, 1, 32), + BC7_MODE_PRIO_CODE(4, 1, 10), + BC7_MODE_PRIO_CODE(1, 2, 8), + BC7_MODE_PRIO_CODE(2, 1, 9), + BC7_MODE_PRIO_CODE(1, 2, 18), + BC7_MODE_PRIO_CODE(4, 1, 15), + BC7_MODE_PRIO_CODE(4, 0, 6), + BC7_MODE_PRIO_CODE(3, 1, 29), + BC7_MODE_PRIO_CODE(2, 1, 25), + BC7_MODE_PRIO_CODE(3, 4, 31), + BC7_MODE_PRIO_CODE(3, 3, 13), + BC7_MODE_PRIO_CODE(4, 0, 0), + BC7_MODE_PRIO_CODE(3, 1, 19), + BC7_MODE_PRIO_CODE(4, 0, 12), + BC7_MODE_PRIO_CODE(4, 1, 1), + BC7_MODE_PRIO_CODE(4, 0, 2), + BC7_MODE_PRIO_CODE(1, 3, 2), + BC7_MODE_PRIO_CODE(1, 2, 13), + BC7_MODE_PRIO_CODE(1, 1, 58), + BC7_MODE_PRIO_CODE(1, 3, 14), + BC7_MODE_PRIO_CODE(4, 1, 3), + 
BC7_MODE_PRIO_CODE(3, 1, 21), + BC7_MODE_PRIO_CODE(2, 2, 8), + BC7_MODE_PRIO_CODE(1, 2, 19), + BC7_MODE_PRIO_CODE(4, 1, 16), + BC7_MODE_PRIO_CODE(4, 1, 2), + BC7_MODE_PRIO_CODE(2, 2, 16), + BC7_MODE_PRIO_CODE(2, 2, 10), + BC7_MODE_PRIO_CODE(2, 1, 20), + BC7_MODE_PRIO_CODE(1, 2, 11), + BC7_MODE_PRIO_CODE(1, 1, 54), + BC7_MODE_PRIO_CODE(1, 1, 47), + BC7_MODE_PRIO_CODE(1, 3, 1), + BC7_MODE_PRIO_CODE(1, 2, 21), + BC7_MODE_PRIO_CODE(1, 2, 62), + BC7_MODE_PRIO_CODE(2, 2, 11), + BC7_MODE_PRIO_CODE(3, 1, 26), + BC7_MODE_PRIO_CODE(1, 1, 53), + BC7_MODE_PRIO_CODE(2, 1, 35), + BC7_MODE_PRIO_CODE(2, 2, 13), + BC7_MODE_PRIO_CODE(4, 1, 23), + BC7_MODE_PRIO_CODE(4, 1, 6), + BC7_MODE_PRIO_CODE(4, 1, 7), + BC7_MODE_PRIO_CODE(1, 2, 25), + BC7_MODE_PRIO_CODE(1, 1, 57), + BC7_MODE_PRIO_CODE(2, 1, 60), + BC7_MODE_PRIO_CODE(1, 2, 20), + BC7_MODE_PRIO_CODE(3, 1, 8), + BC7_MODE_PRIO_CODE(4, 1, 29), + BC7_MODE_PRIO_CODE(4, 1, 19), + BC7_MODE_PRIO_CODE(3, 2, 8), + BC7_MODE_PRIO_CODE(2, 4, 11), + BC7_MODE_PRIO_CODE(4, 1, 21), + BC7_MODE_PRIO_CODE(3, 2, 10), + BC7_MODE_PRIO_CODE(2, 1, 61), + BC7_MODE_PRIO_CODE(2, 1, 30), + BC7_MODE_PRIO_CODE(3, 1, 12), + BC7_MODE_PRIO_CODE(3, 1, 11), + BC7_MODE_PRIO_CODE(2, 1, 63), + BC7_MODE_PRIO_CODE(2, 3, 1), + BC7_MODE_PRIO_CODE(2, 1, 28), + BC7_MODE_PRIO_CODE(2, 1, 62), + BC7_MODE_PRIO_CODE(3, 2, 13), + BC7_MODE_PRIO_CODE(2, 2, 63), + BC7_MODE_PRIO_CODE(2, 1, 33), + BC7_MODE_PRIO_CODE(2, 4, 10), + BC7_MODE_PRIO_CODE(3, 1, 18), + BC7_MODE_PRIO_CODE(2, 5, 30), + BC7_MODE_PRIO_CODE(3, 1, 5), + BC7_MODE_PRIO_CODE(2, 2, 17), + BC7_MODE_PRIO_CODE(1, 1, 55), + BC7_MODE_PRIO_CODE(3, 1, 17), + BC7_MODE_PRIO_CODE(2, 3, 2), + BC7_MODE_PRIO_CODE(1, 4, 21), + BC7_MODE_PRIO_CODE(3, 2, 11), + BC7_MODE_PRIO_CODE(4, 1, 11), + BC7_MODE_PRIO_CODE(2, 1, 27), + BC7_MODE_PRIO_CODE(1, 2, 59), + BC7_MODE_PRIO_CODE(4, 1, 26), + BC7_MODE_PRIO_CODE(3, 1, 9), + BC7_MODE_PRIO_CODE(2, 3, 14), + BC7_MODE_PRIO_CODE(3, 1, 4), + BC7_MODE_PRIO_CODE(3, 1, 24), + BC7_MODE_PRIO_CODE(3, 1, 
25), + BC7_MODE_PRIO_CODE(3, 3, 0), + BC7_MODE_PRIO_CODE(3, 4, 11), + BC7_MODE_PRIO_CODE(4, 1, 12), + BC7_MODE_PRIO_CODE(2, 1, 32), + BC7_MODE_PRIO_CODE(2, 3, 15), + BC7_MODE_PRIO_CODE(4, 2, 10), + BC7_MODE_PRIO_CODE(1, 2, 60), + BC7_MODE_PRIO_CODE(1, 2, 32), + BC7_MODE_PRIO_CODE(1, 1, 40), + BC7_MODE_PRIO_CODE(4, 1, 18), + BC7_MODE_PRIO_CODE(2, 1, 59), + BC7_MODE_PRIO_CODE(4, 1, 5), + BC7_MODE_PRIO_CODE(3, 1, 22), + BC7_MODE_PRIO_CODE(3, 2, 16), + BC7_MODE_PRIO_CODE(3, 1, 20), + BC7_MODE_PRIO_CODE(4, 1, 4), + BC7_MODE_PRIO_CODE(2, 1, 31), + BC7_MODE_PRIO_CODE(4, 1, 17), + BC7_MODE_PRIO_CODE(1, 2, 24), + BC7_MODE_PRIO_CODE(4, 1, 24), + BC7_MODE_PRIO_CODE(2, 1, 58), + BC7_MODE_PRIO_CODE(4, 2, 8), + BC7_MODE_PRIO_CODE(1, 2, 22), + BC7_MODE_PRIO_CODE(1, 2, 23), + BC7_MODE_PRIO_CODE(1, 3, 10), + BC7_MODE_PRIO_CODE(1, 1, 41), + BC7_MODE_PRIO_CODE(2, 2, 18), + BC7_MODE_PRIO_CODE(4, 1, 25), + BC7_MODE_PRIO_CODE(3, 1, 61), + BC7_MODE_PRIO_CODE(1, 3, 29), + BC7_MODE_PRIO_CODE(1, 2, 57), + BC7_MODE_PRIO_CODE(2, 2, 19), + BC7_MODE_PRIO_CODE(1, 2, 53), + BC7_MODE_PRIO_CODE(1, 2, 55), + BC7_MODE_PRIO_CODE(3, 2, 63), + BC7_MODE_PRIO_CODE(3, 1, 60), + BC7_MODE_PRIO_CODE(4, 1, 8), + BC7_MODE_PRIO_CODE(2, 1, 56), + BC7_MODE_PRIO_CODE(3, 1, 35), + BC7_MODE_PRIO_CODE(4, 4, 31), + BC7_MODE_PRIO_CODE(4, 1, 9), + BC7_MODE_PRIO_CODE(1, 1, 46), + BC7_MODE_PRIO_CODE(1, 2, 58), + BC7_MODE_PRIO_CODE(2, 3, 29), + BC7_MODE_PRIO_CODE(1, 1, 45), + BC7_MODE_PRIO_CODE(4, 2, 13), + BC7_MODE_PRIO_CODE(1, 1, 42), + BC7_MODE_PRIO_CODE(1, 3, 3), + BC7_MODE_PRIO_CODE(4, 2, 11), + BC7_MODE_PRIO_CODE(3, 1, 63), + BC7_MODE_PRIO_CODE(3, 1, 30), + BC7_MODE_PRIO_CODE(1, 1, 36), + BC7_MODE_PRIO_CODE(3, 1, 62), + BC7_MODE_PRIO_CODE(1, 1, 43), + BC7_MODE_PRIO_CODE(1, 3, 21), + BC7_MODE_PRIO_CODE(3, 2, 17), + BC7_MODE_PRIO_CODE(1, 2, 14), + BC7_MODE_PRIO_CODE(1, 1, 48), + BC7_MODE_PRIO_CODE(2, 1, 57), + BC7_MODE_PRIO_CODE(2, 1, 52), + BC7_MODE_PRIO_CODE(1, 2, 61), + BC7_MODE_PRIO_CODE(3, 1, 33), + 
BC7_MODE_PRIO_CODE(1, 1, 51), + BC7_MODE_PRIO_CODE(4, 1, 20), + BC7_MODE_PRIO_CODE(1, 3, 8), + BC7_MODE_PRIO_CODE(4, 1, 22), + BC7_MODE_PRIO_CODE(1, 3, 19), + BC7_MODE_PRIO_CODE(1, 2, 36), + BC7_MODE_PRIO_CODE(2, 5, 10), + BC7_MODE_PRIO_CODE(3, 1, 28), + BC7_MODE_PRIO_CODE(2, 2, 14), + BC7_MODE_PRIO_CODE(1, 1, 49), + BC7_MODE_PRIO_CODE(1, 2, 33), + BC7_MODE_PRIO_CODE(1, 3, 9), + BC7_MODE_PRIO_CODE(2, 2, 20), + BC7_MODE_PRIO_CODE(1, 3, 26), + BC7_MODE_PRIO_CODE(2, 1, 53), + BC7_MODE_PRIO_CODE(4, 3, 13), + BC7_MODE_PRIO_CODE(2, 2, 21), + BC7_MODE_PRIO_CODE(3, 4, 10), + BC7_MODE_PRIO_CODE(4, 1, 60), + BC7_MODE_PRIO_CODE(2, 1, 54), + BC7_MODE_PRIO_CODE(1, 2, 29), + BC7_MODE_PRIO_CODE(2, 1, 47), + BC7_MODE_PRIO_CODE(1, 2, 52), + BC7_MODE_PRIO_CODE(3, 1, 32), + BC7_MODE_PRIO_CODE(1, 2, 40), + BC7_MODE_PRIO_CODE(1, 2, 31), + BC7_MODE_PRIO_CODE(3, 1, 27), + BC7_MODE_PRIO_CODE(3, 2, 18), + BC7_MODE_PRIO_CODE(2, 3, 10), + BC7_MODE_PRIO_CODE(2, 1, 55), + BC7_MODE_PRIO_CODE(4, 1, 61), + BC7_MODE_PRIO_CODE(3, 2, 14), + BC7_MODE_PRIO_CODE(3, 1, 31), + BC7_MODE_PRIO_CODE(1, 2, 34), + BC7_MODE_PRIO_CODE(3, 2, 19), + BC7_MODE_PRIO_CODE(2, 3, 21), + BC7_MODE_PRIO_CODE(2, 4, 30), + BC7_MODE_PRIO_CODE(1, 2, 15), + BC7_MODE_PRIO_CODE(2, 3, 26), + BC7_MODE_PRIO_CODE(1, 2, 28), + BC7_MODE_PRIO_CODE(4, 2, 16), + BC7_MODE_PRIO_CODE(2, 2, 15), + BC7_MODE_PRIO_CODE(2, 1, 40), + BC7_MODE_PRIO_CODE(2, 2, 22), + BC7_MODE_PRIO_CODE(4, 1, 33), + BC7_MODE_PRIO_CODE(1, 3, 7), + BC7_MODE_PRIO_CODE(1, 1, 50), + BC7_MODE_PRIO_CODE(2, 1, 41), + BC7_MODE_PRIO_CODE(1, 2, 9), + BC7_MODE_PRIO_CODE(1, 2, 39), + BC7_MODE_PRIO_CODE(2, 2, 25), + BC7_MODE_PRIO_CODE(1, 3, 6), + BC7_MODE_PRIO_CODE(3, 2, 21), + BC7_MODE_PRIO_CODE(1, 1, 37), + BC7_MODE_PRIO_CODE(2, 2, 58), + BC7_MODE_PRIO_CODE(3, 3, 29), + BC7_MODE_PRIO_CODE(4, 1, 62), + BC7_MODE_PRIO_CODE(1, 2, 35), + BC7_MODE_PRIO_CODE(3, 1, 59), + BC7_MODE_PRIO_CODE(4, 1, 28), + BC7_MODE_PRIO_CODE(1, 3, 23), + BC7_MODE_PRIO_CODE(4, 1, 30), + 
BC7_MODE_PRIO_CODE(2, 1, 45), + BC7_MODE_PRIO_CODE(1, 3, 16), + BC7_MODE_PRIO_CODE(4, 1, 35), + BC7_MODE_PRIO_CODE(2, 1, 46), + BC7_MODE_PRIO_CODE(1, 2, 38), + BC7_MODE_PRIO_CODE(4, 1, 63), + BC7_MODE_PRIO_CODE(1, 3, 22), + BC7_MODE_PRIO_CODE(1, 2, 30), + BC7_MODE_PRIO_CODE(2, 2, 31), + BC7_MODE_PRIO_CODE(1, 3, 20), + BC7_MODE_PRIO_CODE(2, 2, 9), + BC7_MODE_PRIO_CODE(2, 3, 3), + BC7_MODE_PRIO_CODE(3, 2, 22), + BC7_MODE_PRIO_CODE(2, 1, 42), + BC7_MODE_PRIO_CODE(2, 2, 62), + BC7_MODE_PRIO_CODE(3, 2, 20), + BC7_MODE_PRIO_CODE(4, 1, 32), + BC7_MODE_PRIO_CODE(2, 1, 43), + BC7_MODE_PRIO_CODE(3, 1, 58), + BC7_MODE_PRIO_CODE(2, 3, 19), + BC7_MODE_PRIO_CODE(2, 2, 32), + BC7_MODE_PRIO_CODE(2, 2, 57), + BC7_MODE_PRIO_CODE(4, 1, 27), + BC7_MODE_PRIO_CODE(2, 2, 34), + BC7_MODE_PRIO_CODE(4, 1, 58), + BC7_MODE_PRIO_CODE(1, 2, 12), + BC7_MODE_PRIO_CODE(2, 2, 12), + BC7_MODE_PRIO_CODE(1, 4, 20), + BC7_MODE_PRIO_CODE(1, 2, 56), + BC7_MODE_PRIO_CODE(2, 1, 48), + BC7_MODE_PRIO_CODE(2, 1, 36), + BC7_MODE_PRIO_CODE(4, 3, 0), + BC7_MODE_PRIO_CODE(2, 2, 24), + BC7_MODE_PRIO_CODE(3, 1, 40), + BC7_MODE_PRIO_CODE(3, 2, 9), + BC7_MODE_PRIO_CODE(3, 1, 56), + BC7_MODE_PRIO_CODE(3, 2, 15), + BC7_MODE_PRIO_CODE(2, 3, 7), + BC7_MODE_PRIO_CODE(1, 2, 37), + BC7_MODE_PRIO_CODE(2, 2, 35), + BC7_MODE_PRIO_CODE(3, 1, 52), + BC7_MODE_PRIO_CODE(2, 3, 6), + BC7_MODE_PRIO_CODE(3, 1, 57), + BC7_MODE_PRIO_CODE(4, 1, 31), + BC7_MODE_PRIO_CODE(4, 4, 11), + BC7_MODE_PRIO_CODE(1, 1, 44), + BC7_MODE_PRIO_CODE(3, 3, 1), + BC7_MODE_PRIO_CODE(1, 2, 54), + BC7_MODE_PRIO_CODE(2, 1, 50), + BC7_MODE_PRIO_CODE(3, 3, 15), + BC7_MODE_PRIO_CODE(2, 1, 51), + BC7_MODE_PRIO_CODE(1, 2, 27), + BC7_MODE_PRIO_CODE(3, 4, 30), + BC7_MODE_PRIO_CODE(3, 3, 14), + BC7_MODE_PRIO_CODE(3, 2, 25), + BC7_MODE_PRIO_CODE(2, 3, 9), + BC7_MODE_PRIO_CODE(2, 2, 60), + BC7_MODE_PRIO_CODE(2, 1, 49), + BC7_MODE_PRIO_CODE(1, 2, 6), + BC7_MODE_PRIO_CODE(2, 2, 23), + BC7_MODE_PRIO_CODE(3, 2, 12), + BC7_MODE_PRIO_CODE(3, 3, 2), + BC7_MODE_PRIO_CODE(4, 2, 
14), + BC7_MODE_PRIO_CODE(2, 3, 16), + BC7_MODE_PRIO_CODE(1, 2, 51), + BC7_MODE_PRIO_CODE(1, 3, 11), + BC7_MODE_PRIO_CODE(1, 2, 4), + BC7_MODE_PRIO_CODE(4, 2, 17), + BC7_MODE_PRIO_CODE(1, 3, 12), + BC7_MODE_PRIO_CODE(3, 1, 43), + BC7_MODE_PRIO_CODE(2, 4, 21), + BC7_MODE_PRIO_CODE(4, 1, 56), + BC7_MODE_PRIO_CODE(3, 1, 53), + BC7_MODE_PRIO_CODE(3, 1, 47), + BC7_MODE_PRIO_CODE(2, 2, 61), + BC7_MODE_PRIO_CODE(2, 2, 55), + BC7_MODE_PRIO_CODE(2, 3, 23), + BC7_MODE_PRIO_CODE(3, 1, 42), + BC7_MODE_PRIO_CODE(2, 3, 8), + BC7_MODE_PRIO_CODE(3, 1, 55), + BC7_MODE_PRIO_CODE(4, 1, 59), + BC7_MODE_PRIO_CODE(3, 2, 60), + BC7_MODE_PRIO_CODE(2, 3, 20), + BC7_MODE_PRIO_CODE(3, 2, 57), + BC7_MODE_PRIO_CODE(3, 1, 54), + BC7_MODE_PRIO_CODE(3, 2, 35), + BC7_MODE_PRIO_CODE(1, 1, 38), + BC7_MODE_PRIO_CODE(1, 2, 5), + BC7_MODE_PRIO_CODE(2, 2, 5), + BC7_MODE_PRIO_CODE(2, 2, 6), + BC7_MODE_PRIO_CODE(3, 2, 23), + BC7_MODE_PRIO_CODE(2, 2, 59), + BC7_MODE_PRIO_CODE(3, 2, 5), + BC7_MODE_PRIO_CODE(4, 1, 42), + BC7_MODE_PRIO_CODE(2, 1, 37), + BC7_MODE_PRIO_CODE(3, 2, 59), + BC7_MODE_PRIO_CODE(4, 2, 9), + BC7_MODE_PRIO_CODE(2, 2, 4), + BC7_MODE_PRIO_CODE(2, 2, 56), + BC7_MODE_PRIO_CODE(1, 3, 33), + BC7_MODE_PRIO_CODE(2, 3, 33), + BC7_MODE_PRIO_CODE(2, 3, 22), + BC7_MODE_PRIO_CODE(2, 3, 12), + BC7_MODE_PRIO_CODE(4, 1, 40), + BC7_MODE_PRIO_CODE(3, 2, 34), + BC7_MODE_PRIO_CODE(3, 2, 56), + BC7_MODE_PRIO_CODE(3, 3, 26), + BC7_MODE_PRIO_CODE(1, 2, 7), + BC7_MODE_PRIO_CODE(2, 2, 7), + BC7_MODE_PRIO_CODE(3, 2, 7), + BC7_MODE_PRIO_CODE(2, 2, 36), + BC7_MODE_PRIO_CODE(3, 2, 36), + BC7_MODE_PRIO_CODE(4, 1, 52), + BC7_MODE_PRIO_CODE(2, 2, 33), + BC7_MODE_PRIO_CODE(3, 1, 45), + BC7_MODE_PRIO_CODE(1, 3, 4), + BC7_MODE_PRIO_CODE(4, 2, 15), + BC7_MODE_PRIO_CODE(3, 1, 41), + BC7_MODE_PRIO_CODE(2, 2, 54), + BC7_MODE_PRIO_CODE(3, 2, 4), + BC7_MODE_PRIO_CODE(2, 5, 20), + BC7_MODE_PRIO_CODE(3, 2, 62), + BC7_MODE_PRIO_CODE(1, 3, 35), + BC7_MODE_PRIO_CODE(4, 1, 41), + BC7_MODE_PRIO_CODE(3, 2, 6), + BC7_MODE_PRIO_CODE(2, 
2, 52), + BC7_MODE_PRIO_CODE(3, 1, 46), + BC7_MODE_PRIO_CODE(1, 1, 39), + BC7_MODE_PRIO_CODE(3, 2, 33), + BC7_MODE_PRIO_CODE(1, 3, 5), + BC7_MODE_PRIO_CODE(3, 1, 48), + BC7_MODE_PRIO_CODE(3, 2, 24), + BC7_MODE_PRIO_CODE(3, 2, 32), + BC7_MODE_PRIO_CODE(3, 3, 33), + BC7_MODE_PRIO_CODE(1, 3, 17), + BC7_MODE_PRIO_CODE(4, 1, 57), + BC7_MODE_PRIO_CODE(1, 3, 25), + BC7_MODE_PRIO_CODE(2, 3, 11), + BC7_MODE_PRIO_CODE(1, 3, 61), + BC7_MODE_PRIO_CODE(4, 1, 43), + BC7_MODE_PRIO_CODE(1, 3, 60), + BC7_MODE_PRIO_CODE(2, 3, 60), + BC7_MODE_PRIO_CODE(2, 2, 28), + BC7_MODE_PRIO_CODE(3, 2, 28), + BC7_MODE_PRIO_CODE(4, 1, 55), + BC7_MODE_PRIO_CODE(2, 3, 5), + BC7_MODE_PRIO_CODE(3, 1, 51), + BC7_MODE_PRIO_CODE(4, 1, 53), + BC7_MODE_PRIO_CODE(4, 1, 54), + BC7_MODE_PRIO_CODE(1, 3, 32), + BC7_MODE_PRIO_CODE(1, 3, 24), + BC7_MODE_PRIO_CODE(4, 1, 47), + BC7_MODE_PRIO_CODE(2, 2, 51), + BC7_MODE_PRIO_CODE(4, 2, 12), + BC7_MODE_PRIO_CODE(2, 3, 61), + BC7_MODE_PRIO_CODE(3, 4, 21), + BC7_MODE_PRIO_CODE(2, 3, 32), + BC7_MODE_PRIO_CODE(3, 1, 36), + BC7_MODE_PRIO_CODE(3, 1, 49), + BC7_MODE_PRIO_CODE(1, 3, 18), + BC7_MODE_PRIO_CODE(4, 3, 29), + BC7_MODE_PRIO_CODE(4, 2, 63), + BC7_MODE_PRIO_CODE(2, 2, 27), + BC7_MODE_PRIO_CODE(2, 3, 17), + BC7_MODE_PRIO_CODE(3, 1, 50), + BC7_MODE_PRIO_CODE(3, 2, 61), + BC7_MODE_PRIO_CODE(1, 3, 63), + BC7_MODE_PRIO_CODE(2, 3, 63), + BC7_MODE_PRIO_CODE(3, 2, 27), + BC7_MODE_PRIO_CODE(4, 1, 46), + BC7_MODE_PRIO_CODE(1, 2, 26), + BC7_MODE_PRIO_CODE(2, 3, 4), + BC7_MODE_PRIO_CODE(2, 3, 18), + BC7_MODE_PRIO_CODE(4, 1, 45), + BC7_MODE_PRIO_CODE(4, 1, 51), + BC7_MODE_PRIO_CODE(1, 2, 1), + BC7_MODE_PRIO_CODE(4, 2, 6), + BC7_MODE_PRIO_CODE(1, 3, 62), + BC7_MODE_PRIO_CODE(2, 3, 62), + BC7_MODE_PRIO_CODE(2, 1, 44), + BC7_MODE_PRIO_CODE(4, 1, 49), + BC7_MODE_PRIO_CODE(3, 5, 30), + BC7_MODE_PRIO_CODE(2, 3, 25), + BC7_MODE_PRIO_CODE(1, 2, 49), + BC7_MODE_PRIO_CODE(4, 1, 48), + BC7_MODE_PRIO_CODE(3, 3, 3), + BC7_MODE_PRIO_CODE(3, 1, 37), + BC7_MODE_PRIO_CODE(1, 2, 0), + 
BC7_MODE_PRIO_CODE(2, 2, 0), + BC7_MODE_PRIO_CODE(2, 3, 35), + BC7_MODE_PRIO_CODE(2, 3, 24), + BC7_MODE_PRIO_CODE(2, 2, 53), + BC7_MODE_PRIO_CODE(3, 2, 53), + BC7_MODE_PRIO_CODE(4, 2, 59), + BC7_MODE_PRIO_CODE(3, 3, 10), + BC7_MODE_PRIO_CODE(1, 2, 3), + BC7_MODE_PRIO_CODE(2, 2, 3), + BC7_MODE_PRIO_CODE(3, 2, 3), + BC7_MODE_PRIO_CODE(3, 3, 32), + BC7_MODE_PRIO_CODE(1, 2, 46), + BC7_MODE_PRIO_CODE(4, 2, 62), + BC7_MODE_PRIO_CODE(4, 2, 60), + BC7_MODE_PRIO_CODE(2, 2, 30), + BC7_MODE_PRIO_CODE(1, 3, 47), + BC7_MODE_PRIO_CODE(4, 2, 36), + BC7_MODE_PRIO_CODE(2, 2, 1), + BC7_MODE_PRIO_CODE(3, 2, 1), + BC7_MODE_PRIO_CODE(3, 2, 58), + BC7_MODE_PRIO_CODE(4, 1, 36), + BC7_MODE_PRIO_CODE(3, 3, 16), + BC7_MODE_PRIO_CODE(2, 3, 47), + BC7_MODE_PRIO_CODE(2, 2, 39), + BC7_MODE_PRIO_CODE(4, 1, 50), + BC7_MODE_PRIO_CODE(4, 2, 21), + BC7_MODE_PRIO_CODE(2, 1, 38), + BC7_MODE_PRIO_CODE(4, 4, 21), + BC7_MODE_PRIO_CODE(3, 3, 23), + BC7_MODE_PRIO_CODE(1, 2, 43), + BC7_MODE_PRIO_CODE(1, 2, 41), + BC7_MODE_PRIO_CODE(2, 2, 41), + BC7_MODE_PRIO_CODE(1, 3, 28), + BC7_MODE_PRIO_CODE(4, 2, 35), + BC7_MODE_PRIO_CODE(4, 3, 26), + BC7_MODE_PRIO_CODE(1, 3, 59), + BC7_MODE_PRIO_CODE(1, 1, 34), + BC7_MODE_PRIO_CODE(2, 2, 29), + BC7_MODE_PRIO_CODE(3, 2, 29), + BC7_MODE_PRIO_CODE(3, 2, 52), + BC7_MODE_PRIO_CODE(1, 3, 58), + BC7_MODE_PRIO_CODE(4, 5, 30), + BC7_MODE_PRIO_CODE(4, 3, 33), + BC7_MODE_PRIO_CODE(3, 2, 30), + BC7_MODE_PRIO_CODE(1, 2, 44), + BC7_MODE_PRIO_CODE(1, 2, 2), + BC7_MODE_PRIO_CODE(2, 2, 2), + BC7_MODE_PRIO_CODE(3, 2, 2), + BC7_MODE_PRIO_CODE(1, 2, 47), + BC7_MODE_PRIO_CODE(2, 2, 47), + BC7_MODE_PRIO_CODE(3, 3, 7), + BC7_MODE_PRIO_CODE(2, 3, 58), + BC7_MODE_PRIO_CODE(3, 2, 55), + BC7_MODE_PRIO_CODE(4, 2, 4), + BC7_MODE_PRIO_CODE(3, 2, 0), + BC7_MODE_PRIO_CODE(1, 3, 31), + BC7_MODE_PRIO_CODE(3, 2, 31), + BC7_MODE_PRIO_CODE(3, 3, 12), + BC7_MODE_PRIO_CODE(3, 2, 51), + BC7_MODE_PRIO_CODE(2, 1, 39), + BC7_MODE_PRIO_CODE(1, 3, 48), + BC7_MODE_PRIO_CODE(1, 3, 27), + BC7_MODE_PRIO_CODE(4, 2, 
25), + BC7_MODE_PRIO_CODE(4, 2, 22), + BC7_MODE_PRIO_CODE(4, 2, 18), + BC7_MODE_PRIO_CODE(2, 2, 44), + BC7_MODE_PRIO_CODE(2, 3, 28), + BC7_MODE_PRIO_CODE(3, 1, 44), + BC7_MODE_PRIO_CODE(2, 1, 34), + BC7_MODE_PRIO_CODE(3, 5, 10), + BC7_MODE_PRIO_CODE(4, 4, 10), + BC7_MODE_PRIO_CODE(3, 2, 54), + BC7_MODE_PRIO_CODE(4, 2, 7), + BC7_MODE_PRIO_CODE(4, 2, 20), + BC7_MODE_PRIO_CODE(2, 2, 37), + BC7_MODE_PRIO_CODE(3, 3, 6), + BC7_MODE_PRIO_CODE(2, 2, 43), + BC7_MODE_PRIO_CODE(2, 3, 59), + BC7_MODE_PRIO_CODE(1, 3, 30), + BC7_MODE_PRIO_CODE(4, 2, 5), + BC7_MODE_PRIO_CODE(4, 2, 61), + BC7_MODE_PRIO_CODE(4, 2, 19), + BC7_MODE_PRIO_CODE(4, 2, 23), + BC7_MODE_PRIO_CODE(3, 2, 39), + BC7_MODE_PRIO_CODE(2, 3, 27), + BC7_MODE_PRIO_CODE(1, 3, 57), + BC7_MODE_PRIO_CODE(2, 3, 57), + BC7_MODE_PRIO_CODE(3, 3, 21), + BC7_MODE_PRIO_CODE(3, 3, 11), + BC7_MODE_PRIO_CODE(3, 1, 39), + BC7_MODE_PRIO_CODE(2, 3, 48), + BC7_MODE_PRIO_CODE(4, 1, 37), + BC7_MODE_PRIO_CODE(3, 3, 19), + BC7_MODE_PRIO_CODE(3, 1, 38), + BC7_MODE_PRIO_CODE(2, 2, 38), + BC7_MODE_PRIO_CODE(2, 3, 31), + BC7_MODE_PRIO_CODE(2, 2, 40), + BC7_MODE_PRIO_CODE(3, 2, 40), + BC7_MODE_PRIO_CODE(1, 3, 56), + BC7_MODE_PRIO_CODE(4, 5, 10), + BC7_MODE_PRIO_CODE(2, 3, 56), + BC7_MODE_PRIO_CODE(4, 1, 38), + BC7_MODE_PRIO_CODE(1, 3, 41), + BC7_MODE_PRIO_CODE(1, 3, 50), + BC7_MODE_PRIO_CODE(2, 3, 30), + BC7_MODE_PRIO_CODE(3, 3, 8), + BC7_MODE_PRIO_CODE(4, 2, 24), + BC7_MODE_PRIO_CODE(3, 3, 9), + BC7_MODE_PRIO_CODE(3, 1, 34), + BC7_MODE_PRIO_CODE(4, 1, 34), + BC7_MODE_PRIO_CODE(2, 3, 50), + BC7_MODE_PRIO_CODE(1, 3, 43), + BC7_MODE_PRIO_CODE(1, 3, 40), + BC7_MODE_PRIO_CODE(1, 3, 51), + BC7_MODE_PRIO_CODE(2, 3, 51), + BC7_MODE_PRIO_CODE(1, 3, 45), + BC7_MODE_PRIO_CODE(2, 3, 45), + BC7_MODE_PRIO_CODE(2, 3, 40), + BC7_MODE_PRIO_CODE(3, 3, 20), + BC7_MODE_PRIO_CODE(2, 3, 41), + BC7_MODE_PRIO_CODE(3, 2, 44), + BC7_MODE_PRIO_CODE(2, 3, 43), + BC7_MODE_PRIO_CODE(4, 2, 57), + BC7_MODE_PRIO_CODE(2, 4, 20), + BC7_MODE_PRIO_CODE(3, 3, 4), + 
BC7_MODE_PRIO_CODE(3, 3, 61), + BC7_MODE_PRIO_CODE(1, 3, 46), + BC7_MODE_PRIO_CODE(2, 3, 46), + BC7_MODE_PRIO_CODE(4, 3, 1), + BC7_MODE_PRIO_CODE(3, 3, 22), + BC7_MODE_PRIO_CODE(1, 3, 49), + BC7_MODE_PRIO_CODE(2, 3, 49), + BC7_MODE_PRIO_CODE(4, 3, 15), + BC7_MODE_PRIO_CODE(3, 3, 5), + BC7_MODE_PRIO_CODE(4, 1, 44), + BC7_MODE_PRIO_CODE(4, 3, 14), + BC7_MODE_PRIO_CODE(4, 3, 2), + BC7_MODE_PRIO_CODE(3, 3, 60), + BC7_MODE_PRIO_CODE(1, 3, 53), + BC7_MODE_PRIO_CODE(2, 3, 53), + BC7_MODE_PRIO_CODE(4, 3, 32), + BC7_MODE_PRIO_CODE(3, 3, 24), + BC7_MODE_PRIO_CODE(3, 3, 63), + BC7_MODE_PRIO_CODE(3, 2, 37), + BC7_MODE_PRIO_CODE(1, 3, 52), + BC7_MODE_PRIO_CODE(2, 3, 52), + BC7_MODE_PRIO_CODE(4, 4, 30), + BC7_MODE_PRIO_CODE(4, 2, 34), + BC7_MODE_PRIO_CODE(1, 3, 54), + BC7_MODE_PRIO_CODE(3, 3, 62), + BC7_MODE_PRIO_CODE(3, 3, 18), + BC7_MODE_PRIO_CODE(3, 2, 41), + BC7_MODE_PRIO_CODE(4, 2, 58), + BC7_MODE_PRIO_CODE(1, 3, 42), + BC7_MODE_PRIO_CODE(2, 3, 42), + BC7_MODE_PRIO_CODE(4, 2, 0), + BC7_MODE_PRIO_CODE(4, 2, 55), + BC7_MODE_PRIO_CODE(2, 3, 54), + BC7_MODE_PRIO_CODE(3, 2, 47), + BC7_MODE_PRIO_CODE(4, 2, 53), + BC7_MODE_PRIO_CODE(3, 3, 25), + BC7_MODE_PRIO_CODE(3, 4, 20), + BC7_MODE_PRIO_CODE(4, 2, 33), + BC7_MODE_PRIO_CODE(1, 3, 55), + BC7_MODE_PRIO_CODE(2, 3, 55), + BC7_MODE_PRIO_CODE(4, 2, 32), + BC7_MODE_PRIO_CODE(3, 2, 43), + BC7_MODE_PRIO_CODE(3, 3, 17), + BC7_MODE_PRIO_CODE(3, 5, 20), + BC7_MODE_PRIO_CODE(4, 5, 20), + BC7_MODE_PRIO_CODE(1, 3, 36), + BC7_MODE_PRIO_CODE(2, 3, 36), + BC7_MODE_PRIO_CODE(4, 2, 54), + BC7_MODE_PRIO_CODE(2, 2, 49), + BC7_MODE_PRIO_CODE(3, 2, 49), + BC7_MODE_PRIO_CODE(4, 1, 39), + BC7_MODE_PRIO_CODE(4, 2, 3), + BC7_MODE_PRIO_CODE(3, 3, 35), + BC7_MODE_PRIO_CODE(4, 2, 52), + BC7_MODE_PRIO_CODE(4, 2, 1), + BC7_MODE_PRIO_CODE(1, 2, 50), + BC7_MODE_PRIO_CODE(4, 2, 49), + BC7_MODE_PRIO_CODE(4, 3, 16), + BC7_MODE_PRIO_CODE(2, 2, 50), + BC7_MODE_PRIO_CODE(3, 2, 50), + BC7_MODE_PRIO_CODE(4, 2, 31), + BC7_MODE_PRIO_CODE(4, 3, 3), + BC7_MODE_PRIO_CODE(1, 
2, 48), + BC7_MODE_PRIO_CODE(2, 2, 48), + BC7_MODE_PRIO_CODE(3, 2, 48), + BC7_MODE_PRIO_CODE(3, 3, 28), + BC7_MODE_PRIO_CODE(4, 3, 9), + BC7_MODE_PRIO_CODE(1, 3, 38), + BC7_MODE_PRIO_CODE(4, 3, 10), + BC7_MODE_PRIO_CODE(3, 3, 31), + BC7_MODE_PRIO_CODE(4, 2, 51), + BC7_MODE_PRIO_CODE(1, 3, 37), + BC7_MODE_PRIO_CODE(2, 3, 37), + BC7_MODE_PRIO_CODE(3, 3, 50), + BC7_MODE_PRIO_CODE(2, 3, 38), + BC7_MODE_PRIO_CODE(4, 3, 20), + BC7_MODE_PRIO_CODE(3, 3, 41), + BC7_MODE_PRIO_CODE(3, 3, 56), + BC7_MODE_PRIO_CODE(4, 3, 6), + BC7_MODE_PRIO_CODE(4, 3, 8), + BC7_MODE_PRIO_CODE(4, 2, 37), + BC7_MODE_PRIO_CODE(3, 3, 58), + BC7_MODE_PRIO_CODE(3, 3, 59), + BC7_MODE_PRIO_CODE(4, 2, 56), + BC7_MODE_PRIO_CODE(1, 3, 39), + BC7_MODE_PRIO_CODE(2, 3, 39), + BC7_MODE_PRIO_CODE(4, 2, 43), + BC7_MODE_PRIO_CODE(1, 3, 44), + BC7_MODE_PRIO_CODE(2, 3, 44), + BC7_MODE_PRIO_CODE(4, 3, 7), + BC7_MODE_PRIO_CODE(3, 3, 27), + BC7_MODE_PRIO_CODE(4, 3, 23), + BC7_MODE_PRIO_CODE(3, 3, 45), + BC7_MODE_PRIO_CODE(4, 3, 22), + BC7_MODE_PRIO_CODE(3, 3, 30), + BC7_MODE_PRIO_CODE(3, 3, 48), + BC7_MODE_PRIO_CODE(3, 3, 51), + BC7_MODE_PRIO_CODE(1, 2, 42), + BC7_MODE_PRIO_CODE(2, 2, 42), + BC7_MODE_PRIO_CODE(3, 2, 42), + BC7_MODE_PRIO_CODE(4, 3, 19), + BC7_MODE_PRIO_CODE(4, 3, 21), + BC7_MODE_PRIO_CODE(2, 2, 46), + BC7_MODE_PRIO_CODE(3, 3, 36), + BC7_MODE_PRIO_CODE(4, 2, 28), + BC7_MODE_PRIO_CODE(3, 3, 49), + BC7_MODE_PRIO_CODE(3, 3, 53), + BC7_MODE_PRIO_CODE(3, 3, 55), + BC7_MODE_PRIO_CODE(2, 2, 26), + BC7_MODE_PRIO_CODE(3, 2, 26), + BC7_MODE_PRIO_CODE(4, 2, 30), + BC7_MODE_PRIO_CODE(3, 3, 52), + BC7_MODE_PRIO_CODE(4, 2, 41), + BC7_MODE_PRIO_CODE(4, 2, 29), + BC7_MODE_PRIO_CODE(1, 3, 34), + BC7_MODE_PRIO_CODE(2, 3, 34), + BC7_MODE_PRIO_CODE(4, 2, 44), + BC7_MODE_PRIO_CODE(3, 3, 43), + BC7_MODE_PRIO_CODE(4, 2, 47), + BC7_MODE_PRIO_CODE(4, 3, 18), + BC7_MODE_PRIO_CODE(4, 3, 17), + BC7_MODE_PRIO_CODE(3, 3, 47), + BC7_MODE_PRIO_CODE(4, 3, 11), + BC7_MODE_PRIO_CODE(3, 3, 57), + BC7_MODE_PRIO_CODE(3, 2, 38), + 
BC7_MODE_PRIO_CODE(3, 3, 46), + BC7_MODE_PRIO_CODE(4, 3, 25), + BC7_MODE_PRIO_CODE(4, 3, 4), + BC7_MODE_PRIO_CODE(3, 3, 42), + BC7_MODE_PRIO_CODE(4, 3, 61), + BC7_MODE_PRIO_CODE(4, 2, 48), + BC7_MODE_PRIO_CODE(4, 3, 5), + BC7_MODE_PRIO_CODE(3, 3, 54), + BC7_MODE_PRIO_CODE(4, 4, 20), + BC7_MODE_PRIO_CODE(4, 3, 24), + BC7_MODE_PRIO_CODE(4, 3, 12), + BC7_MODE_PRIO_CODE(4, 2, 40), + BC7_MODE_PRIO_CODE(3, 3, 40), + BC7_MODE_PRIO_CODE(3, 3, 44), + BC7_MODE_PRIO_CODE(4, 3, 63), + BC7_MODE_PRIO_CODE(4, 3, 50), + BC7_MODE_PRIO_CODE(4, 2, 50), + BC7_MODE_PRIO_CODE(4, 3, 60), + BC7_MODE_PRIO_CODE(4, 2, 39), + BC7_MODE_PRIO_CODE(4, 3, 62), + BC7_MODE_PRIO_CODE(4, 3, 49), + BC7_MODE_PRIO_CODE(4, 3, 58), + BC7_MODE_PRIO_CODE(4, 3, 47), + BC7_MODE_PRIO_CODE(4, 3, 56), + BC7_MODE_PRIO_CODE(4, 2, 26), + BC7_MODE_PRIO_CODE(4, 2, 27), + BC7_MODE_PRIO_CODE(3, 3, 37), + BC7_MODE_PRIO_CODE(4, 3, 57), + BC7_MODE_PRIO_CODE(4, 3, 48), + BC7_MODE_PRIO_CODE(4, 3, 31), + BC7_MODE_PRIO_CODE(4, 3, 51), + BC7_MODE_PRIO_CODE(4, 3, 28), + BC7_MODE_PRIO_CODE(4, 3, 53), + BC7_MODE_PRIO_CODE(3, 3, 39), + BC7_MODE_PRIO_CODE(4, 3, 40), + BC7_MODE_PRIO_CODE(4, 3, 27), + BC7_MODE_PRIO_CODE(4, 2, 2), + BC7_MODE_PRIO_CODE(3, 3, 34), + BC7_MODE_PRIO_CODE(4, 2, 38), + BC7_MODE_PRIO_CODE(4, 3, 54), + BC7_MODE_PRIO_CODE(3, 3, 38), + BC7_MODE_PRIO_CODE(4, 3, 52), + BC7_MODE_PRIO_CODE(4, 3, 30), + BC7_MODE_PRIO_CODE(4, 3, 59), + BC7_MODE_PRIO_CODE(1, 2, 45), + BC7_MODE_PRIO_CODE(4, 3, 45), + BC7_MODE_PRIO_CODE(4, 2, 42), + BC7_MODE_PRIO_CODE(4, 3, 35), + BC7_MODE_PRIO_CODE(4, 3, 41), + BC7_MODE_PRIO_CODE(3, 2, 46), + BC7_MODE_PRIO_CODE(4, 2, 46), + BC7_MODE_PRIO_CODE(4, 3, 46), + BC7_MODE_PRIO_CODE(2, 2, 45), + BC7_MODE_PRIO_CODE(4, 3, 43), + BC7_MODE_PRIO_CODE(4, 3, 37), + BC7_MODE_PRIO_CODE(4, 3, 38), + BC7_MODE_PRIO_CODE(4, 3, 36), + BC7_MODE_PRIO_CODE(4, 3, 42), + BC7_MODE_PRIO_CODE(4, 3, 34), + BC7_MODE_PRIO_CODE(4, 3, 39), + BC7_MODE_PRIO_CODE(4, 3, 55), + BC7_MODE_PRIO_CODE(4, 3, 44), + 
BC7_MODE_PRIO_CODE(3, 2, 45), + BC7_MODE_PRIO_CODE(1, 4, 0), + BC7_MODE_PRIO_CODE(1, 4, 1), + BC7_MODE_PRIO_CODE(1, 5, 0), + BC7_MODE_PRIO_CODE(4, 2, 45), + BC7_MODE_PRIO_CODE(2, 4, 0), + BC7_MODE_PRIO_CODE(2, 4, 1), + BC7_MODE_PRIO_CODE(2, 5, 0), + BC7_MODE_PRIO_CODE(3, 4, 0), + BC7_MODE_PRIO_CODE(3, 4, 1), + BC7_MODE_PRIO_CODE(3, 5, 0), + BC7_MODE_PRIO_CODE(4, 4, 0), + BC7_MODE_PRIO_CODE(4, 4, 1), + BC7_MODE_PRIO_CODE(4, 5, 0), + }; + + const uint16_t *g_bc7PrioCodesRGB = g_bc7PrioCodesRGBData; + const int g_bc7NumPrioCodesRGB = sizeof(g_bc7PrioCodesRGBData) / sizeof(g_bc7PrioCodesRGBData[0]); + + const uint16_t g_bc7PrioCodesRGBAData[] = + { + BC7_MODE_PRIO_CODE(1, 4, 1), + BC7_MODE_PRIO_CODE(1, 6, 0), + BC7_MODE_PRIO_CODE(1, 4, 31), + BC7_MODE_PRIO_CODE(1, 4, 11), + BC7_MODE_PRIO_CODE(1, 4, 0), + BC7_MODE_PRIO_CODE(1, 7, 13), + BC7_MODE_PRIO_CODE(1, 5, 0), + BC7_MODE_PRIO_CODE(1, 7, 0), + BC7_MODE_PRIO_CODE(2, 4, 1), + BC7_MODE_PRIO_CODE(3, 4, 1), + BC7_MODE_PRIO_CODE(2, 4, 0), + BC7_MODE_PRIO_CODE(2, 6, 0), + BC7_MODE_PRIO_CODE(1, 7, 6), + BC7_MODE_PRIO_CODE(1, 4, 10), + BC7_MODE_PRIO_CODE(1, 7, 15), + BC7_MODE_PRIO_CODE(1, 7, 14), + BC7_MODE_PRIO_CODE(1, 4, 30), + BC7_MODE_PRIO_CODE(1, 7, 7), + BC7_MODE_PRIO_CODE(3, 6, 0), + BC7_MODE_PRIO_CODE(1, 7, 19), + BC7_MODE_PRIO_CODE(3, 4, 0), + BC7_MODE_PRIO_CODE(2, 7, 13), + BC7_MODE_PRIO_CODE(1, 5, 30), + BC7_MODE_PRIO_CODE(1, 7, 2), + BC7_MODE_PRIO_CODE(1, 7, 1), + BC7_MODE_PRIO_CODE(1, 7, 21), + BC7_MODE_PRIO_CODE(4, 4, 1), + BC7_MODE_PRIO_CODE(1, 4, 21), + BC7_MODE_PRIO_CODE(2, 4, 31), + BC7_MODE_PRIO_CODE(1, 7, 10), + BC7_MODE_PRIO_CODE(1, 7, 3), + BC7_MODE_PRIO_CODE(4, 6, 0), + BC7_MODE_PRIO_CODE(3, 7, 13), + BC7_MODE_PRIO_CODE(1, 7, 16), + BC7_MODE_PRIO_CODE(1, 7, 8), + BC7_MODE_PRIO_CODE(2, 5, 0), + BC7_MODE_PRIO_CODE(2, 7, 0), + BC7_MODE_PRIO_CODE(1, 7, 23), + BC7_MODE_PRIO_CODE(1, 7, 9), + BC7_MODE_PRIO_CODE(2, 4, 11), + BC7_MODE_PRIO_CODE(3, 4, 31), + BC7_MODE_PRIO_CODE(1, 7, 20), + BC7_MODE_PRIO_CODE(1, 
7, 22), + BC7_MODE_PRIO_CODE(4, 4, 0), + BC7_MODE_PRIO_CODE(1, 5, 10), + BC7_MODE_PRIO_CODE(4, 7, 13), + BC7_MODE_PRIO_CODE(3, 7, 0), + BC7_MODE_PRIO_CODE(1, 7, 12), + BC7_MODE_PRIO_CODE(1, 7, 29), + BC7_MODE_PRIO_CODE(3, 4, 11), + BC7_MODE_PRIO_CODE(1, 7, 11), + BC7_MODE_PRIO_CODE(1, 7, 18), + BC7_MODE_PRIO_CODE(1, 7, 4), + BC7_MODE_PRIO_CODE(2, 7, 15), + BC7_MODE_PRIO_CODE(2, 7, 14), + BC7_MODE_PRIO_CODE(1, 7, 5), + BC7_MODE_PRIO_CODE(1, 7, 25), + BC7_MODE_PRIO_CODE(1, 7, 17), + BC7_MODE_PRIO_CODE(1, 7, 24), + BC7_MODE_PRIO_CODE(1, 7, 26), + BC7_MODE_PRIO_CODE(3, 5, 0), + BC7_MODE_PRIO_CODE(2, 7, 2), + BC7_MODE_PRIO_CODE(1, 5, 20), + BC7_MODE_PRIO_CODE(2, 7, 1), + BC7_MODE_PRIO_CODE(2, 7, 29), + BC7_MODE_PRIO_CODE(2, 4, 10), + BC7_MODE_PRIO_CODE(4, 7, 0), + BC7_MODE_PRIO_CODE(2, 7, 6), + BC7_MODE_PRIO_CODE(2, 7, 7), + BC7_MODE_PRIO_CODE(3, 7, 14), + BC7_MODE_PRIO_CODE(3, 7, 15), + BC7_MODE_PRIO_CODE(4, 4, 31), + BC7_MODE_PRIO_CODE(2, 7, 21), + BC7_MODE_PRIO_CODE(2, 4, 30), + BC7_MODE_PRIO_CODE(2, 4, 21), + BC7_MODE_PRIO_CODE(3, 7, 29), + BC7_MODE_PRIO_CODE(2, 7, 19), + BC7_MODE_PRIO_CODE(2, 7, 10), + BC7_MODE_PRIO_CODE(3, 7, 1), + BC7_MODE_PRIO_CODE(4, 7, 29), + BC7_MODE_PRIO_CODE(3, 7, 7), + BC7_MODE_PRIO_CODE(1, 4, 20), + BC7_MODE_PRIO_CODE(3, 7, 2), + BC7_MODE_PRIO_CODE(2, 7, 16), + BC7_MODE_PRIO_CODE(2, 7, 3), + BC7_MODE_PRIO_CODE(2, 5, 30), + BC7_MODE_PRIO_CODE(2, 7, 23), + BC7_MODE_PRIO_CODE(3, 7, 6), + BC7_MODE_PRIO_CODE(2, 7, 12), + BC7_MODE_PRIO_CODE(1, 7, 61), + BC7_MODE_PRIO_CODE(4, 4, 11), + BC7_MODE_PRIO_CODE(3, 4, 10), + BC7_MODE_PRIO_CODE(3, 7, 10), + BC7_MODE_PRIO_CODE(2, 7, 8), + BC7_MODE_PRIO_CODE(2, 7, 22), + BC7_MODE_PRIO_CODE(2, 7, 26), + BC7_MODE_PRIO_CODE(3, 4, 30), + BC7_MODE_PRIO_CODE(2, 7, 9), + BC7_MODE_PRIO_CODE(3, 7, 19), + BC7_MODE_PRIO_CODE(2, 7, 25), + BC7_MODE_PRIO_CODE(3, 4, 21), + BC7_MODE_PRIO_CODE(2, 7, 24), + BC7_MODE_PRIO_CODE(1, 7, 60), + BC7_MODE_PRIO_CODE(2, 7, 11), + BC7_MODE_PRIO_CODE(2, 7, 18), + BC7_MODE_PRIO_CODE(2, 
7, 17), + BC7_MODE_PRIO_CODE(2, 7, 4), + BC7_MODE_PRIO_CODE(2, 7, 5), + BC7_MODE_PRIO_CODE(3, 7, 3), + BC7_MODE_PRIO_CODE(3, 7, 16), + BC7_MODE_PRIO_CODE(3, 7, 26), + BC7_MODE_PRIO_CODE(3, 7, 21), + BC7_MODE_PRIO_CODE(1, 7, 62), + BC7_MODE_PRIO_CODE(2, 7, 20), + BC7_MODE_PRIO_CODE(3, 7, 23), + BC7_MODE_PRIO_CODE(1, 7, 33), + BC7_MODE_PRIO_CODE(2, 7, 33), + BC7_MODE_PRIO_CODE(3, 7, 33), + BC7_MODE_PRIO_CODE(4, 7, 33), + BC7_MODE_PRIO_CODE(3, 7, 11), + BC7_MODE_PRIO_CODE(3, 7, 12), + BC7_MODE_PRIO_CODE(4, 7, 26), + BC7_MODE_PRIO_CODE(3, 7, 25), + BC7_MODE_PRIO_CODE(1, 7, 63), + BC7_MODE_PRIO_CODE(2, 5, 10), + BC7_MODE_PRIO_CODE(3, 7, 8), + BC7_MODE_PRIO_CODE(4, 5, 0), + BC7_MODE_PRIO_CODE(3, 7, 24), + BC7_MODE_PRIO_CODE(3, 7, 22), + BC7_MODE_PRIO_CODE(3, 7, 9), + BC7_MODE_PRIO_CODE(1, 7, 32), + BC7_MODE_PRIO_CODE(2, 7, 61), + BC7_MODE_PRIO_CODE(3, 7, 4), + BC7_MODE_PRIO_CODE(3, 5, 30), + BC7_MODE_PRIO_CODE(3, 7, 20), + BC7_MODE_PRIO_CODE(1, 7, 35), + BC7_MODE_PRIO_CODE(4, 7, 14), + BC7_MODE_PRIO_CODE(3, 7, 5), + BC7_MODE_PRIO_CODE(3, 7, 18), + BC7_MODE_PRIO_CODE(1, 7, 30), + BC7_MODE_PRIO_CODE(1, 7, 43), + BC7_MODE_PRIO_CODE(4, 4, 21), + BC7_MODE_PRIO_CODE(4, 7, 15), + BC7_MODE_PRIO_CODE(3, 7, 17), + BC7_MODE_PRIO_CODE(2, 7, 32), + BC7_MODE_PRIO_CODE(3, 7, 32), + BC7_MODE_PRIO_CODE(2, 5, 20), + BC7_MODE_PRIO_CODE(4, 7, 1), + BC7_MODE_PRIO_CODE(4, 7, 2), + BC7_MODE_PRIO_CODE(1, 7, 28), + BC7_MODE_PRIO_CODE(1, 7, 54), + BC7_MODE_PRIO_CODE(4, 7, 32), + BC7_MODE_PRIO_CODE(1, 7, 27), + BC7_MODE_PRIO_CODE(4, 4, 10), + BC7_MODE_PRIO_CODE(3, 5, 10), + BC7_MODE_PRIO_CODE(2, 7, 60), + BC7_MODE_PRIO_CODE(2, 4, 20), + BC7_MODE_PRIO_CODE(2, 7, 63), + BC7_MODE_PRIO_CODE(4, 4, 30), + BC7_MODE_PRIO_CODE(2, 7, 62), + BC7_MODE_PRIO_CODE(1, 7, 41), + BC7_MODE_PRIO_CODE(1, 7, 58), + BC7_MODE_PRIO_CODE(3, 7, 60), + BC7_MODE_PRIO_CODE(1, 7, 40), + BC7_MODE_PRIO_CODE(1, 7, 55), + BC7_MODE_PRIO_CODE(2, 7, 35), + BC7_MODE_PRIO_CODE(4, 7, 8), + BC7_MODE_PRIO_CODE(4, 7, 6), + 
BC7_MODE_PRIO_CODE(1, 7, 53), + BC7_MODE_PRIO_CODE(4, 7, 9), + BC7_MODE_PRIO_CODE(3, 7, 61), + BC7_MODE_PRIO_CODE(3, 4, 20), + BC7_MODE_PRIO_CODE(4, 7, 22), + BC7_MODE_PRIO_CODE(4, 7, 20), + BC7_MODE_PRIO_CODE(3, 7, 62), + BC7_MODE_PRIO_CODE(4, 7, 7), + BC7_MODE_PRIO_CODE(1, 7, 42), + BC7_MODE_PRIO_CODE(1, 7, 52), + BC7_MODE_PRIO_CODE(4, 5, 30), + BC7_MODE_PRIO_CODE(1, 7, 56), + BC7_MODE_PRIO_CODE(1, 7, 31), + BC7_MODE_PRIO_CODE(3, 5, 20), + BC7_MODE_PRIO_CODE(1, 7, 48), + BC7_MODE_PRIO_CODE(2, 7, 28), + BC7_MODE_PRIO_CODE(3, 7, 28), + BC7_MODE_PRIO_CODE(4, 7, 19), + BC7_MODE_PRIO_CODE(3, 7, 35), + BC7_MODE_PRIO_CODE(1, 7, 59), + BC7_MODE_PRIO_CODE(2, 7, 30), + BC7_MODE_PRIO_CODE(3, 7, 63), + BC7_MODE_PRIO_CODE(4, 7, 21), + BC7_MODE_PRIO_CODE(4, 7, 10), + BC7_MODE_PRIO_CODE(4, 7, 3), + BC7_MODE_PRIO_CODE(1, 7, 47), + BC7_MODE_PRIO_CODE(1, 7, 37), + BC7_MODE_PRIO_CODE(4, 5, 10), + BC7_MODE_PRIO_CODE(4, 7, 23), + BC7_MODE_PRIO_CODE(1, 7, 57), + BC7_MODE_PRIO_CODE(4, 7, 17), + BC7_MODE_PRIO_CODE(1, 7, 45), + BC7_MODE_PRIO_CODE(4, 7, 24), + BC7_MODE_PRIO_CODE(4, 7, 60), + BC7_MODE_PRIO_CODE(1, 7, 50), + BC7_MODE_PRIO_CODE(2, 7, 41), + BC7_MODE_PRIO_CODE(4, 7, 25), + BC7_MODE_PRIO_CODE(3, 7, 30), + BC7_MODE_PRIO_CODE(2, 7, 59), + BC7_MODE_PRIO_CODE(2, 7, 55), + BC7_MODE_PRIO_CODE(4, 7, 18), + BC7_MODE_PRIO_CODE(4, 7, 12), + BC7_MODE_PRIO_CODE(4, 7, 5), + BC7_MODE_PRIO_CODE(3, 7, 59), + BC7_MODE_PRIO_CODE(1, 7, 51), + BC7_MODE_PRIO_CODE(4, 7, 16), + BC7_MODE_PRIO_CODE(4, 7, 11), + BC7_MODE_PRIO_CODE(2, 7, 58), + BC7_MODE_PRIO_CODE(3, 7, 41), + BC7_MODE_PRIO_CODE(4, 4, 20), + BC7_MODE_PRIO_CODE(4, 7, 4), + BC7_MODE_PRIO_CODE(1, 7, 49), + BC7_MODE_PRIO_CODE(2, 7, 27), + BC7_MODE_PRIO_CODE(3, 7, 27), + BC7_MODE_PRIO_CODE(4, 7, 62), + BC7_MODE_PRIO_CODE(3, 7, 58), + BC7_MODE_PRIO_CODE(4, 5, 20), + BC7_MODE_PRIO_CODE(2, 7, 53), + BC7_MODE_PRIO_CODE(3, 7, 53), + BC7_MODE_PRIO_CODE(2, 7, 40), + BC7_MODE_PRIO_CODE(3, 7, 40), + BC7_MODE_PRIO_CODE(2, 7, 31), + 
BC7_MODE_PRIO_CODE(3, 7, 31), + BC7_MODE_PRIO_CODE(4, 7, 61), + BC7_MODE_PRIO_CODE(1, 7, 36), + BC7_MODE_PRIO_CODE(4, 7, 63), + BC7_MODE_PRIO_CODE(1, 7, 46), + BC7_MODE_PRIO_CODE(3, 7, 55), + BC7_MODE_PRIO_CODE(2, 7, 52), + BC7_MODE_PRIO_CODE(2, 7, 56), + BC7_MODE_PRIO_CODE(2, 7, 42), + BC7_MODE_PRIO_CODE(2, 7, 37), + BC7_MODE_PRIO_CODE(2, 7, 57), + BC7_MODE_PRIO_CODE(3, 7, 57), + BC7_MODE_PRIO_CODE(2, 7, 45), + BC7_MODE_PRIO_CODE(4, 7, 57), + BC7_MODE_PRIO_CODE(2, 7, 49), + BC7_MODE_PRIO_CODE(3, 7, 42), + BC7_MODE_PRIO_CODE(2, 7, 43), + BC7_MODE_PRIO_CODE(3, 7, 43), + BC7_MODE_PRIO_CODE(4, 7, 28), + BC7_MODE_PRIO_CODE(2, 7, 48), + BC7_MODE_PRIO_CODE(3, 7, 52), + BC7_MODE_PRIO_CODE(3, 7, 49), + BC7_MODE_PRIO_CODE(4, 7, 59), + BC7_MODE_PRIO_CODE(4, 7, 40), + BC7_MODE_PRIO_CODE(4, 7, 27), + BC7_MODE_PRIO_CODE(3, 7, 45), + BC7_MODE_PRIO_CODE(4, 7, 55), + BC7_MODE_PRIO_CODE(3, 7, 56), + BC7_MODE_PRIO_CODE(4, 7, 42), + BC7_MODE_PRIO_CODE(2, 7, 54), + BC7_MODE_PRIO_CODE(3, 7, 54), + BC7_MODE_PRIO_CODE(4, 7, 54), + BC7_MODE_PRIO_CODE(2, 7, 47), + BC7_MODE_PRIO_CODE(3, 7, 47), + BC7_MODE_PRIO_CODE(4, 7, 43), + BC7_MODE_PRIO_CODE(4, 7, 31), + BC7_MODE_PRIO_CODE(3, 7, 37), + BC7_MODE_PRIO_CODE(3, 7, 48), + BC7_MODE_PRIO_CODE(4, 7, 48), + BC7_MODE_PRIO_CODE(4, 7, 45), + BC7_MODE_PRIO_CODE(4, 7, 47), + BC7_MODE_PRIO_CODE(2, 7, 36), + BC7_MODE_PRIO_CODE(1, 7, 44), + BC7_MODE_PRIO_CODE(4, 7, 35), + BC7_MODE_PRIO_CODE(4, 7, 58), + BC7_MODE_PRIO_CODE(3, 7, 36), + BC7_MODE_PRIO_CODE(2, 7, 50), + BC7_MODE_PRIO_CODE(3, 7, 50), + BC7_MODE_PRIO_CODE(4, 7, 50), + BC7_MODE_PRIO_CODE(4, 7, 52), + BC7_MODE_PRIO_CODE(1, 7, 39), + BC7_MODE_PRIO_CODE(1, 7, 34), + BC7_MODE_PRIO_CODE(1, 7, 38), + BC7_MODE_PRIO_CODE(2, 7, 38), + BC7_MODE_PRIO_CODE(3, 7, 38), + BC7_MODE_PRIO_CODE(4, 7, 30), + BC7_MODE_PRIO_CODE(2, 7, 51), + BC7_MODE_PRIO_CODE(4, 7, 41), + BC7_MODE_PRIO_CODE(4, 7, 53), + BC7_MODE_PRIO_CODE(2, 7, 46), + BC7_MODE_PRIO_CODE(3, 7, 46), + BC7_MODE_PRIO_CODE(4, 7, 49), + 
BC7_MODE_PRIO_CODE(4, 7, 56), + BC7_MODE_PRIO_CODE(4, 7, 37), + BC7_MODE_PRIO_CODE(2, 7, 44), + BC7_MODE_PRIO_CODE(3, 7, 44), + BC7_MODE_PRIO_CODE(4, 7, 36), + BC7_MODE_PRIO_CODE(2, 7, 39), + BC7_MODE_PRIO_CODE(2, 7, 34), + BC7_MODE_PRIO_CODE(4, 7, 38), + BC7_MODE_PRIO_CODE(3, 7, 51), + BC7_MODE_PRIO_CODE(4, 7, 51), + BC7_MODE_PRIO_CODE(4, 7, 46), + BC7_MODE_PRIO_CODE(4, 7, 44), + BC7_MODE_PRIO_CODE(3, 7, 39), + BC7_MODE_PRIO_CODE(3, 7, 34), + BC7_MODE_PRIO_CODE(4, 7, 39), + BC7_MODE_PRIO_CODE(4, 7, 34), + }; + + const uint16_t *g_bc7PrioCodesRGBA = g_bc7PrioCodesRGBAData; + const int g_bc7NumPrioCodesRGBA = sizeof(g_bc7PrioCodesRGBAData) / sizeof(g_bc7PrioCodesRGBA[0]); + + int UnpackMode(uint16_t packed) + { + return static_cast<int>((packed >> BC7_MODE_OFFSET_BITS) & ((1 << BC7_MODE_BITS) - 1)); + } + + int UnpackSeedPointCount(uint16_t packed) + { + return static_cast<int>((packed >> BC7_SEED_POINT_COUNT_OFFSET_BITS) & ((1 << BC7_SEED_POINT_COUNT_BITS) - 1)) + 1; + } + + int UnpackPartition(uint16_t packed) + { + return static_cast<int>((packed >> BC7_PARTITION_OFFSET_BITS) & ((1 << BC7_PARTITION_BITS) - 1)); + } + + int UnpackRotation(uint16_t packed) + { + return static_cast<int>((packed >> BC7_ROTATION_OFFSET_BITS) & ((1 << BC7_ROTATION_BITS) - 1)); + } + + int UnpackIndexSelector(uint16_t packed) + { + return static_cast<int>((packed >> BC7_INDEX_MODE_OFFSET_BITS) & ((1 << BC7_INDEX_MODE_BITS) - 1)); + } +}}} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h b/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h index b5564c0dab..b45ba5eca8 100644 --- a/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h +++ b/thirdparty/cvtt/ConvectionKernels_BC7_SingleColor.h @@ -1,6 +1,8 @@ #pragma once #include <stdint.h> +// This file is generated by the MakeTables app. Do not edit this file manually. 
+ namespace cvtt { namespace Tables { namespace BC7SC { struct TableEntry diff --git a/thirdparty/cvtt/ConvectionKernels_BCCommon.cpp b/thirdparty/cvtt/ConvectionKernels_BCCommon.cpp new file mode 100644 index 0000000000..be16d1db06 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BCCommon.cpp @@ -0,0 +1,46 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels_BCCommon.h" + +int cvtt::Internal::BCCommon::TweakRoundsForRange(int range) +{ + if (range == 3) + return 3; + return 4; +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_BCCommon.h b/thirdparty/cvtt/ConvectionKernels_BCCommon.h new file mode 100644 index 0000000000..3e13151acd --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_BCCommon.h @@ -0,0 +1,104 @@ +#pragma once +#ifndef __CVTT_BCCOMMON_H__ +#define __CVTT_BCCOMMON_H__ + +#include "ConvectionKernels_AggregatedError.h" +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + namespace Internal + { + class BCCommon + { + public: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::AInt16 MAInt16; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::SInt32 MSInt32; + + static int TweakRoundsForRange(int range); + + template<int TVectorSize> + static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError) + { + for (int ch = 0; ch < numRealChannels; ch++) + aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch); + } + + template<int TVectorSize> + static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError) + { + ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError); + } + + template<int TVectorSize> + static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq) + { + AggregatedError<TVectorSize> aggError; + 
ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError); + return aggError.Finalize(flags, channelWeightsSq); + } + + template<int TVectorSize> + static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) + { + MFloat error = ParallelMath::MakeFloatZero(); + if (flags & Flags::Uniform) + { + for (int ch = 0; ch < TVectorSize; ch++) + error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]); + } + else + { + for (int ch = 0; ch < TVectorSize; ch++) + error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); + } + + return error; + } + + template<int TVectorSize> + static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize]) + { + MFloat error = ParallelMath::MakeFloatZero(); + if (flags & Flags::Uniform) + { + for (int ch = 0; ch < TVectorSize; ch++) + error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]); + } + else + { + for (int ch = 0; ch < TVectorSize; ch++) + error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]); + } + + return error; + } + + template<int TChannelCount> + static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) + { + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < TChannelCount; ch++) + preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; + } + } + + template<int TChannelCount> + static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount]) + { + for (int px = 0; px < 16; px++) + 
{ + for (int ch = 0; ch < TChannelCount; ch++) + preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch]; + } + } + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_Config.h b/thirdparty/cvtt/ConvectionKernels_Config.h new file mode 100644 index 0000000000..e79d32b1da --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_Config.h @@ -0,0 +1,12 @@ +#pragma once +#ifndef __CVTT_CONFIG_H__ +#define __CVTT_CONFIG_H__ + +#if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__) +#define CVTT_USE_SSE2 +#endif + +// Define this to compile everything as a single source file +//#define CVTT_SINGLE_FILE + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_ETC.cpp b/thirdparty/cvtt/ConvectionKernels_ETC.cpp new file mode 100644 index 0000000000..cb202a6e9c --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_ETC.cpp @@ -0,0 +1,3147 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. + +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels.h" +#include "ConvectionKernels_ETC.h" +#include "ConvectionKernels_ETC1.h" +#include "ConvectionKernels_ETC2.h" +#include "ConvectionKernels_ETC2_Rounding.h" +#include "ConvectionKernels_ParallelMath.h" +#include "ConvectionKernels_FakeBT709_Rounding.h" + +#include <cmath> + +const int cvtt::Internal::ETCComputer::g_flipTables[2][2][8] = +{ + { + { 0, 1, 4, 5, 8, 9, 12, 13 }, + { 2, 3, 6, 7, 10, 11, 14, 15 } + }, + { + { 0, 1, 2, 3, 4, 5, 6, 7 }, + { 8, 9, 10, 11, 12, 13, 14, 15 } + }, +}; + +cvtt::ParallelMath::Float cvtt::Internal::ETCComputer::ComputeErrorUniform(const MUInt15 pixelA[3], const MUInt15 pixelB[3]) +{ + MSInt16 d0 = ParallelMath::LosslessCast<MSInt16>::Cast(pixelA[0]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixelB[0]); + MFloat fd0 = ParallelMath::ToFloat(d0); + MFloat error = fd0 * fd0; + for (int ch = 1; ch < 3; ch++) + { + MSInt16 d = ParallelMath::LosslessCast<MSInt16>::Cast(pixelA[ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixelB[ch]); + MFloat fd = ParallelMath::ToFloat(d); + error = error + fd * fd; + } + return error; +} + +cvtt::ParallelMath::Float cvtt::Internal::ETCComputer::ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat preWeightedPixel[3], const Options options) +{ + MFloat dr = ParallelMath::ToFloat(reconstructed[0]) * 
options.redWeight - preWeightedPixel[0]; + MFloat dg = ParallelMath::ToFloat(reconstructed[1]) * options.greenWeight - preWeightedPixel[1]; + MFloat db = ParallelMath::ToFloat(reconstructed[2]) * options.blueWeight - preWeightedPixel[2]; + + return dr * dr + dg * dg + db * db; +} + +cvtt::ParallelMath::Float cvtt::Internal::ETCComputer::ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat preWeightedPixel[3]) +{ + MFloat yuv[3]; + ConvertToFakeBT709(yuv, reconstructed); + + MFloat dy = yuv[0] - preWeightedPixel[0]; + MFloat du = yuv[1] - preWeightedPixel[1]; + MFloat dv = yuv[2] - preWeightedPixel[2]; + + return dy * dy + du * du + dv * dv; +} + +void cvtt::Internal::ETCComputer::TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options) +{ + MUInt15 quantized[3]; + MUInt15 unquantized[3]; + + for (int ch = 0; ch < 3; ch++) + { + quantized[ch] = (ParallelMath::RightShift(quantizedPackedColor, (ch * 5)) & ParallelMath::MakeUInt15(31)); + + if (isDifferential) + unquantized[ch] = (quantized[ch] << 3) | ParallelMath::RightShift(quantized[ch], 2); + else + unquantized[ch] = (quantized[ch] << 4) | quantized[ch]; + } + + MUInt16 selectors = ParallelMath::MakeUInt16(0); + MFloat totalError = ParallelMath::MakeFloatZero(); + + MUInt15 u15_255 = ParallelMath::MakeUInt15(255); + MSInt16 s16_zero = ParallelMath::MakeSInt16(0); + + MUInt15 unquantizedModified[4][3]; + for (unsigned int s = 0; s < 4; s++) + for (int ch = 0; ch < 3; ch++) + unquantizedModified[s][ch] = ParallelMath::Min(ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::ToSInt16(unquantized[ch]) + modifiers[s], s16_zero)), u15_255); + + bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0); + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + for (int px = 0; px < 8; px++) + { + MFloat bestError 
= ParallelMath::MakeFloat(FLT_MAX); + MUInt16 bestSelector = ParallelMath::MakeUInt16(0); + + for (unsigned int s = 0; s < 4; s++) + { + MFloat error; + if (isFakeBT709) + error = ComputeErrorFakeBT709(unquantizedModified[s], preWeightedPixels[px]); + else if (isUniform) + error = ComputeErrorUniform(pixels[px], unquantizedModified[s]); + else + error = ComputeErrorWeighted(unquantizedModified[s], preWeightedPixels[px], options); + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); + bestSelector = ParallelMath::Select(ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt16(s), bestSelector); + bestError = ParallelMath::Min(error, bestError); + } + + totalError = totalError + bestError; + selectors = selectors | (bestSelector << (px * 2)); + } + + outError = totalError; + outSelectors = selectors; +} + +void cvtt::Internal::ETCComputer::TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options) +{ + MUInt15 quantized[3]; + MUInt15 unquantized[3]; + + for (int ch = 0; ch < 3; ch++) + { + quantized[ch] = (ParallelMath::RightShift(quantizedPackedColor, (ch * 5)) & ParallelMath::MakeUInt15(31)); + unquantized[ch] = (quantized[ch] << 3) | ParallelMath::RightShift(quantized[ch], 2); + } + + MUInt16 selectors = ParallelMath::MakeUInt16(0); + MFloat totalError = ParallelMath::MakeFloatZero(); + + MUInt15 u15_255 = ParallelMath::MakeUInt15(255); + MSInt16 s16_zero = ParallelMath::MakeSInt16(0); + + MUInt15 unquantizedModified[3][3]; + for (int ch = 0; ch < 3; ch++) + { + unquantizedModified[0][ch] = ParallelMath::Max(unquantized[ch], modifier) - modifier; + unquantizedModified[1][ch] = unquantized[ch]; + unquantizedModified[2][ch] = ParallelMath::Min(unquantized[ch] + modifier, u15_255); + } + + bool isUniform = ((options.flags & 
cvtt::Flags::Uniform) != 0); + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + for (int px = 0; px < 8; px++) + { + ParallelMath::FloatCompFlag isTransparentFloat = ParallelMath::Int16FlagToFloat(isTransparent[px]); + + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + MUInt15 bestSelector = ParallelMath::MakeUInt15(0); + + for (unsigned int s = 0; s < 3; s++) + { + MFloat error; + if (isFakeBT709) + error = ComputeErrorFakeBT709(unquantizedModified[s], preWeightedPixels[px]); + else if (isUniform) + error = ComputeErrorUniform(pixels[px], unquantizedModified[s]); + else + error = ComputeErrorWeighted(unquantizedModified[s], preWeightedPixels[px], options); + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); + bestSelector = ParallelMath::Select(ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(s), bestSelector); + bestError = ParallelMath::Min(error, bestError); + } + + // Annoying quirk: The ETC encoding machinery assumes that selectors are in the table order in the spec, which isn't + // the same as their encoding bits, so the transparent index is actually 1 and the valid indexes are 0, 2, and 3. 
+ + // Remap selector 1 to 2, and 2 to 3 + bestSelector = ParallelMath::Min(ParallelMath::MakeUInt15(3), bestSelector << 1); + + // Mark zero transparent as + ParallelMath::ConditionalSet(bestError, isTransparentFloat, ParallelMath::MakeFloatZero()); + ParallelMath::ConditionalSet(bestSelector, isTransparent[px], ParallelMath::MakeUInt15(1)); + + totalError = totalError + bestError; + selectors = selectors | (ParallelMath::LosslessCast<MUInt16>::Cast(bestSelector) << (px * 2)); + } + + outError = totalError; + outSelectors = selectors; +} + +void cvtt::Internal::ETCComputer::FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs) +{ + // We do this part scalar because most of the cost benefit of parallelization is in error evaluation, + // and this code has a LOT of early-outs and disjointed index lookups that vary heavily between blocks + // and save a lot of time. 
+ for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + bool canIgnore[2] = { ParallelMath::Extract(canIgnoreSector[0], block), ParallelMath::Extract(canIgnoreSector[1], block) }; + bool canIgnoreEither = canIgnore[0] || canIgnore[1]; + float blockBestTotalError = ParallelMath::Extract(bestTotalError, block); + float bestDiffErrors[2] = { FLT_MAX, FLT_MAX }; + uint16_t bestDiffSelectors[2] = { 0, 0 }; + uint16_t bestDiffColors[2] = { 0, 0 }; + uint16_t bestDiffTables[2] = { 0, 0 }; + for (int sector = 0; sector < 2; sector++) + { + unsigned int sectorNumAttempts = ParallelMath::Extract(drs.diffNumAttempts[sector], block); + for (unsigned int i = 0; i < sectorNumAttempts; i++) + { + float error = ParallelMath::Extract(drs.diffErrors[sector][i], block); + if (error < bestDiffErrors[sector]) + { + bestDiffErrors[sector] = error; + bestDiffSelectors[sector] = ParallelMath::Extract(drs.diffSelectors[sector][i], block); + bestDiffColors[sector] = ParallelMath::Extract(drs.diffColors[sector][i], block); + bestDiffTables[sector] = ParallelMath::Extract(drs.diffTables[sector][i], block); + } + } + } + + if (canIgnore[0]) + bestDiffColors[0] = bestDiffColors[1]; + else if (canIgnore[1]) + bestDiffColors[1] = bestDiffColors[0]; + + // The best differential possibilities must be better than the best total error + if (bestDiffErrors[0] + bestDiffErrors[1] < blockBestTotalError) + { + // Fast path if the best possible case is legal + if (canIgnoreEither || ETCDifferentialIsLegalScalar(bestDiffColors[0], bestDiffColors[1])) + { + ParallelMath::PutBoolInt16(bestIsThisMode, block, true); + ParallelMath::PutFloat(bestTotalError, block, bestDiffErrors[0] + bestDiffErrors[1]); + ParallelMath::PutUInt15(bestFlip, block, flip); + ParallelMath::PutUInt15(bestD, block, d); + for (int sector = 0; sector < 2; sector++) + { + ParallelMath::PutUInt15(bestColors[sector], block, bestDiffColors[sector]); + ParallelMath::PutUInt16(bestSelectors[sector], block, 
bestDiffSelectors[sector]); + ParallelMath::PutUInt15(bestTables[sector], block, bestDiffTables[sector]); + } + } + else + { + // Slow path: Sort the possible cases by quality, and search valid combinations + // TODO: Pre-flatten the error lists so this is nicer to cache + unsigned int numSortIndexes[2] = { 0, 0 }; + for (int sector = 0; sector < 2; sector++) + { + unsigned int sectorNumAttempts = ParallelMath::Extract(drs.diffNumAttempts[sector], block); + + for (unsigned int i = 0; i < sectorNumAttempts; i++) + { + if (ParallelMath::Extract(drs.diffErrors[sector][i], block) < blockBestTotalError) + drs.attemptSortIndexes[sector][numSortIndexes[sector]++] = i; + } + + struct SortPredicate + { + const MFloat *diffErrors; + int block; + + bool operator()(uint16_t a, uint16_t b) const + { + float errorA = ParallelMath::Extract(diffErrors[a], block); + float errorB = ParallelMath::Extract(diffErrors[b], block); + + if (errorA < errorB) + return true; + if (errorA > errorB) + return false; + + return a < b; + } + }; + + SortPredicate sp; + sp.diffErrors = drs.diffErrors[sector]; + sp.block = block; + + std::sort<uint16_t*, const SortPredicate&>(drs.attemptSortIndexes[sector], drs.attemptSortIndexes[sector] + numSortIndexes[sector], sp); + } + + int scannedElements = 0; + for (unsigned int i = 0; i < numSortIndexes[0]; i++) + { + unsigned int attemptIndex0 = drs.attemptSortIndexes[0][i]; + float error0 = ParallelMath::Extract(drs.diffErrors[0][attemptIndex0], block); + + scannedElements++; + + if (error0 >= blockBestTotalError) + break; + + float maxError1 = ParallelMath::Extract(bestTotalError, block) - error0; + uint16_t diffColor0 = ParallelMath::Extract(drs.diffColors[0][attemptIndex0], block); + + if (maxError1 < bestDiffErrors[1]) + break; + + for (unsigned int j = 0; j < numSortIndexes[1]; j++) + { + unsigned int attemptIndex1 = drs.attemptSortIndexes[1][j]; + float error1 = ParallelMath::Extract(drs.diffErrors[1][attemptIndex1], block); + + scannedElements++; + 
+ if (error1 >= maxError1) + break; + + uint16_t diffColor1 = ParallelMath::Extract(drs.diffColors[1][attemptIndex1], block); + + if (ETCDifferentialIsLegalScalar(diffColor0, diffColor1)) + { + blockBestTotalError = error0 + error1; + + ParallelMath::PutBoolInt16(bestIsThisMode, block, true); + ParallelMath::PutFloat(bestTotalError, block, blockBestTotalError); + ParallelMath::PutUInt15(bestFlip, block, flip); + ParallelMath::PutUInt15(bestD, block, d); + ParallelMath::PutUInt15(bestColors[0], block, diffColor0); + ParallelMath::PutUInt15(bestColors[1], block, diffColor1); + ParallelMath::PutUInt16(bestSelectors[0], block, ParallelMath::Extract(drs.diffSelectors[0][attemptIndex0], block)); + ParallelMath::PutUInt16(bestSelectors[1], block, ParallelMath::Extract(drs.diffSelectors[1][attemptIndex1], block)); + ParallelMath::PutUInt15(bestTables[0], block, ParallelMath::Extract(drs.diffTables[0][attemptIndex0], block)); + ParallelMath::PutUInt15(bestTables[1], block, ParallelMath::Extract(drs.diffTables[1][attemptIndex1], block)); + break; + } + } + } + } + } + } +} + +cvtt::ParallelMath::Int16CompFlag cvtt::Internal::ETCComputer::ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b) +{ + MSInt16 diff = ParallelMath::LosslessCast<MSInt16>::Cast(b) - ParallelMath::LosslessCast<MSInt16>::Cast(a); + + return ParallelMath::Less(ParallelMath::MakeSInt16(-5), diff) & ParallelMath::Less(diff, ParallelMath::MakeSInt16(4)); +} + +cvtt::ParallelMath::Int16CompFlag cvtt::Internal::ETCComputer::ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b) +{ + MUInt15 mask = ParallelMath::MakeUInt15(31); + + return ETCDifferentialIsLegalForChannel(ParallelMath::RightShift(a, 10), ParallelMath::RightShift(b, 10)) + & ETCDifferentialIsLegalForChannel(ParallelMath::RightShift(a, 5) & mask, ParallelMath::RightShift(b, 5) & mask) + & ETCDifferentialIsLegalForChannel(a & mask, b & mask); +} + +bool cvtt::Internal::ETCComputer::ETCDifferentialIsLegalForChannelScalar(const 
uint16_t &a, const uint16_t &b) +{ + int16_t diff = static_cast<int16_t>(b) - static_cast<int16_t>(a); + + return (-4 <= diff) && (diff <= 3); +} + +bool cvtt::Internal::ETCComputer::ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b) +{ + MUInt15 mask = ParallelMath::MakeUInt15(31); + + return ETCDifferentialIsLegalForChannelScalar((a >> 10), (b >> 10)) + & ETCDifferentialIsLegalForChannelScalar((a >> 5) & 31, (b >> 5) & 31) + & ETCDifferentialIsLegalForChannelScalar(a & 31, b & 31); +} + +void cvtt::Internal::ETCComputer::EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options) +{ + bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0); + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false); + + MUInt15 isolatedTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) }; + MUInt15 lineTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) }; + + MUInt15 numPixelsIsolated = ParallelMath::MakeUInt15(0); + + // To speed this up, we compute line total as the sum, then subtract out isolated + for (unsigned int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + { + isolatedTotal[ch] = isolatedTotal[ch] + ParallelMath::SelectOrZero(isIsolated[px], pixels[px][ch]); + lineTotal[ch] = lineTotal[ch] + pixels[px][ch]; + } + numPixelsIsolated = numPixelsIsolated + ParallelMath::SelectOrZero(isIsolated[px], ParallelMath::MakeUInt15(1)); + } + + for (int ch = 0; ch < 3; ch++) + lineTotal[ch] = lineTotal[ch] - isolatedTotal[ch]; + + MUInt15 numPixelsLine = ParallelMath::MakeUInt15(16) - numPixelsIsolated; + + MUInt15 isolatedAverageQuantized[3]; + MUInt15 isolatedAverageTargets[3]; + { + int 
divisors[ParallelMath::ParallelSize]; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + divisors[block] = ParallelMath::Extract(numPixelsIsolated, block) * 34; + + MUInt15 addend = (numPixelsIsolated << 4) | numPixelsIsolated; + for (int ch = 0; ch < 3; ch++) + { + // isolatedAverageQuantized[ch] = (isolatedTotal[ch] * 2 + numPixelsIsolated * 17) / (numPixelsIsolated * 34); + + MUInt15 numerator = isolatedTotal[ch] + isolatedTotal[ch]; + if (!isFakeBT709) + numerator = numerator + addend; + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int divisor = divisors[block]; + if (divisor == 0) + ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, 0); + else + ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, ParallelMath::Extract(numerator, block) / divisor); + } + + isolatedAverageTargets[ch] = numerator; + } + } + + if (isFakeBT709) + ResolveTHFakeBT709Rounding(isolatedAverageQuantized, isolatedAverageTargets, numPixelsIsolated); + + MUInt15 isolatedColor[3]; + for (int ch = 0; ch < 3; ch++) + isolatedColor[ch] = (isolatedAverageQuantized[ch]) | (isolatedAverageQuantized[ch] << 4); + + MFloat isolatedError[16]; + for (int px = 0; px < 16; px++) + { + if (isFakeBT709) + isolatedError[px] = ComputeErrorFakeBT709(isolatedColor, preWeightedPixels[px]); + else if (isUniform) + isolatedError[px] = ComputeErrorUniform(pixels[px], isolatedColor); + else + isolatedError[px] = ComputeErrorWeighted(isolatedColor, preWeightedPixels[px], options); + } + + MSInt32 bestSelectors = ParallelMath::MakeSInt32(0); + MUInt15 bestTable = ParallelMath::MakeUInt15(0); + MUInt15 bestLineColor = ParallelMath::MakeUInt15(0); + + MSInt16 maxLine = ParallelMath::LosslessCast<MSInt16>::Cast(numPixelsLine); + MSInt16 minLine = ParallelMath::MakeSInt16(0) - maxLine; + + int16_t clusterMaxLine = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int16_t blockMaxLine = ParallelMath::Extract(maxLine, block); + if 
(blockMaxLine > clusterMaxLine) + clusterMaxLine = blockMaxLine; + } + + int16_t clusterMinLine = -clusterMaxLine; + + int lineDivisors[ParallelMath::ParallelSize]; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + lineDivisors[block] = ParallelMath::Extract(numPixelsLine, block) * 34; + + MUInt15 lineAddend = (numPixelsLine << 4) | numPixelsLine; + + for (int table = 0; table < 8; table++) + { + int numUniqueColors[ParallelMath::ParallelSize]; + MUInt15 uniqueQuantizedColors[31]; + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + numUniqueColors[block] = 0; + + MUInt15 modifier = ParallelMath::MakeUInt15(cvtt::Tables::ETC2::g_thModifierTable[table]); + MUInt15 modifierOffset = (modifier + modifier); + + for (int16_t offsetPremultiplier = clusterMinLine; offsetPremultiplier <= clusterMaxLine; offsetPremultiplier++) + { + MSInt16 clampedOffsetPremultiplier = ParallelMath::Max(minLine, ParallelMath::Min(maxLine, ParallelMath::MakeSInt16(offsetPremultiplier))); + MSInt16 modifierAddend = ParallelMath::CompactMultiply(clampedOffsetPremultiplier, modifierOffset); + + MUInt15 quantized[3]; + if (isFakeBT709) + { + MUInt15 targets[3]; + for (int ch = 0; ch < 3; ch++) + { + //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34)); + MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch]) + modifierAddend)); + MUInt15 divided = ParallelMath::MakeUInt15(0); + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int divisor = lineDivisors[block]; + if (divisor == 0) + ParallelMath::PutUInt15(divided, block, 0); + else + ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor); + } + quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided); + targets[ch] = numerator; + } + 
+ ResolveTHFakeBT709Rounding(quantized, targets, numPixelsLine); + } + else + { + for (int ch = 0; ch < 3; ch++) + { + //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + numDAIILine * 17 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34)); + MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch] + lineAddend) + modifierAddend)); + MUInt15 divided = ParallelMath::MakeUInt15(0); + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int divisor = lineDivisors[block]; + if (divisor == 0) + ParallelMath::PutUInt15(divided, block, 0); + else + ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor); + } + quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided); + } + } + + MUInt15 packedColor = quantized[0] | (quantized[1] << 5) | (quantized[2] << 10); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t blockPackedColor = ParallelMath::Extract(packedColor, block); + if (numUniqueColors[block] == 0 || blockPackedColor != ParallelMath::Extract(uniqueQuantizedColors[numUniqueColors[block] - 1], block)) + ParallelMath::PutUInt15(uniqueQuantizedColors[numUniqueColors[block]++], block, blockPackedColor); + } + } + + // Stripe unfilled unique colors + int maxUniqueColors = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + if (numUniqueColors[block] > maxUniqueColors) + maxUniqueColors = numUniqueColors[block]; + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t fillColor = ParallelMath::Extract(uniqueQuantizedColors[0], block); + + int numUnique = numUniqueColors[block]; + for (int fill = numUnique + 1; fill < maxUniqueColors; fill++) + ParallelMath::PutUInt15(uniqueQuantizedColors[fill], block, fillColor); + } + + for (int ci = 0; ci < maxUniqueColors; ci++) 
+ { + MUInt15 lineColors[3][3]; + for (int ch = 0; ch < 3; ch++) + { + MUInt15 quantizedChannel = (ParallelMath::RightShift(uniqueQuantizedColors[ci], (ch * 5)) & ParallelMath::MakeUInt15(15)); + + MUInt15 unquantizedColor = (quantizedChannel << 4) | quantizedChannel; + lineColors[0][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantizedColor + modifier); + lineColors[1][ch] = unquantizedColor; + lineColors[2][ch] = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedColor) - ParallelMath::LosslessCast<MSInt16>::Cast(modifier))); + } + + MSInt32 selectors = ParallelMath::MakeSInt32(0); + MFloat error = ParallelMath::MakeFloatZero(); + for (int px = 0; px < 16; px++) + { + MFloat pixelError = isolatedError[px]; + + MUInt15 pixelBestSelector = ParallelMath::MakeUInt15(0); + for (int i = 0; i < 3; i++) + { + MFloat error = isUniform ? ComputeErrorUniform(lineColors[i], pixels[px]) : ComputeErrorWeighted(lineColors[i], preWeightedPixels[px], options); + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, pixelError); + pixelError = ParallelMath::Min(error, pixelError); + pixelBestSelector = ParallelMath::Select(ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(i + 1), pixelBestSelector); + } + + error = error + pixelError; + selectors = selectors | (ParallelMath::ToInt32(pixelBestSelector) << (px * 2)); + } + + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestError)); + bestError = ParallelMath::Min(error, bestError); + + if (ParallelMath::AnySet(errorBetter)) + { + ParallelMath::ConditionalSet(bestLineColor, errorBetter, uniqueQuantizedColors[ci]); + ParallelMath::ConditionalSet(bestSelectors, errorBetter, selectors); + ParallelMath::ConditionalSet(bestTable, errorBetter, ParallelMath::MakeUInt15(table)); + bestIsThisMode = bestIsThisMode | errorBetter; + } + } + } + + for (int block = 0; block < 
ParallelMath::ParallelSize; block++) + { + if (ParallelMath::Extract(bestIsThisMode, block)) + { + uint32_t lowBits = 0; + uint32_t highBits = 0; + + uint16_t blockBestLineColor = ParallelMath::Extract(bestLineColor, block); + ParallelMath::ScalarUInt16 blockIsolatedAverageQuantized[3]; + + for (int ch = 0; ch < 3; ch++) + blockIsolatedAverageQuantized[ch] = ParallelMath::Extract(isolatedAverageQuantized[ch], block); + + uint16_t blockBestTable = ParallelMath::Extract(bestTable, block); + int32_t blockBestSelectors = ParallelMath::Extract(bestSelectors, block); + + ParallelMath::ScalarUInt16 lineColor[3]; + for (int ch = 0; ch < 3; ch++) + lineColor[ch] = (blockBestLineColor >> (ch * 5)) & 15; + + EmitTModeBlock(outputBuffer + block * 8, lineColor, blockIsolatedAverageQuantized, blockBestSelectors, blockBestTable, true); + } + } +} + +void cvtt::Internal::ETCComputer::EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options) +{ + bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0); + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + MUInt15 zero15 = ParallelMath::MakeUInt15(0); + + MUInt15 counts[2] = { zero15, zero15 }; + + ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false); + + MUInt15 totals[2][3] = + { + { zero15, zero15, zero15 }, + { zero15, zero15, zero15 } + }; + + for (unsigned int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + { + totals[0][ch] = totals[0][ch] + pixels[px][ch]; + totals[1][ch] = totals[1][ch] + ParallelMath::SelectOrZero(groupings[px], pixels[px][ch]); + } + counts[1] = counts[1] + ParallelMath::SelectOrZero(groupings[px], ParallelMath::MakeUInt15(1)); + } + + for (int ch = 0; ch < 3; ch++) + totals[0][ch] = totals[0][ch] - totals[1][ch]; + counts[0] = ParallelMath::MakeUInt15(16) - counts[1]; + + MUInt16 
bestSectorBits = ParallelMath::MakeUInt16(0); + MUInt16 bestSignBits = ParallelMath::MakeUInt16(0); + MUInt15 bestColors[2] = { zero15, zero15 }; + MUInt15 bestTable = ParallelMath::MakeUInt15(0); + + for (int table = 0; table < 8; table++) + { + MUInt15 numUniqueColors = zero15; + + int modifier = cvtt::Tables::ETC1::g_thModifierTable[table]; + + for (int sector = 0; sector < 2; sector++) + { + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int blockNumUniqueColors = 0; + uint16_t blockUniqueQuantizedColors[31]; + + int maxOffsetMultiplier = ParallelMath::Extract(counts[sector], block); + int minOffsetMultiplier = -maxOffsetMultiplier; + + int modifierOffset = modifier * 2; + + int blockSectorCounts = ParallelMath::Extract(counts[sector], block); + int blockSectorTotals[3]; + for (int ch = 0; ch < 3; ch++) + blockSectorTotals[ch] = ParallelMath::Extract(totals[sector][ch], block); + + for (int offsetPremultiplier = minOffsetMultiplier; offsetPremultiplier <= maxOffsetMultiplier; offsetPremultiplier++) + { + // TODO: This isn't ideal for FakeBT709 + int16_t quantized[3]; + for (int ch = 0; ch < 3; ch++) + { + if (blockSectorCounts == 0) + quantized[ch] = 0; + else + quantized[ch] = std::min<int16_t>(15, std::max<int16_t>(0, (blockSectorTotals[ch] * 2 + blockSectorCounts * 17 + modifierOffset * offsetPremultiplier)) / (blockSectorCounts * 34)); + } + + uint16_t packedColor = (quantized[0] << 10) | (quantized[1] << 5) | quantized[2]; + if (blockNumUniqueColors == 0 || packedColor != blockUniqueQuantizedColors[blockNumUniqueColors - 1]) + { + assert(blockNumUniqueColors < 32); + blockUniqueQuantizedColors[blockNumUniqueColors++] = packedColor; + } + } + + ParallelMath::PutUInt15(he.numUniqueColors[sector], block, blockNumUniqueColors); + + int baseIndex = 0; + if (sector == 1) + baseIndex = ParallelMath::Extract(he.numUniqueColors[0], block); + + for (int i = 0; i < blockNumUniqueColors; i++) + 
ParallelMath::PutUInt15(he.uniqueQuantizedColors[baseIndex + i], block, blockUniqueQuantizedColors[i]); + } + } + + MUInt15 totalColors = he.numUniqueColors[0] + he.numUniqueColors[1]; + int maxErrorColors = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + maxErrorColors = std::max<int>(maxErrorColors, ParallelMath::Extract(totalColors, block)); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int lastColor = ParallelMath::Extract(totalColors, block); + uint16_t stripeColor = ParallelMath::Extract(he.uniqueQuantizedColors[0], block); + for (int i = lastColor; i < maxErrorColors; i++) + ParallelMath::PutUInt15(he.uniqueQuantizedColors[i], block, stripeColor); + } + + for (int ci = 0; ci < maxErrorColors; ci++) + { + MUInt15 fifteen = ParallelMath::MakeUInt15(15); + MUInt15 twoFiftyFive = ParallelMath::MakeUInt15(255); + MSInt16 zeroS16 = ParallelMath::MakeSInt16(0); + + MUInt15 colors[2][3]; + for (int ch = 0; ch < 3; ch++) + { + MUInt15 quantizedChannel = ParallelMath::RightShift(he.uniqueQuantizedColors[ci], ((2 - ch) * 5)) & fifteen; + + MUInt15 unquantizedColor = (quantizedChannel << 4) | quantizedChannel; + colors[0][ch] = ParallelMath::Min(twoFiftyFive, unquantizedColor + modifier); + colors[1][ch] = ParallelMath::ToUInt15(ParallelMath::Max(zeroS16, ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedColor) - ParallelMath::MakeSInt16(modifier))); + } + + MUInt16 signBits = ParallelMath::MakeUInt16(0); + for (int px = 0; px < 16; px++) + { + MFloat errors[2]; + for (int i = 0; i < 2; i++) + { + if (isFakeBT709) + errors[i] = ComputeErrorFakeBT709(colors[i], preWeightedPixels[px]); + else if (isUniform) + errors[i] = ComputeErrorUniform(colors[i], pixels[px]); + else + errors[i] = ComputeErrorWeighted(colors[i], preWeightedPixels[px], options); + } + + ParallelMath::Int16CompFlag errorOneLess = ParallelMath::FloatFlagToInt16(ParallelMath::Less(errors[1], errors[0])); + he.errors[ci][px] = 
ParallelMath::Min(errors[0], errors[1]); + signBits = signBits | ParallelMath::SelectOrZero(errorOneLess, ParallelMath::MakeUInt16(1 << px)); + } + he.signBits[ci] = signBits; + } + + int maxUniqueColorCombos = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int numUniqueColorCombos = ParallelMath::Extract(he.numUniqueColors[0], block) * ParallelMath::Extract(he.numUniqueColors[1], block); + if (numUniqueColorCombos > maxUniqueColorCombos) + maxUniqueColorCombos = numUniqueColorCombos; + } + + MUInt15 indexes[2] = { zero15, zero15 }; + MUInt15 maxIndex[2] = { he.numUniqueColors[0] - ParallelMath::MakeUInt15(1), he.numUniqueColors[1] - ParallelMath::MakeUInt15(1) }; + + int block1Starts[ParallelMath::ParallelSize]; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + block1Starts[block] = ParallelMath::Extract(he.numUniqueColors[0], block); + + for (int combo = 0; combo < maxUniqueColorCombos; combo++) + { + MUInt15 index0 = indexes[0] + ParallelMath::MakeUInt15(1); + ParallelMath::Int16CompFlag index0Overflow = ParallelMath::Less(maxIndex[0], index0); + ParallelMath::ConditionalSet(index0, index0Overflow, ParallelMath::MakeUInt15(0)); + + MUInt15 index1 = ParallelMath::Min(maxIndex[1], indexes[1] + ParallelMath::SelectOrZero(index0Overflow, ParallelMath::MakeUInt15(1))); + indexes[0] = index0; + indexes[1] = index1; + + int ci0[ParallelMath::ParallelSize]; + int ci1[ParallelMath::ParallelSize]; + MUInt15 color0; + MUInt15 color1; + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + ci0[block] = ParallelMath::Extract(index0, block); + ci1[block] = ParallelMath::Extract(index1, block) + block1Starts[block]; + ParallelMath::PutUInt15(color0, block, ParallelMath::Extract(he.uniqueQuantizedColors[ci0[block]], block)); + ParallelMath::PutUInt15(color1, block, ParallelMath::Extract(he.uniqueQuantizedColors[ci1[block]], block)); + } + + MFloat totalError = ParallelMath::MakeFloatZero(); + MUInt16 sectorBits 
= ParallelMath::MakeUInt16(0); + MUInt16 signBits = ParallelMath::MakeUInt16(0); + for (int px = 0; px < 16; px++) + { + MFloat errorCI0; + MFloat errorCI1; + MUInt16 signBits0; + MUInt16 signBits1; + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + ParallelMath::PutFloat(errorCI0, block, ParallelMath::Extract(he.errors[ci0[block]][px], block)); + ParallelMath::PutFloat(errorCI1, block, ParallelMath::Extract(he.errors[ci1[block]][px], block)); + ParallelMath::PutUInt16(signBits0, block, ParallelMath::Extract(he.signBits[ci0[block]], block)); + ParallelMath::PutUInt16(signBits1, block, ParallelMath::Extract(he.signBits[ci1[block]], block)); + } + + totalError = totalError + ParallelMath::Min(errorCI0, errorCI1); + + MUInt16 bitPosition = ParallelMath::MakeUInt16(1 << px); + + ParallelMath::Int16CompFlag error1Better = ParallelMath::FloatFlagToInt16(ParallelMath::Less(errorCI1, errorCI0)); + + sectorBits = sectorBits | ParallelMath::SelectOrZero(error1Better, bitPosition); + signBits = signBits | (bitPosition & ParallelMath::Select(error1Better, signBits1, signBits0)); + } + + ParallelMath::FloatCompFlag totalErrorBetter = ParallelMath::Less(totalError, bestError); + ParallelMath::Int16CompFlag totalErrorBetter16 = ParallelMath::FloatFlagToInt16(totalErrorBetter); + if (ParallelMath::AnySet(totalErrorBetter16)) + { + bestIsThisMode = bestIsThisMode | totalErrorBetter16; + ParallelMath::ConditionalSet(bestTable, totalErrorBetter16, ParallelMath::MakeUInt15(table)); + ParallelMath::ConditionalSet(bestColors[0], totalErrorBetter16, color0); + ParallelMath::ConditionalSet(bestColors[1], totalErrorBetter16, color1); + ParallelMath::ConditionalSet(bestSectorBits, totalErrorBetter16, sectorBits); + ParallelMath::ConditionalSet(bestSignBits, totalErrorBetter16, signBits); + bestError = ParallelMath::Min(totalError, bestError); + } + } + } + + if (ParallelMath::AnySet(bestIsThisMode)) + { + for (int block = 0; block < ParallelMath::ParallelSize; 
block++) + { + if (!ParallelMath::Extract(bestIsThisMode, block)) + continue; + + ParallelMath::ScalarUInt16 blockBestColors[2] = { ParallelMath::Extract(bestColors[0], block), ParallelMath::Extract(bestColors[1], block) }; + ParallelMath::ScalarUInt16 blockBestSectorBits = ParallelMath::Extract(bestSectorBits, block); + ParallelMath::ScalarUInt16 blockBestSignBits = ParallelMath::Extract(bestSignBits, block); + ParallelMath::ScalarUInt16 blockBestTable = ParallelMath::Extract(bestTable, block); + + EmitHModeBlock(outputBuffer + block * 8, blockBestColors, blockBestSectorBits, blockBestSignBits, blockBestTable, true); + } + } +} + +void cvtt::Internal::ETCComputer::EncodeVirtualTModePunchthrough(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolatedBase[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options) +{ + // We treat T and H mode as the same mode ("Virtual T mode") with punchthrough, because of how the colors work: + // + // T mode: C1, C2+M, Transparent, C2-M + // H mode: C1+M, C1-M, Transparent, C2-M + // + // So in either case, we have 2 colors +/- a modifier, and a third unique color, which is basically T mode except without the middle color. + // The only thing that matters is whether it's better to store the isolated color as T mode color 1, or store it offset in H mode color 2. + // + // Sometimes it won't even be possible to store it in H mode color 2 because the table low bit derives from a numeric comparison of the colors, + // but unlike opaque blocks, we can't flip them. 
+ bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0); + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + ParallelMath::FloatCompFlag isTransparentF[16]; + for (int px = 0; px < 16; px++) + isTransparentF[px] = ParallelMath::Int16FlagToFloat(isTransparent[px]); + + ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false); + ParallelMath::Int16CompFlag bestIsHMode = ParallelMath::MakeBoolInt16(false); + + MUInt15 isolatedTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) }; + MUInt15 lineTotal[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) }; + + MUInt15 numPixelsIsolated = ParallelMath::MakeUInt15(0); + MUInt15 numPixelsLine = ParallelMath::MakeUInt15(0); + + ParallelMath::Int16CompFlag isIsolated[16]; + ParallelMath::Int16CompFlag isLine[16]; + + for (unsigned int px = 0; px < 16; px++) + { + ParallelMath::Int16CompFlag isOpaque = ParallelMath::Not(isTransparent[px]); + isIsolated[px] = isIsolatedBase[px] & isOpaque; + isLine[px] = ParallelMath::Not(isIsolatedBase[px]) & isOpaque; + } + + for (unsigned int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + { + isolatedTotal[ch] = isolatedTotal[ch] + ParallelMath::SelectOrZero(isIsolated[px], pixels[px][ch]); + lineTotal[ch] = lineTotal[ch] + ParallelMath::SelectOrZero(isLine[px], pixels[px][ch]); + } + numPixelsIsolated = numPixelsIsolated + ParallelMath::SelectOrZero(isIsolated[px], ParallelMath::MakeUInt15(1)); + numPixelsLine = numPixelsLine + ParallelMath::SelectOrZero(isLine[px], ParallelMath::MakeUInt15(1)); + } + + MUInt15 isolatedAverageQuantized[3]; + MUInt15 hModeIsolatedQuantized[8][3]; + MUInt15 isolatedAverageTargets[3]; + { + int divisors[ParallelMath::ParallelSize]; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + divisors[block] = ParallelMath::Extract(numPixelsIsolated, block) * 34; + + MUInt15 addend = 
(numPixelsIsolated << 4) | numPixelsIsolated; + for (int ch = 0; ch < 3; ch++) + { + // isolatedAverageQuantized[ch] = (isolatedTotal[ch] * 2 + numPixelsIsolated * 17) / (numPixelsIsolated * 34); + + MUInt15 numerator = isolatedTotal[ch] + isolatedTotal[ch]; + if (!isFakeBT709) + numerator = numerator + addend; + + MUInt15 hModeIsolatedNumerators[8]; + for (int table = 0; table < 8; table++) + { + // FIXME: Handle fake BT.709 correctly + MUInt15 offsetTotal = isolatedTotal[ch] + ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(ParallelMath::MakeUInt15(cvtt::Tables::ETC2::g_thModifierTable[table]), numPixelsIsolated)); + + hModeIsolatedNumerators[table] = (offsetTotal + offsetTotal) + addend; + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int divisor = divisors[block]; + if (divisor == 0) + { + ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, 0); + for (int table = 0; table < 8; table++) + ParallelMath::PutUInt15(hModeIsolatedQuantized[table][ch], block, 0); + } + else + { + ParallelMath::PutUInt15(isolatedAverageQuantized[ch], block, ParallelMath::Extract(numerator, block) / divisor); + for (int table = 0; table < 8; table++) + ParallelMath::PutUInt15(hModeIsolatedQuantized[table][ch], block, ParallelMath::Extract(hModeIsolatedNumerators[table], block) / divisor); + } + } + + isolatedAverageTargets[ch] = numerator; + } + } + + if (isFakeBT709) + ResolveTHFakeBT709Rounding(isolatedAverageQuantized, isolatedAverageTargets, numPixelsIsolated); + + for (int table = 0; table < 8; table++) + for (int ch = 0; ch < 3; ch++) + hModeIsolatedQuantized[table][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), hModeIsolatedQuantized[table][ch]); + + MUInt15 isolatedColor[3]; + for (int ch = 0; ch < 3; ch++) + isolatedColor[ch] = (isolatedAverageQuantized[ch]) | (isolatedAverageQuantized[ch] << 4); + + MFloat isolatedError[16]; + for (int px = 0; px < 16; px++) + { + if (isFakeBT709) + isolatedError[px] = 
ComputeErrorFakeBT709(isolatedColor, preWeightedPixels[px]); + else if (isUniform) + isolatedError[px] = ComputeErrorUniform(pixels[px], isolatedColor); + else + isolatedError[px] = ComputeErrorWeighted(isolatedColor, preWeightedPixels[px], options); + + ParallelMath::ConditionalSet(isolatedError[px], isTransparentF[px], ParallelMath::MakeFloatZero()); + } + + MSInt32 bestSelectors = ParallelMath::MakeSInt32(0); + MUInt15 bestTable = ParallelMath::MakeUInt15(0); + MUInt15 bestLineColor = ParallelMath::MakeUInt15(0); + MUInt15 bestIsolatedColor = ParallelMath::MakeUInt15(0); + MUInt15 bestHModeColor2 = ParallelMath::MakeUInt15(0); + ParallelMath::Int16CompFlag bestUseHMode = ParallelMath::MakeBoolInt16(false); + + MSInt16 maxLine = ParallelMath::LosslessCast<MSInt16>::Cast(numPixelsLine); + MSInt16 minLine = ParallelMath::MakeSInt16(0) - maxLine; + + int16_t clusterMaxLine = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int16_t blockMaxLine = ParallelMath::Extract(maxLine, block); + if (blockMaxLine > clusterMaxLine) + clusterMaxLine = blockMaxLine; + } + + int16_t clusterMinLine = -clusterMaxLine; + + int lineDivisors[ParallelMath::ParallelSize]; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + lineDivisors[block] = ParallelMath::Extract(numPixelsLine, block) * 34; + + MUInt15 lineAddend = (numPixelsLine << 4) | numPixelsLine; + + for (int table = 0; table < 8; table++) + { + int numUniqueColors[ParallelMath::ParallelSize]; + MUInt15 uniqueQuantizedColors[31]; + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + numUniqueColors[block] = 0; + + MUInt15 modifier = ParallelMath::MakeUInt15(cvtt::Tables::ETC2::g_thModifierTable[table]); + MUInt15 modifierOffset = (modifier + modifier); + + for (int16_t offsetPremultiplier = clusterMinLine; offsetPremultiplier <= clusterMaxLine; offsetPremultiplier += 2) + { + MSInt16 clampedOffsetPremultiplier = ParallelMath::Max(minLine, ParallelMath::Min(maxLine, 
ParallelMath::MakeSInt16(offsetPremultiplier))); + MSInt16 modifierAddend = ParallelMath::CompactMultiply(clampedOffsetPremultiplier, modifierOffset); + + MUInt15 quantized[3]; + if (isFakeBT709) + { + MUInt15 targets[3]; + for (int ch = 0; ch < 3; ch++) + { + //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34)); + MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch]) + modifierAddend)); + MUInt15 divided = ParallelMath::MakeUInt15(0); + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int divisor = lineDivisors[block]; + if (divisor == 0) + ParallelMath::PutUInt15(divided, block, 0); + else + ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor); + } + quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided); + targets[ch] = numerator; + } + + ResolveTHFakeBT709Rounding(quantized, targets, numPixelsLine); + } + else + { + for (int ch = 0; ch < 3; ch++) + { + //quantized[ch] = std::min<int16_t>(15, std::max(0, (lineTotal[ch] * 2 + numDAIILine * 17 + modifierOffset * offsetPremultiplier)) / (numDAIILine * 34)); + MUInt15 numerator = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(lineTotal[ch] + lineTotal[ch] + lineAddend) + modifierAddend)); + MUInt15 divided = ParallelMath::MakeUInt15(0); + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int divisor = lineDivisors[block]; + if (divisor == 0) + ParallelMath::PutUInt15(divided, block, 0); + else + ParallelMath::PutUInt15(divided, block, ParallelMath::Extract(numerator, block) / divisor); + } + quantized[ch] = ParallelMath::Min(ParallelMath::MakeUInt15(15), divided); + } + } + + MUInt15 packedColor = (quantized[0] << 10) | 
(quantized[1] << 5) | quantized[2]; + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t blockPackedColor = ParallelMath::Extract(packedColor, block); + if (numUniqueColors[block] == 0 || blockPackedColor != ParallelMath::Extract(uniqueQuantizedColors[numUniqueColors[block] - 1], block)) + ParallelMath::PutUInt15(uniqueQuantizedColors[numUniqueColors[block]++], block, blockPackedColor); + } + } + + // Stripe unfilled unique colors + int maxUniqueColors = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + if (numUniqueColors[block] > maxUniqueColors) + maxUniqueColors = numUniqueColors[block]; + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t fillColor = ParallelMath::Extract(uniqueQuantizedColors[0], block); + + int numUnique = numUniqueColors[block]; + for (int fill = numUnique + 1; fill < maxUniqueColors; fill++) + ParallelMath::PutUInt15(uniqueQuantizedColors[fill], block, fillColor); + } + + MFloat hModeErrors[16]; + MUInt15 hModeUnquantizedColor[3]; + for (int ch = 0; ch < 3; ch++) + { + MUInt15 quantizedChannel = hModeIsolatedQuantized[table][ch]; + + MUInt15 unquantizedCh = (quantizedChannel << 4) | quantizedChannel; + hModeUnquantizedColor[ch] = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedCh) - ParallelMath::LosslessCast<MSInt16>::Cast(modifier))); + } + + for (int px = 0; px < 16; px++) + { + hModeErrors[px] = isUniform ? ComputeErrorUniform(hModeUnquantizedColor, pixels[px]) : ComputeErrorWeighted(hModeUnquantizedColor, preWeightedPixels[px], options); + ParallelMath::ConditionalSet(hModeErrors[px], isTransparentF[px], ParallelMath::MakeFloatZero()); + } + + MUInt15 packedHModeColor2 = (hModeIsolatedQuantized[table][0] << 10) | (hModeIsolatedQuantized[table][1] << 5) | hModeIsolatedQuantized[table][2]; + ParallelMath::Int16CompFlag tableLowBitIsZero = ((table & 1) == 0) ? 
ParallelMath::MakeBoolInt16(true) : ParallelMath::MakeBoolInt16(false); + + for (int ci = 0; ci < maxUniqueColors; ci++) + { + MUInt15 lineColors[2][3]; + for (int ch = 0; ch < 3; ch++) + { + MUInt15 quantizedChannel = (ParallelMath::RightShift(uniqueQuantizedColors[ci], 10 - (ch * 5)) & ParallelMath::MakeUInt15(15)); + + MUInt15 unquantizedColor = (quantizedChannel << 4) | quantizedChannel; + lineColors[0][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantizedColor + modifier); + lineColors[1][ch] = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), ParallelMath::LosslessCast<MSInt16>::Cast(unquantizedColor) - ParallelMath::LosslessCast<MSInt16>::Cast(modifier))); + } + + MUInt15 bestLineSelector[16]; + MFloat bestLineError[16]; + for (int px = 0; px < 16; px++) + { + MFloat lineErrors[2]; + for (int i = 0; i < 2; i++) + lineErrors[i] = isUniform ? ComputeErrorUniform(lineColors[i], pixels[px]) : ComputeErrorWeighted(lineColors[i], preWeightedPixels[px], options); + + ParallelMath::Int16CompFlag firstIsBetter = ParallelMath::FloatFlagToInt16(ParallelMath::LessOrEqual(lineErrors[0], lineErrors[1])); + bestLineSelector[px] = ParallelMath::Select(firstIsBetter, ParallelMath::MakeUInt15(1), ParallelMath::MakeUInt15(3)); + bestLineError[px] = ParallelMath::Min(lineErrors[0], lineErrors[1]); + + ParallelMath::ConditionalSet(bestLineError[px], isTransparentF[px], ParallelMath::MakeFloatZero()); + } + + // One case considered here was if it was possible to force H mode to be valid when the line color is unused. + // That case isn't actually useful because it's equivalent to the isolated color being unused at maximum offset, + // which is always checked after a swap. 
+ MFloat tModeError = ParallelMath::MakeFloatZero(); + MFloat hModeError = ParallelMath::MakeFloatZero(); + for (int px = 0; px < 16; px++) + { + tModeError = tModeError + ParallelMath::Min(bestLineError[px], isolatedError[px]); + hModeError = hModeError + ParallelMath::Min(bestLineError[px], hModeErrors[px]); + } + + ParallelMath::FloatCompFlag hLessError = ParallelMath::Less(hModeError, tModeError); + + MUInt15 packedHModeColor1 = uniqueQuantizedColors[ci]; + + ParallelMath::Int16CompFlag hModeTableLowBitMustBeZero = ParallelMath::Less(packedHModeColor1, packedHModeColor2); + + ParallelMath::Int16CompFlag hModeIsLegal = ParallelMath::Equal(hModeTableLowBitMustBeZero, tableLowBitIsZero); + ParallelMath::Int16CompFlag useHMode = ParallelMath::FloatFlagToInt16(hLessError) & hModeIsLegal; + + MFloat roundBestError = tModeError; + ParallelMath::ConditionalSet(roundBestError, ParallelMath::Int16FlagToFloat(useHMode), hModeError); + + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(roundBestError, bestError)); + ParallelMath::FloatCompFlag useHModeF = ParallelMath::Int16FlagToFloat(useHMode); + + if (ParallelMath::AnySet(errorBetter)) + { + MSInt32 selectors = ParallelMath::MakeSInt32(0); + for (int px = 0; px < 16; px++) + { + MUInt15 selector = bestLineSelector[px]; + + MFloat isolatedPixelError = ParallelMath::Select(useHModeF, hModeErrors[px], isolatedError[px]); + ParallelMath::Int16CompFlag isolatedBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(isolatedPixelError, bestLineError[px])); + + ParallelMath::ConditionalSet(selector, isolatedBetter, ParallelMath::MakeUInt15(0)); + ParallelMath::ConditionalSet(selector, isTransparent[px], ParallelMath::MakeUInt15(2)); + selectors = selectors | (ParallelMath::ToInt32(selector) << (px * 2)); + } + + bestError = ParallelMath::Min(bestError, roundBestError); + ParallelMath::ConditionalSet(bestLineColor, errorBetter, uniqueQuantizedColors[ci]); + 
ParallelMath::ConditionalSet(bestSelectors, errorBetter, selectors); + ParallelMath::ConditionalSet(bestTable, errorBetter, ParallelMath::MakeUInt15(table)); + ParallelMath::ConditionalSet(bestIsHMode, errorBetter, useHMode); + ParallelMath::ConditionalSet(bestHModeColor2, errorBetter, packedHModeColor2); + + bestIsThisMode = bestIsThisMode | errorBetter; + } + } + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + if (ParallelMath::Extract(bestIsThisMode, block)) + { + uint32_t lowBits = 0; + uint32_t highBits = 0; + + uint16_t blockBestLineColor = ParallelMath::Extract(bestLineColor, block); + ParallelMath::ScalarUInt16 blockIsolatedAverageQuantized[3]; + + for (int ch = 0; ch < 3; ch++) + blockIsolatedAverageQuantized[ch] = ParallelMath::Extract(isolatedAverageQuantized[ch], block); + + uint16_t blockBestTable = ParallelMath::Extract(bestTable, block); + int32_t blockBestSelectors = ParallelMath::Extract(bestSelectors, block); + + ParallelMath::ScalarUInt16 lineColor[3]; + for (int ch = 0; ch < 3; ch++) + lineColor[ch] = (blockBestLineColor >> (10 - (ch * 5))) & 15; + + if (ParallelMath::Extract(bestIsHMode, block)) + { + // T mode: C1, C2+M, Transparent, C2-M + // H mode: C1+M, C1-M, Transparent, C2-M + static const ParallelMath::ScalarUInt16 selectorRemapSector[4] = { 1, 0, 1, 0 }; + static const ParallelMath::ScalarUInt16 selectorRemapSign[4] = { 1, 0, 0, 1 }; + + // Remap selectors + ParallelMath::ScalarUInt16 signBits = 0; + ParallelMath::ScalarUInt16 sectorBits = 0; + int32_t blockBestSelectors = ParallelMath::Extract(bestSelectors, block); + for (int px = 0; px < 16; px++) + { + int32_t selector = (blockBestSelectors >> (px * 2)) & 3; + sectorBits |= (selectorRemapSector[selector] << px); + signBits |= (selectorRemapSign[selector] << px); + } + + ParallelMath::ScalarUInt16 blockColors[2] = { blockBestLineColor, ParallelMath::Extract(bestHModeColor2, block) }; + + EmitHModeBlock(outputBuffer + block * 8, blockColors, sectorBits, 
signBits, blockBestTable, false); + } + else + EmitTModeBlock(outputBuffer + block * 8, lineColor, blockIsolatedAverageQuantized, blockBestSelectors, blockBestTable, false); + } + } +} + + +cvtt::ParallelMath::UInt15 cvtt::Internal::ETCComputer::DecodePlanarCoeff(const MUInt15 &coeff, int ch) +{ + if (ch == 1) + return (coeff << 1) | (ParallelMath::RightShift(coeff, 6)); + else + return (coeff << 2) | (ParallelMath::RightShift(coeff, 4)); +} + +void cvtt::Internal::ETCComputer::EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options) +{ + // NOTE: If it's desired to do this in another color space, the best way to do it would probably be + // to do everything in that color space and then transform it back to RGB. + + // We compute H = (H-O)/4 and V= (V-O)/4 to simplify the math + + // error = (x*H + y*V + O - C)^2 + MFloat h[3] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() }; + MFloat v[3] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() }; + MFloat o[3] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() }; + + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0); + + MFloat totalError = ParallelMath::MakeFloatZero(); + MUInt15 bestCoeffs[3][3]; // [Channel][Coeff] + for (int ch = 0; ch < 3; ch++) + { + float fhh = 0.f; + float fho = 0.f; + float fhv = 0.f; + float foo = 0.f; + float fov = 0.f; + float fvv = 0.f; + MFloat fc = ParallelMath::MakeFloatZero(); + MFloat fh = ParallelMath::MakeFloatZero(); + MFloat fv = ParallelMath::MakeFloatZero(); + MFloat fo = ParallelMath::MakeFloatZero(); + + float &foh = fho; + float &fvh = fhv; + float &fvo = fov; + + for (int px = 0; px < 16; px++) + { + float x = static_cast<float>(px % 4); + float y = 
static_cast<float>(px / 4); + MFloat c = isFakeBT709 ? preWeightedPixels[px][ch] : ParallelMath::ToFloat(pixels[px][ch]); + + // (x*H + y*V + O - C)^2 + fhh += x * x; + fhv += x * y; + fho += x; + fh = fh - c * x; + + fvh += y * x; + fvv += y * y; + fvo += y; + fv = fv - c * y; + + foh += x; + fov += y; + foo += 1; + fo = fo - c; + + fh = fh - c * x; + fv = fv - c * y; + fo = fo - c; + fc = fc + c * c; + } + + //float totalError = fhh * h * h + fho * h*o + fhv * h*v + foo * o * o + fov * o*v + fvv * v * v + fh * h + fv * v + fo * o + fc; + + // error = fhh*h^2 + fho*h*o + fhv*h*v + foo*o^2 + fov*o*v + fvv*v^2 + fh*h + fv*v + fo*o + fc + // derror/dh = 2*fhh*h + fho*o + fhv*v + fh + // derror/dv = fhv*h + fov*o + 2*fvv*v + fv + // derror/do = fho*h + 2*foo*o + fov*v + fo + + // Solve system of equations + // h o v 1 = 0 + // ------- + // d e f g R0 + // i j k l R1 + // m n p q R2 + + float d = 2.0f * fhh; + float e = fho; + float f = fhv; + MFloat gD = fh; + + float i = fhv; + float j = fov; + float k = 2.0f * fvv; + MFloat lD = fv; + + float m = fho; + float n = 2.0f * foo; + float p = fov; + MFloat qD = fo; + + { + // Factor out first column from R1 and R2 + float r0to1 = -i / d; + float r0to2 = -m / d; + + // 0 j1 k1 l1D + float j1 = j + r0to1 * e; + float k1 = k + r0to1 * f; + MFloat l1D = lD + gD * r0to1; + + // 0 n1 p1 q1D + float n1 = n + r0to2 * e; + float p1 = p + r0to2 * f; + MFloat q1D = qD + gD * r0to2; + + // Factor out third column from R2 + float r1to2 = -p1 / k1; + + // 0 n2 0 q2D + float n2 = n1 + r1to2 * j1; + MFloat q2D = q1D + l1D * r1to2; + + o[ch] = -q2D / n2; + + // Factor out second column from R1 + // 0 n2 0 q2D + + float r2to1 = -j1 / n2; + + // 0 0 k1 l2D + // 0 n2 0 q2D + MFloat l2D = l1D + q2D * r2to1; + + float elim2 = -f / k1; + float elim1 = -e / n2; + + // d 0 0 g2D + MFloat g2D = gD + l2D * elim2 + q2D * elim1; + + // n2*o + q2 = 0 + // o = -q2 / n2 + h[ch] = -g2D / d; + v[ch] = -l2D / k1; + } + + // Undo the local transformation + 
h[ch] = h[ch] * 4.0f + o[ch]; + v[ch] = v[ch] * 4.0f + o[ch]; + } + + if (isFakeBT709) + { + MFloat oRGB[3]; + MFloat hRGB[3]; + MFloat vRGB[3]; + + ConvertFromFakeBT709(oRGB, o); + ConvertFromFakeBT709(hRGB, h); + ConvertFromFakeBT709(vRGB, v); + + // Twiddling in fake BT.607 is a mess, just round off for now (the precision is pretty good anyway) + { + ParallelMath::RoundTowardNearestForScope rtn; + + for (int ch = 0; ch < 3; ch++) + { + MFloat fcoeffs[3] = { oRGB[ch], hRGB[ch], vRGB[ch] }; + + for (int c = 0; c < 3; c++) + { + MFloat coeff = ParallelMath::Max(ParallelMath::MakeFloatZero(), fcoeffs[c]); + if (ch == 1) + coeff = ParallelMath::Min(ParallelMath::MakeFloat(127.0f), coeff * (127.0f / 255.0f)); + else + coeff = ParallelMath::Min(ParallelMath::MakeFloat(63.0f), coeff * (63.0f / 255.0f)); + fcoeffs[c] = coeff; + } + + for (int c = 0; c < 3; c++) + bestCoeffs[ch][c] = ParallelMath::RoundAndConvertToU15(fcoeffs[c], &rtn); + } + } + + MUInt15 reconstructed[16][3]; + for (int ch = 0; ch < 3; ch++) + { + MUInt15 dO = DecodePlanarCoeff(bestCoeffs[ch][0], ch); + MUInt15 dH = DecodePlanarCoeff(bestCoeffs[ch][1], ch); + MUInt15 dV = DecodePlanarCoeff(bestCoeffs[ch][2], ch); + + MSInt16 hMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dH) - ParallelMath::LosslessCast<MSInt16>::Cast(dO); + MSInt16 vMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dV) - ParallelMath::LosslessCast<MSInt16>::Cast(dO); + + MFloat error = ParallelMath::MakeFloatZero(); + + MSInt16 addend = ParallelMath::LosslessCast<MSInt16>::Cast(dO << 2) + 2; + + for (int px = 0; px < 16; px++) + { + MUInt15 pxv = ParallelMath::MakeUInt15(px); + MSInt16 x = ParallelMath::LosslessCast<MSInt16>::Cast(pxv & ParallelMath::MakeUInt15(3)); + MSInt16 y = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RightShift(pxv, 2)); + + MSInt16 interpolated = ParallelMath::RightShift(ParallelMath::CompactMultiply(x, hMinusO) + ParallelMath::CompactMultiply(y, vMinusO) + addend, 2); + MUInt15 clampedLow = 
ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), interpolated)); + reconstructed[px][ch] = ParallelMath::Min(ParallelMath::MakeUInt15(255), clampedLow); + } + } + + totalError = ParallelMath::MakeFloatZero(); + for (int px = 0; px < 16; px++) + totalError = totalError + ComputeErrorFakeBT709(reconstructed[px], preWeightedPixels[px]); + } + else + { + for (int ch = 0; ch < 3; ch++) + { + MFloat fcoeffs[3] = { o[ch], h[ch], v[ch] }; + MUInt15 coeffRanges[3][2]; + + for (int c = 0; c < 3; c++) + { + MFloat coeff = ParallelMath::Max(ParallelMath::MakeFloatZero(), fcoeffs[c]); + if (ch == 1) + coeff = ParallelMath::Min(ParallelMath::MakeFloat(127.0f), coeff * (127.0f / 255.0f)); + else + coeff = ParallelMath::Min(ParallelMath::MakeFloat(63.0f), coeff * (63.0f / 255.0f)); + fcoeffs[c] = coeff; + } + + { + ParallelMath::RoundDownForScope rd; + for (int c = 0; c < 3; c++) + coeffRanges[c][0] = ParallelMath::RoundAndConvertToU15(fcoeffs[c], &rd); + } + + { + ParallelMath::RoundUpForScope ru; + for (int c = 0; c < 3; c++) + coeffRanges[c][1] = ParallelMath::RoundAndConvertToU15(fcoeffs[c], &ru); + } + + MFloat bestChannelError = ParallelMath::MakeFloat(FLT_MAX); + for (int io = 0; io < 2; io++) + { + MUInt15 dO = DecodePlanarCoeff(coeffRanges[0][io], ch); + + for (int ih = 0; ih < 2; ih++) + { + MUInt15 dH = DecodePlanarCoeff(coeffRanges[1][ih], ch); + MSInt16 hMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dH) - ParallelMath::LosslessCast<MSInt16>::Cast(dO); + + for (int iv = 0; iv < 2; iv++) + { + MUInt15 dV = DecodePlanarCoeff(coeffRanges[2][iv], ch); + MSInt16 vMinusO = ParallelMath::LosslessCast<MSInt16>::Cast(dV) - ParallelMath::LosslessCast<MSInt16>::Cast(dO); + + MFloat error = ParallelMath::MakeFloatZero(); + + MSInt16 addend = ParallelMath::LosslessCast<MSInt16>::Cast(dO << 2) + 2; + + for (int px = 0; px < 16; px++) + { + MUInt15 pxv = ParallelMath::MakeUInt15(px); + MSInt16 x = ParallelMath::LosslessCast<MSInt16>::Cast(pxv & 
ParallelMath::MakeUInt15(3)); + MSInt16 y = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RightShift(pxv, 2)); + + MSInt16 interpolated = ParallelMath::RightShift(ParallelMath::CompactMultiply(x, hMinusO) + ParallelMath::CompactMultiply(y, vMinusO) + addend, 2); + MUInt15 clampedLow = ParallelMath::ToUInt15(ParallelMath::Max(ParallelMath::MakeSInt16(0), interpolated)); + MUInt15 dec = ParallelMath::Min(ParallelMath::MakeUInt15(255), clampedLow); + + MSInt16 delta = ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(dec); + + MFloat deltaF = ParallelMath::ToFloat(delta); + error = error + deltaF * deltaF; + } + + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestChannelError)); + if (ParallelMath::AnySet(errorBetter)) + { + bestChannelError = ParallelMath::Min(error, bestChannelError); + ParallelMath::ConditionalSet(bestCoeffs[ch][0], errorBetter, coeffRanges[0][io]); + ParallelMath::ConditionalSet(bestCoeffs[ch][1], errorBetter, coeffRanges[1][ih]); + ParallelMath::ConditionalSet(bestCoeffs[ch][2], errorBetter, coeffRanges[2][iv]); + } + } + } + } + + if (!isUniform) + { + switch (ch) + { + case 0: + bestChannelError = bestChannelError * (options.redWeight * options.redWeight); + break; + case 1: + bestChannelError = bestChannelError * (options.greenWeight * options.greenWeight); + break; + case 2: + bestChannelError = bestChannelError * (options.blueWeight * options.blueWeight); + break; + default: + break; + } + } + + totalError = totalError + bestChannelError; + } + } + + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(totalError, bestError)); + if (ParallelMath::AnySet(errorBetter)) + { + bestError = ParallelMath::Min(bestError, totalError); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + if (!ParallelMath::Extract(errorBetter, block)) + continue; + + int ro = 
ParallelMath::Extract(bestCoeffs[0][0], block); + int rh = ParallelMath::Extract(bestCoeffs[0][1], block); + int rv = ParallelMath::Extract(bestCoeffs[0][2], block); + + int go = ParallelMath::Extract(bestCoeffs[1][0], block); + int gh = ParallelMath::Extract(bestCoeffs[1][1], block); + int gv = ParallelMath::Extract(bestCoeffs[1][2], block); + + int bo = ParallelMath::Extract(bestCoeffs[2][0], block); + int bh = ParallelMath::Extract(bestCoeffs[2][1], block); + int bv = ParallelMath::Extract(bestCoeffs[2][2], block); + + int go1 = go >> 6; + int go2 = go & 63; + + int bo1 = bo >> 5; + int bo2 = (bo >> 3) & 3; + int bo3 = bo & 7; + + int rh1 = (rh >> 1); + int rh2 = rh & 1; + + int fakeR = ro >> 2; + int fakeDR = go1 | ((ro & 3) << 1); + + int fakeG = (go2 >> 2); + int fakeDG = ((go2 & 3) << 1) | bo1; + + int fakeB = bo2; + int fakeDB = bo3 >> 1; + + uint32_t highBits = 0; + uint32_t lowBits = 0; + + // Avoid overflowing R + if ((fakeDR & 4) != 0 && fakeR + fakeDR < 8) + highBits |= 1 << (63 - 32); + + // Avoid overflowing G + if ((fakeDG & 4) != 0 && fakeG + fakeDG < 8) + highBits |= 1 << (55 - 32); + + // Overflow B + if (fakeB + fakeDB < 4) + { + // Overflow low + highBits |= 1 << (42 - 32); + } + else + { + // Overflow high + highBits |= 7 << (45 - 32); + } + + highBits |= ro << (57 - 32); + highBits |= go1 << (56 - 32); + highBits |= go2 << (49 - 32); + highBits |= bo1 << (48 - 32); + highBits |= bo2 << (43 - 32); + highBits |= bo3 << (39 - 32); + highBits |= rh1 << (34 - 32); + highBits |= 1 << (33 - 32); + highBits |= rh2 << (32 - 32); + + lowBits |= gh << 25; + lowBits |= bh << 19; + lowBits |= rv << 13; + lowBits |= gv << 6; + lowBits |= bv << 0; + + for (int i = 0; i < 4; i++) + outputBuffer[block * 8 + i] = (highBits >> (24 - i * 8)) & 0xff; + for (int i = 0; i < 4; i++) + outputBuffer[block * 8 + i + 4] = (lowBits >> (24 - i * 8)) & 0xff; + } + } +} + +void cvtt::Internal::ETCComputer::CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 
*pixelBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha) +{ + ParallelMath::Int16CompFlag pixelIsTransparent[16]; + ParallelMath::Int16CompFlag anyTransparent = ParallelMath::MakeBoolInt16(false); + ParallelMath::Int16CompFlag allTransparent = ParallelMath::MakeBoolInt16(true); + + if (punchthroughAlpha) + { + const float fThreshold = std::max<float>(std::min<float>(1.0f, options.threshold), 0.0f) * 255.0f; + + // +1.0f is intentional, we want to take the next valid integer (even if it's 256) since everything else lower is transparent + MUInt15 threshold = ParallelMath::MakeUInt15(static_cast<uint16_t>(std::floor(fThreshold + 1.0f))); + + for (int px = 0; px < 16; px++) + { + MUInt15 alpha; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + ParallelMath::PutUInt15(alpha, block, pixelBlocks[block].m_pixels[px][3]); + + ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(alpha, threshold); + anyTransparent = (anyTransparent | isTransparent); + allTransparent = (allTransparent & isTransparent); + pixelIsTransparent[px] = isTransparent; + } + } + else + { + for (int px = 0; px < 16; px++) + pixelIsTransparent[px] = ParallelMath::MakeBoolInt16(false); + + allTransparent = anyTransparent = ParallelMath::MakeBoolInt16(false); + } + + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + + ETC2CompressionDataInternal* internalData = static_cast<ETC2CompressionDataInternal*>(compressionData); + + MUInt15 pixels[16][3]; + MFloat preWeightedPixels[16][3]; + ExtractBlocks(pixels, preWeightedPixels, pixelBlocks, options); + + if (ParallelMath::AnySet(anyTransparent)) + { + for (int px = 0; px < 16; px++) + { + ParallelMath::Int16CompFlag flag = pixelIsTransparent[px]; + ParallelMath::FloatCompFlag fflag = ParallelMath::Int16FlagToFloat(flag); + + for (int ch = 0; ch < 3; ch++) + { + ParallelMath::ConditionalSet(pixels[px][ch], flag, ParallelMath::MakeUInt15(0)); + 
ParallelMath::ConditionalSet(preWeightedPixels[px][ch], fflag, ParallelMath::MakeFloat(0.0f)); + } + } + } + + if (!ParallelMath::AllSet(allTransparent)) + EncodePlanar(outputBuffer, bestError, pixels, preWeightedPixels, options); + + MFloat chromaDelta[16][2]; + + MUInt15 numOpaque = ParallelMath::MakeUInt15(16); + for (int px = 0; px < 16; px++) + numOpaque = numOpaque - ParallelMath::SelectOrZero(pixelIsTransparent[px], ParallelMath::MakeUInt15(1)); + + if (options.flags & cvtt::Flags::Uniform) + { + MSInt16 chromaCoordinates3[16][2]; + for (int px = 0; px < 16; px++) + { + chromaCoordinates3[px][0] = ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][0]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][2]); + chromaCoordinates3[px][1] = ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][0]) - ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][1] << 1) + ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px][2]); + } + + MSInt16 chromaCoordinateCentroid[2] = { ParallelMath::MakeSInt16(0), ParallelMath::MakeSInt16(0) }; + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 2; ch++) + chromaCoordinateCentroid[ch] = chromaCoordinateCentroid[ch] + chromaCoordinates3[px][ch]; + } + + if (punchthroughAlpha) + { + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 2; ch++) + { + MUInt15 chromaCoordinateMultiplied = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(chromaCoordinates3[px][ch], numOpaque)); + MSInt16 delta = ParallelMath::LosslessCast<MSInt16>::Cast(chromaCoordinateMultiplied) - chromaCoordinateCentroid[ch]; + chromaDelta[px][ch] = ParallelMath::ToFloat(delta); + } + } + } + else + { + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 2; ch++) + chromaDelta[px][ch] = ParallelMath::ToFloat((chromaCoordinates3[px][ch] << 4) - chromaCoordinateCentroid[ch]); + } + } + + const MFloat rcpSqrt3 = ParallelMath::MakeFloat(0.57735026918962576450914878050196f); + + for (int px = 0; px < 16; px++) + 
chromaDelta[px][1] = chromaDelta[px][1] * rcpSqrt3; + } + else + { + const float chromaAxis0[3] = { internalData->m_chromaSideAxis0[0], internalData->m_chromaSideAxis0[1], internalData->m_chromaSideAxis0[2] }; + const float chromaAxis1[3] = { internalData->m_chromaSideAxis1[0], internalData->m_chromaSideAxis1[1], internalData->m_chromaSideAxis1[2] }; + + MFloat chromaCoordinates3[16][2]; + for (int px = 0; px < 16; px++) + { + const MFloat &px0 = preWeightedPixels[px][0]; + const MFloat &px1 = preWeightedPixels[px][1]; + const MFloat &px2 = preWeightedPixels[px][2]; + + chromaCoordinates3[px][0] = px0 * chromaAxis0[0] + px1 * chromaAxis0[1] + px2 * chromaAxis0[2]; + chromaCoordinates3[px][1] = px0 * chromaAxis1[0] + px1 * chromaAxis1[1] + px2 * chromaAxis1[2]; + } + + MFloat chromaCoordinateCentroid[2] = { ParallelMath::MakeFloatZero(), ParallelMath::MakeFloatZero() }; + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 2; ch++) + chromaCoordinateCentroid[ch] = chromaCoordinateCentroid[ch] + chromaCoordinates3[px][ch]; + } + + if (punchthroughAlpha) + { + const MFloat numOpaqueF = ParallelMath::ToFloat(numOpaque); + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 2; ch++) + { + MFloat chromaCoordinateMultiplied = chromaCoordinates3[px][ch] * numOpaqueF; + MFloat delta = chromaCoordinateMultiplied - chromaCoordinateCentroid[ch]; + chromaDelta[px][ch] = delta; + } + } + } + else + { + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 2; ch++) + chromaDelta[px][ch] = chromaCoordinates3[px][ch] * 16.0f - chromaCoordinateCentroid[ch]; + } + } + } + + + MFloat covXX = ParallelMath::MakeFloatZero(); + MFloat covYY = ParallelMath::MakeFloatZero(); + MFloat covXY = ParallelMath::MakeFloatZero(); + + for (int px = 0; px < 16; px++) + { + MFloat nx = chromaDelta[px][0]; + MFloat ny = chromaDelta[px][1]; + + covXX = covXX + nx * nx; + covYY = covYY + ny * ny; + covXY = covXY + nx * ny; + } + + MFloat halfTrace = (covXX + covYY) * 0.5f; + MFloat 
det = covXX * covYY - covXY * covXY; + + MFloat mm = ParallelMath::Sqrt(ParallelMath::Max(ParallelMath::MakeFloatZero(), halfTrace * halfTrace - det)); + + MFloat ev = halfTrace + mm; + + MFloat dx = (covYY - ev + covXY); + MFloat dy = -(covXX - ev + covXY); + + // If evenly distributed, pick an arbitrary plane + ParallelMath::FloatCompFlag allZero = ParallelMath::Equal(dx, ParallelMath::MakeFloatZero()) & ParallelMath::Equal(dy, ParallelMath::MakeFloatZero()); + ParallelMath::ConditionalSet(dx, allZero, ParallelMath::MakeFloat(1.f)); + + ParallelMath::Int16CompFlag sectorAssignments[16]; + for (int px = 0; px < 16; px++) + sectorAssignments[px] = ParallelMath::FloatFlagToInt16(ParallelMath::Less(chromaDelta[px][0] * dx + chromaDelta[px][1] * dy, ParallelMath::MakeFloatZero())); + + if (!ParallelMath::AllSet(allTransparent)) + { + EncodeTMode(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, options); + + // Flip sector assignments + for (int px = 0; px < 16; px++) + sectorAssignments[px] = ParallelMath::Not(sectorAssignments[px]); + + EncodeTMode(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, options); + + EncodeHMode(outputBuffer, bestError, sectorAssignments, pixels, internalData->m_h, preWeightedPixels, options); + + CompressETC1BlockInternal(bestError, outputBuffer, pixels, preWeightedPixels, internalData->m_drs, options, true); + } + + if (ParallelMath::AnySet(anyTransparent)) + { + if (!ParallelMath::AllSet(allTransparent)) + { + // Flip sector assignments + for (int px = 0; px < 16; px++) + sectorAssignments[px] = ParallelMath::Not(sectorAssignments[px]); + } + + // Reset the error of any transparent blocks to max and retry with punchthrough modes + ParallelMath::ConditionalSet(bestError, ParallelMath::Int16FlagToFloat(anyTransparent), ParallelMath::MakeFloat(FLT_MAX)); + + EncodeVirtualTModePunchthrough(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, pixelIsTransparent, anyTransparent, 
allTransparent, options); + + // Flip sector assignments + for (int px = 0; px < 16; px++) + sectorAssignments[px] = ParallelMath::Not(sectorAssignments[px]); + + EncodeVirtualTModePunchthrough(outputBuffer, bestError, sectorAssignments, pixels, preWeightedPixels, pixelIsTransparent, anyTransparent, allTransparent, options); + + CompressETC1PunchthroughBlockInternal(bestError, outputBuffer, pixels, preWeightedPixels, pixelIsTransparent, static_cast<ETC2CompressionDataInternal*>(compressionData)->m_drs, options); + } +} + +void cvtt::Internal::ETCComputer::CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *pixelBlocks, const Options &options) +{ + MUInt15 pixels[16]; + + for (int px = 0; px < 16; px++) + { + for (int block = 0; block < ParallelMath::ParallelSize; block++) + ParallelMath::PutUInt15(pixels[px], block, pixelBlocks[block].m_pixels[px][3]); + } + + CompressETC2AlphaBlockInternal(outputBuffer, pixels, false, false, options); +} + +void cvtt::Internal::ETCComputer::CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options) +{ + MUInt15 minAlpha = ParallelMath::MakeUInt15(is11Bit ? 
2047 : 255); + MUInt15 maxAlpha = ParallelMath::MakeUInt15(0); + + for (int px = 0; px < 16; px++) + { + minAlpha = ParallelMath::Min(minAlpha, pixels[px]); + maxAlpha = ParallelMath::Max(maxAlpha, pixels[px]); + } + + MUInt15 alphaSpan = maxAlpha - minAlpha; + MUInt15 alphaSpanMidpointTimes2 = maxAlpha + minAlpha; + + MUInt31 bestTotalError = ParallelMath::MakeUInt31(0x7fffffff); + MUInt15 bestTableIndex = ParallelMath::MakeUInt15(0); + MUInt15 bestBaseCodeword = ParallelMath::MakeUInt15(0); + MUInt15 bestMultiplier = ParallelMath::MakeUInt15(0); + MUInt15 bestIndexes[16]; + + for (int px = 0; px < 16; px++) + bestIndexes[px] = ParallelMath::MakeUInt15(0); + + const int numAlphaRanges = 10; + for (uint16_t tableIndex = 0; tableIndex < 16; tableIndex++) + { + for (int r = 0; r < numAlphaRanges; r++) + { + int subrange = r % 3; + int mainRange = r / 3; + + int16_t maxOffset = Tables::ETC2::g_alphaModifierTablePositive[tableIndex][3 - mainRange - (subrange & 1)]; + int16_t minOffset = -Tables::ETC2::g_alphaModifierTablePositive[tableIndex][3 - mainRange - ((subrange >> 1) & 1)] - 1; + uint16_t offsetSpan = static_cast<uint16_t>(maxOffset - minOffset); + + MSInt16 vminOffset = ParallelMath::MakeSInt16(minOffset); + MUInt15 vmaxOffset = ParallelMath::MakeUInt15(maxOffset); + MUInt15 voffsetSpan = ParallelMath::MakeUInt15(offsetSpan); + + MUInt15 minMultiplier = ParallelMath::MakeUInt15(0); + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t singleAlphaSpan = ParallelMath::Extract(alphaSpan, block); + + uint16_t lowMultiplier = singleAlphaSpan / offsetSpan; + ParallelMath::PutUInt15(minMultiplier, block, lowMultiplier); + } + + if (is11Bit) + { + // Clamps this to valid multipliers under 15 and rounds down to nearest multiple of 8 + minMultiplier = ParallelMath::Min(minMultiplier, ParallelMath::MakeUInt15(112)) & ParallelMath::MakeUInt15(120); + } + else + { + // We cap at 1 and 14 so both multipliers are valid and dividable + // Cases 
where offset span is 0 should be caught by multiplier 1 of table 13 + minMultiplier = ParallelMath::Max(ParallelMath::Min(minMultiplier, ParallelMath::MakeUInt15(14)), ParallelMath::MakeUInt15(1)); + } + + for (uint16_t multiplierOffset = 0; multiplierOffset < 2; multiplierOffset++) + { + MUInt15 multiplier = minMultiplier; + + if (is11Bit) + { + if (multiplierOffset == 1) + multiplier = multiplier + ParallelMath::MakeUInt15(8); + else + multiplier = ParallelMath::Max(multiplier, ParallelMath::MakeUInt15(1)); + } + else + { + if (multiplierOffset == 1) + multiplier = multiplier + ParallelMath::MakeUInt15(1); + } + + MSInt16 multipliedMinOffset = ParallelMath::CompactMultiply(ParallelMath::LosslessCast<MSInt16>::Cast(multiplier), vminOffset); + MUInt15 multipliedMaxOffset = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(multiplier, vmaxOffset)); + + // codeword = (maxOffset + minOffset + minAlpha + maxAlpha) / 2 + MSInt16 unclampedBaseAlphaTimes2 = ParallelMath::LosslessCast<MSInt16>::Cast(alphaSpanMidpointTimes2) - ParallelMath::LosslessCast<MSInt16>::Cast(multipliedMaxOffset) - multipliedMinOffset; + + MUInt15 baseAlpha; + if (is11Bit) + { + // In unsigned, 4 is added to the unquantized alpha, so compensating for that cancels the 4 we have to add to do rounding. + if (isSigned) + unclampedBaseAlphaTimes2 = unclampedBaseAlphaTimes2 + ParallelMath::MakeSInt16(8); + + // -128 is illegal for some reason + MSInt16 minBaseAlphaTimes2 = isSigned ? 
ParallelMath::MakeSInt16(16) : ParallelMath::MakeSInt16(0); + + MUInt15 clampedBaseAlphaTimes2 = ParallelMath::Min(ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(unclampedBaseAlphaTimes2, minBaseAlphaTimes2)), ParallelMath::MakeUInt15(4095)); + baseAlpha = ParallelMath::RightShift(clampedBaseAlphaTimes2, 1) & ParallelMath::MakeUInt15(2040); + + if (!isSigned) + baseAlpha = baseAlpha + ParallelMath::MakeUInt15(4); + } + else + { + MUInt15 clampedBaseAlphaTimes2 = ParallelMath::Min(ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(unclampedBaseAlphaTimes2, ParallelMath::MakeSInt16(0))), ParallelMath::MakeUInt15(510)); + baseAlpha = ParallelMath::RightShift(clampedBaseAlphaTimes2 + ParallelMath::MakeUInt15(1), 1); + } + + MUInt15 indexes[16]; + MUInt31 totalError = ParallelMath::MakeUInt31(0); + for (int px = 0; px < 16; px++) + { + MUInt15 quantizedValues; + QuantizeETC2Alpha(tableIndex, pixels[px], baseAlpha, multiplier, is11Bit, isSigned, indexes[px], quantizedValues); + + if (is11Bit) + { + MSInt16 delta = ParallelMath::LosslessCast<MSInt16>::Cast(quantizedValues) - ParallelMath::LosslessCast<MSInt16>::Cast(pixels[px]); + MSInt32 deltaSq = ParallelMath::XMultiply(delta, delta); + totalError = totalError + ParallelMath::LosslessCast<MUInt31>::Cast(deltaSq); + } + else + totalError = totalError + ParallelMath::ToUInt31(ParallelMath::SqDiffUInt8(quantizedValues, pixels[px])); + } + + ParallelMath::Int16CompFlag isBetter = ParallelMath::Int32FlagToInt16(ParallelMath::Less(totalError, bestTotalError)); + if (ParallelMath::AnySet(isBetter)) + { + ParallelMath::ConditionalSet(bestTotalError, isBetter, totalError); + ParallelMath::ConditionalSet(bestTableIndex, isBetter, ParallelMath::MakeUInt15(tableIndex)); + ParallelMath::ConditionalSet(bestBaseCodeword, isBetter, baseAlpha); + ParallelMath::ConditionalSet(bestMultiplier, isBetter, multiplier); + + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestIndexes[px], isBetter, 
indexes[px]); + } + + // TODO: Do one refine pass + } + } + } + + if (is11Bit) + { + bestMultiplier = ParallelMath::RightShift(bestMultiplier, 3); + + if (isSigned) + bestBaseCodeword = bestBaseCodeword ^ ParallelMath::MakeUInt15(0x80); + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint8_t *output = outputBuffer + block * 8; + + output[0] = static_cast<uint8_t>(ParallelMath::Extract(bestBaseCodeword, block)); + + ParallelMath::ScalarUInt16 multiplier = ParallelMath::Extract(bestMultiplier, block); + ParallelMath::ScalarUInt16 tableIndex = ParallelMath::Extract(bestTableIndex, block); + + output[1] = static_cast<uint8_t>((multiplier << 4) | tableIndex); + + static const int pixelSelectorOrder[16] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + ParallelMath::ScalarUInt16 indexes[16]; + for (int px = 0; px < 16; px++) + indexes[pixelSelectorOrder[px]] = ParallelMath::Extract(bestIndexes[px], block); + + int outputOffset = 2; + int outputBits = 0; + int numOutputBits = 0; + for (int s = 0; s < 16; s++) + { + outputBits = (outputBits << 3) | indexes[s]; + numOutputBits += 3; + + if (numOutputBits >= 8) + { + output[outputOffset++] = static_cast<uint8_t>(outputBits >> (numOutputBits - 8)); + numOutputBits -= 8; + + outputBits &= ((1 << numOutputBits) - 1); + } + } + + assert(outputOffset == 8 && numOutputBits == 0); + } +} + +void cvtt::Internal::ETCComputer::CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options) +{ + MUInt15 pixels[16]; + for (int px = 0; px < 16; px++) + { + MSInt16 adjustedPixel; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + ParallelMath::PutSInt16(adjustedPixel, block, inputBlocks[block].m_pixels[px]); + + // We use a slightly shifted range here so we can keep the unquantized base color in a UInt15 + // That is, signed range is 1..2047, and unsigned range is 0..2047 + if (isSigned) + { + adjustedPixel = 
ParallelMath::Min(adjustedPixel, ParallelMath::MakeSInt16(1023)) + ParallelMath::MakeSInt16(1024); + adjustedPixel = ParallelMath::Max(ParallelMath::MakeSInt16(1), adjustedPixel); + } + else + { + adjustedPixel = ParallelMath::Min(adjustedPixel, ParallelMath::MakeSInt16(2047)); + adjustedPixel = ParallelMath::Max(ParallelMath::MakeSInt16(0), adjustedPixel); + } + + + pixels[px] = ParallelMath::LosslessCast<MUInt15>::Cast(adjustedPixel); + } + + CompressETC2AlphaBlockInternal(outputBuffer, pixels, true, isSigned, options); +} + +void cvtt::Internal::ETCComputer::CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options) +{ + DifferentialResolveStorage &drs = static_cast<ETC1CompressionDataInternal*>(compressionData)->m_drs; + MFloat bestTotalError = ParallelMath::MakeFloat(FLT_MAX); + + MUInt15 pixels[16][3]; + MFloat preWeightedPixels[16][3]; + ExtractBlocks(pixels, preWeightedPixels, inputBlocks, options); + + CompressETC1BlockInternal(bestTotalError, outputBuffer, pixels, preWeightedPixels, drs, options, false); +} + +void cvtt::Internal::ETCComputer::ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options) +{ + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + bool isUniform = ((options.flags & cvtt::Flags::Uniform) != 0); + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + { + for (int block = 0; block < ParallelMath::ParallelSize; block++) + ParallelMath::PutUInt15(pixels[px][ch], block, inputBlocks[block].m_pixels[px][ch]); + } + + if (isFakeBT709) + ConvertToFakeBT709(preWeightedPixels[px], pixels[px]); + else if (isUniform) + { + for (int ch = 0; ch < 3; ch++) + preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); + } + else + { + preWeightedPixels[px][0] = ParallelMath::ToFloat(pixels[px][0]) * options.redWeight; + preWeightedPixels[px][1] = 
ParallelMath::ToFloat(pixels[px][1]) * options.greenWeight; + preWeightedPixels[px][2] = ParallelMath::ToFloat(pixels[px][2]) * options.blueWeight; + } + } +} + +void cvtt::Internal::ETCComputer::ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential) +{ + for (int ch = 0; ch < 3; ch++) + { + const MUInt15& cu15 = sectorCumulative[ch]; + + if (isDifferential) + { + //quantized[ch] = (cu * 31 + (cu >> 3)) >> 11; + quantized[ch] = ParallelMath::ToUInt15( + ParallelMath::RightShift( + (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3)) + , 11) + ); + } + else + { + //quantized[ch] = (cu * 30 + (cu >> 3)) >> 12; + quantized[ch] = ParallelMath::ToUInt15( + ParallelMath::RightShift( + (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15 << 1) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3)) + , 12) + ); + } + } + + MFloat lowOctantRGBFloat[3]; + MFloat highOctantRGBFloat[3]; + + for (int ch = 0; ch < 3; ch++) + { + MUInt15 unquantized; + MUInt15 unquantizedNext; + if (isDifferential) + { + unquantized = (quantized[ch] << 3) | ParallelMath::RightShift(quantized[ch], 2); + MUInt15 quantizedNext = ParallelMath::Min(ParallelMath::MakeUInt15(31), quantized[ch] + ParallelMath::MakeUInt15(1)); + unquantizedNext = (quantizedNext << 3) | ParallelMath::RightShift(quantizedNext, 2); + } + else + { + unquantized = (quantized[ch] << 4) | quantized[ch]; + unquantizedNext = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantized + ParallelMath::MakeUInt15(17)); + } + lowOctantRGBFloat[ch] = ParallelMath::ToFloat(unquantized << 3); + highOctantRGBFloat[ch] = ParallelMath::ToFloat(unquantizedNext << 3); + } + + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + MUInt15 bestOctant = 
ParallelMath::MakeUInt15(0); + + MFloat cumulativeYUV[3]; + ConvertToFakeBT709(cumulativeYUV, sectorCumulative); + + for (uint16_t octant = 0; octant < 8; octant++) + { + const MFloat &r = (octant & 1) ? highOctantRGBFloat[0] : lowOctantRGBFloat[0]; + const MFloat &g = (octant & 2) ? highOctantRGBFloat[1] : lowOctantRGBFloat[1]; + const MFloat &b = (octant & 4) ? highOctantRGBFloat[2] : lowOctantRGBFloat[2]; + + MFloat octantYUV[3]; + ConvertToFakeBT709(octantYUV, r, g, b); + + MFloat delta[3]; + for (int ch = 0; ch < 3; ch++) + delta[ch] = octantYUV[ch] - cumulativeYUV[ch]; + + MFloat error = delta[0] * delta[0] + delta[1] + delta[1] + delta[2] * delta[2]; + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestError)); + ParallelMath::ConditionalSet(bestOctant, errorBetter, ParallelMath::MakeUInt15(octant)); + bestError = ParallelMath::Min(error, bestError); + } + + for (int ch = 0; ch < 3; ch++) + quantized[ch] = quantized[ch] + (ParallelMath::RightShift(bestOctant, ch) & ParallelMath::MakeUInt15(1)); +} + +void cvtt::Internal::ETCComputer::ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential) +{ + // sectorCumulative range is 0..2040 (11 bits) + MUInt15 roundingOffset = ParallelMath::MakeUInt15(0); + + MUInt15 rOffset; + MUInt15 gOffset; + MUInt15 bOffset; + MUInt15 quantizedBase[3]; + MUInt15 upperBound; + + MUInt15 sectorCumulativeFillIn[3]; + for (int ch = 0; ch < 3; ch++) + sectorCumulativeFillIn[ch] = sectorCumulative[ch] + ParallelMath::RightShift(sectorCumulative[ch], 8); + + if (isDifferential) + { + rOffset = (sectorCumulativeFillIn[0] << 6) & ParallelMath::MakeUInt15(0xf00); + gOffset = (sectorCumulativeFillIn[1] << 4) & ParallelMath::MakeUInt15(0x0f0); + bOffset = ParallelMath::RightShift(sectorCumulativeFillIn[2], 2) & ParallelMath::MakeUInt15(0x00f); + + for (int ch = 0; ch < 3; ch++) + quantizedBase[ch] = 
ParallelMath::RightShift(sectorCumulativeFillIn[ch], 6); + + upperBound = ParallelMath::MakeUInt15(31); + } + else + { + rOffset = (sectorCumulativeFillIn[0] << 5) & ParallelMath::MakeUInt15(0xf00); + gOffset = (sectorCumulativeFillIn[1] << 1) & ParallelMath::MakeUInt15(0x0f0); + bOffset = ParallelMath::RightShift(sectorCumulativeFillIn[2], 3) & ParallelMath::MakeUInt15(0x00f); + + for (int ch = 0; ch < 3; ch++) + quantizedBase[ch] = ParallelMath::RightShift(sectorCumulativeFillIn[ch], 7); + + upperBound = ParallelMath::MakeUInt15(15); + } + + MUInt15 lookupIndex = (rOffset | gOffset | bOffset); + + MUInt15 octant; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + ParallelMath::PutUInt15(octant, block, Tables::FakeBT709::g_rounding16[ParallelMath::Extract(lookupIndex, block)]); + + quantizedBase[0] = quantizedBase[0] + (octant & ParallelMath::MakeUInt15(1)); + quantizedBase[1] = quantizedBase[1] + (ParallelMath::RightShift(octant, 1) & ParallelMath::MakeUInt15(1)); + quantizedBase[2] = quantizedBase[2] + (ParallelMath::RightShift(octant, 2) & ParallelMath::MakeUInt15(1)); + + for (int ch = 0; ch < 3; ch++) + quantized[ch] = ParallelMath::Min(quantizedBase[ch], upperBound); +} + +void cvtt::Internal::ETCComputer::ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 targets[3], const MUInt15 &granularity) +{ + MFloat lowOctantRGBFloat[3]; + MFloat highOctantRGBFloat[3]; + + for (int ch = 0; ch < 3; ch++) + { + MUInt15 unquantized = (quantized[ch] << 4) | quantized[ch]; + MUInt15 unquantizedNext = ParallelMath::Min(ParallelMath::MakeUInt15(255), unquantized + ParallelMath::MakeUInt15(17)); + + lowOctantRGBFloat[ch] = ParallelMath::ToFloat(ParallelMath::CompactMultiply(unquantized, granularity) << 1); + highOctantRGBFloat[ch] = ParallelMath::ToFloat(ParallelMath::CompactMultiply(unquantizedNext, granularity) << 1); + } + + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + MUInt15 bestOctant = ParallelMath::MakeUInt15(0); + + MFloat 
cumulativeYUV[3]; + ConvertToFakeBT709(cumulativeYUV, ParallelMath::ToFloat(targets[0]), ParallelMath::ToFloat(targets[1]), ParallelMath::ToFloat(targets[2])); + + for (uint16_t octant = 0; octant < 8; octant++) + { + const MFloat &r = (octant & 1) ? highOctantRGBFloat[0] : lowOctantRGBFloat[0]; + const MFloat &g = (octant & 2) ? highOctantRGBFloat[1] : lowOctantRGBFloat[1]; + const MFloat &b = (octant & 4) ? highOctantRGBFloat[2] : lowOctantRGBFloat[2]; + + MFloat octantYUV[3]; + ConvertToFakeBT709(octantYUV, r, g, b); + + MFloat delta[3]; + for (int ch = 0; ch < 3; ch++) + delta[ch] = octantYUV[ch] - cumulativeYUV[ch]; + + MFloat error = delta[0] * delta[0] + delta[1] + delta[1] + delta[2] * delta[2]; + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestError)); + ParallelMath::ConditionalSet(bestOctant, errorBetter, ParallelMath::MakeUInt15(octant)); + bestError = ParallelMath::Min(error, bestError); + } + + for (int ch = 0; ch < 3; ch++) + quantized[ch] = quantized[ch] + (ParallelMath::RightShift(bestOctant, ch) & ParallelMath::MakeUInt15(1)); +} + +void cvtt::Internal::ETCComputer::ConvertToFakeBT709(MFloat yuv[3], const MUInt15 color[3]) +{ + MFloat floatRGB[3]; + for (int ch = 0; ch < 3; ch++) + floatRGB[ch] = ParallelMath::ToFloat(color[ch]); + + ConvertToFakeBT709(yuv, floatRGB); +} + +void cvtt::Internal::ETCComputer::ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3]) +{ + ConvertToFakeBT709(yuv, color[0], color[1], color[2]); +} + +void cvtt::Internal::ETCComputer::ConvertToFakeBT709(MFloat yuv[3], const MFloat &pr, const MFloat &pg, const MFloat &pb) +{ + MFloat r = pr; + MFloat g = pg; + MFloat b = pb; + + yuv[0] = r * 0.368233989135369f + g * 1.23876274963149f + b * 0.125054068802017f; + yuv[1] = r * 0.5f - g * 0.4541529f - b * 0.04584709f; + yuv[2] = r * -0.081014709086133f - g * 0.272538676238785f + b * 0.353553390593274f; +} + +void cvtt::Internal::ETCComputer::ConvertFromFakeBT709(MFloat 
rgb[3], const MFloat yuv[3]) +{ + MFloat yy = yuv[0] * 0.57735026466774571071f; + MFloat u = yuv[1]; + MFloat v = yuv[2]; + + rgb[0] = yy + u * 1.5748000207960953486f; + rgb[1] = yy - u * 0.46812425854364753669f - v * 0.26491652528157560861f; + rgb[2] = yy + v * 2.6242146882856944069f; +} + + +void cvtt::Internal::ETCComputer::QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues) +{ + MSInt16 offset = ParallelMath::LosslessCast<MSInt16>::Cast(value) - ParallelMath::LosslessCast<MSInt16>::Cast(baseValue); + MSInt16 offsetTimes2 = offset + offset; + + // ETC2's offset tables all have a reflect about 0.5*multiplier + MSInt16 offsetAboutReflectorTimes2 = offsetTimes2 + ParallelMath::LosslessCast<MSInt16>::Cast(multiplier); + + MUInt15 absOffsetAboutReflectorTimes2 = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Abs(offsetAboutReflectorTimes2)); + MUInt15 lookupIndex = ParallelMath::RightShift(absOffsetAboutReflectorTimes2, 1); + + MUInt15 positiveIndex; + MUInt15 positiveOffsetUnmultiplied; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t blockLookupIndex = ParallelMath::Extract(lookupIndex, block) / ParallelMath::Extract(multiplier, block); + if (blockLookupIndex >= Tables::ETC2::g_alphaRoundingTableWidth) + blockLookupIndex = Tables::ETC2::g_alphaRoundingTableWidth - 1; + uint16_t index = Tables::ETC2::g_alphaRoundingTables[tableIndex][blockLookupIndex]; + ParallelMath::PutUInt15(positiveIndex, block, index); + ParallelMath::PutUInt15(positiveOffsetUnmultiplied, block, Tables::ETC2::g_alphaModifierTablePositive[tableIndex][index]); + + // TODO: This is suboptimal when the offset is capped. We should detect 0 and 255 values and always map them to the maximum offsets. + // Doing that will also affect refinement though. 
+ } + + MSInt16 signBits = ParallelMath::RightShift(offsetAboutReflectorTimes2, 15); + MSInt16 offsetUnmultiplied = ParallelMath::LosslessCast<MSInt16>::Cast(positiveOffsetUnmultiplied) ^ signBits; + MSInt16 quantizedOffset = ParallelMath::CompactMultiply(offsetUnmultiplied, multiplier); + + MSInt16 offsetValue = ParallelMath::LosslessCast<MSInt16>::Cast(baseValue) + quantizedOffset; + + if (is11Bit) + { + if (isSigned) + outQuantizedValues = ParallelMath::Min(ParallelMath::MakeUInt15(2047), ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(1), offsetValue))); + else + outQuantizedValues = ParallelMath::Min(ParallelMath::MakeUInt15(2047), ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), offsetValue))); + } + else + outQuantizedValues = ParallelMath::Min(ParallelMath::MakeUInt15(255), ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::Max(ParallelMath::MakeSInt16(0), offsetValue))); + + MUInt15 indexSub = ParallelMath::LosslessCast<MUInt15>::Cast(signBits) & ParallelMath::MakeUInt15(4); + + outIndexes = positiveIndex + ParallelMath::MakeUInt15(4) - indexSub; +} + + +void cvtt::Internal::ETCComputer::EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque) +{ + static const int selectorOrder[] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + uint32_t lowBits = 0; + uint32_t highBits = 0; + + int rh = ((isolatedColor[0] >> 2) & 3); + int rl = (isolatedColor[0] & 3); + + if (rh + rl < 4) + { + // Overflow low + highBits |= 1 << (58 - 32); + } + else + { + // Overflow high + highBits |= 7 << (61 - 32); + } + + highBits |= rh << (59 - 32); + highBits |= rl << (56 - 32); + highBits |= isolatedColor[1] << (52 - 32); + highBits |= isolatedColor[2] << (48 - 32); + highBits |= lineColor[0] << (44 - 32); + highBits |= lineColor[1] << 
(40 - 32); + highBits |= lineColor[2] << (36 - 32); + highBits |= ((table >> 1) & 3) << (34 - 32); + if (opaque) + highBits |= 1 << (33 - 32); + highBits |= (table & 1) << (32 - 32); + + for (int px = 0; px < 16; px++) + { + int sel = (packedSelectors >> (2 * selectorOrder[px])) & 3; + if ((sel & 0x1) != 0) + lowBits |= (1 << px); + if ((sel & 0x2) != 0) + lowBits |= (1 << (16 + px)); + } + + for (int i = 0; i < 4; i++) + outputBuffer[i] = (highBits >> (24 - i * 8)) & 0xff; + for (int i = 0; i < 4; i++) + outputBuffer[i + 4] = (lowBits >> (24 - i * 8)) & 0xff; +} + +void cvtt::Internal::ETCComputer::EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque) +{ + if (blockColors[0] == blockColors[1]) + { + // Base colors are the same. + // If the table low bit isn't 1, then we can't encode this, because swapping the block colors will have no effect + // on their order. + // Instead, we encode this as T mode where all of the indexes are on the line. 
+ + ParallelMath::ScalarUInt16 lineColor[3]; + ParallelMath::ScalarUInt16 isolatedColor[3]; + + lineColor[0] = isolatedColor[0] = (blockColors[0] >> 10) & 0x1f; + lineColor[1] = isolatedColor[1] = (blockColors[0] >> 5) & 0x1f; + lineColor[2] = isolatedColor[2] = (blockColors[0] >> 0) & 0x1f; + + int32_t packedSelectors = 0x55555555; + for (int px = 0; px < 16; px++) + packedSelectors |= ((signBits >> px) & 1) << ((px * 2) + 1); + + EmitTModeBlock(outputBuffer, lineColor, isolatedColor, packedSelectors, table, opaque); + return; + } + + static const int selectorOrder[] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + int16_t colors[2][3]; + for (int sector = 0; sector < 2; sector++) + { + for (int ch = 0; ch < 3; ch++) + colors[sector][ch] = (blockColors[sector] >> ((2 - ch) * 5)) & 15; + } + + uint32_t lowBits = 0; + uint32_t highBits = 0; + + if (((table & 1) == 1) != (blockColors[0] > blockColors[1])) + { + for (int ch = 0; ch < 3; ch++) + std::swap(colors[0][ch], colors[1][ch]); + sectorBits ^= 0xffff; + } + + int r1 = colors[0][0]; + int g1a = colors[0][1] >> 1; + int g1b = (colors[0][1] & 1); + int b1a = colors[0][2] >> 3; + int b1b = colors[0][2] & 7; + int r2 = colors[1][0]; + int g2 = colors[1][1]; + int b2 = colors[1][2]; + + // Avoid overflowing R + if ((g1a & 4) != 0 && r1 + g1a < 8) + highBits |= 1 << (63 - 32); + + int fakeDG = b1b >> 1; + int fakeG = b1a | (g1b << 1); + + if (fakeG + fakeDG < 4) + { + // Overflow low + highBits |= 1 << (50 - 32); + } + else + { + // Overflow high + highBits |= 7 << (53 - 32); + } + + int da = (table >> 2) & 1; + int db = (table >> 1) & 1; + + highBits |= r1 << (59 - 32); + highBits |= g1a << (56 - 32); + highBits |= g1b << (52 - 32); + highBits |= b1a << (51 - 32); + highBits |= b1b << (47 - 32); + highBits |= r2 << (43 - 32); + highBits |= g2 << (39 - 32); + highBits |= b2 << (35 - 32); + highBits |= da << (34 - 32); + if (opaque) + highBits |= 1 << (33 - 32); + highBits |= db << (32 - 32); + + for 
(int px = 0; px < 16; px++) + { + int sectorBit = (sectorBits >> selectorOrder[px]) & 1; + int signBit = (signBits >> selectorOrder[px]) & 1; + + lowBits |= (signBit << px); + lowBits |= (sectorBit << (16 + px)); + } + + uint8_t *output = outputBuffer; + + for (int i = 0; i < 4; i++) + output[i] = (highBits >> (24 - i * 8)) & 0xff; + for (int i = 0; i < 4; i++) + output[i + 4] = (lowBits >> (24 - i * 8)) & 0xff; +} + +void cvtt::Internal::ETCComputer::EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent) +{ + uint32_t highBits = 0; + uint32_t lowBits = 0; + + if (blockBestD == 0) + { + highBits |= blockBestColors[0][0] << 28; + highBits |= blockBestColors[1][0] << 24; + highBits |= blockBestColors[0][1] << 20; + highBits |= blockBestColors[1][1] << 16; + highBits |= blockBestColors[0][2] << 12; + highBits |= blockBestColors[1][2] << 8; + } + else + { + highBits |= blockBestColors[0][0] << 27; + highBits |= ((blockBestColors[1][0] - blockBestColors[0][0]) & 7) << 24; + highBits |= blockBestColors[0][1] << 19; + highBits |= ((blockBestColors[1][1] - blockBestColors[0][1]) & 7) << 16; + highBits |= blockBestColors[0][2] << 11; + highBits |= ((blockBestColors[1][2] - blockBestColors[0][2]) & 7) << 8; + } + + highBits |= (blockBestTables[0] << 5); + highBits |= (blockBestTables[1] << 2); + if (!transparent) + highBits |= (blockBestD << 1); + highBits |= blockBestFlip; + + const uint8_t modifierCodes[4] = { 3, 2, 0, 1 }; + + uint8_t unpackedSelectors[16]; + uint8_t unpackedSelectorCodes[16]; + for (int sector = 0; sector < 2; sector++) + { + int blockSectorBestSelectors = blockBestSelectors[sector]; + + for (int px = 0; px < 8; px++) + { + int selector = (blockSectorBestSelectors >> (2 * px)) & 3; + unpackedSelectorCodes[g_flipTables[blockBestFlip][sector][px]] = modifierCodes[selector]; + 
unpackedSelectors[g_flipTables[blockBestFlip][sector][px]] = selector; + } + } + + const int pixelSelectorOrder[16] = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; + + int lowBitOffset = 0; + for (int sb = 0; sb < 2; sb++) + for (int px = 0; px < 16; px++) + lowBits |= ((unpackedSelectorCodes[pixelSelectorOrder[px]] >> sb) & 1) << (px + sb * 16); + + for (int i = 0; i < 4; i++) + outputBuffer[i] = (highBits >> (24 - i * 8)) & 0xff; + for (int i = 0; i < 4; i++) + outputBuffer[i + 4] = (lowBits >> (24 - i * 8)) & 0xff; +} + +void cvtt::Internal::ETCComputer::CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage &drs, const Options &options, bool punchthrough) +{ + int numTries = 0; + + MUInt15 zeroU15 = ParallelMath::MakeUInt15(0); + MUInt16 zeroU16 = ParallelMath::MakeUInt16(0); + + MUInt15 bestColors[2] = { zeroU15, zeroU15 }; + MUInt16 bestSelectors[2] = { zeroU16, zeroU16 }; + MUInt15 bestTables[2] = { zeroU15, zeroU15 }; + MUInt15 bestFlip = zeroU15; + MUInt15 bestD = zeroU15; + + MUInt15 sectorPixels[2][2][8][3]; + MFloat sectorPreWeightedPixels[2][2][8][3]; + MUInt15 sectorCumulative[2][2][3]; + + ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false); + + for (int flip = 0; flip < 2; flip++) + { + for (int sector = 0; sector < 2; sector++) + { + for (int ch = 0; ch < 3; ch++) + sectorCumulative[flip][sector][ch] = zeroU15; + + for (int px = 0; px < 8; px++) + { + for (int ch = 0; ch < 3; ch++) + { + MUInt15 pixelChannelValue = pixels[g_flipTables[flip][sector][px]][ch]; + sectorPixels[flip][sector][px][ch] = pixelChannelValue; + sectorPreWeightedPixels[flip][sector][px][ch] = preWeightedPixels[g_flipTables[flip][sector][px]][ch]; + sectorCumulative[flip][sector][ch] = sectorCumulative[flip][sector][ch] + pixelChannelValue; + } + } + } + } + + static const MSInt16 modifierTables[8][4] = + { + { 
ParallelMath::MakeSInt16(-8), ParallelMath::MakeSInt16(-2), ParallelMath::MakeSInt16(2), ParallelMath::MakeSInt16(8) }, + { ParallelMath::MakeSInt16(-17), ParallelMath::MakeSInt16(-5), ParallelMath::MakeSInt16(5), ParallelMath::MakeSInt16(17) }, + { ParallelMath::MakeSInt16(-29), ParallelMath::MakeSInt16(-9), ParallelMath::MakeSInt16(9), ParallelMath::MakeSInt16(29) }, + { ParallelMath::MakeSInt16(-42), ParallelMath::MakeSInt16(-13), ParallelMath::MakeSInt16(13), ParallelMath::MakeSInt16(42) }, + { ParallelMath::MakeSInt16(-60), ParallelMath::MakeSInt16(-18), ParallelMath::MakeSInt16(18), ParallelMath::MakeSInt16(60) }, + { ParallelMath::MakeSInt16(-80), ParallelMath::MakeSInt16(-24), ParallelMath::MakeSInt16(24), ParallelMath::MakeSInt16(80) }, + { ParallelMath::MakeSInt16(-106), ParallelMath::MakeSInt16(-33), ParallelMath::MakeSInt16(33), ParallelMath::MakeSInt16(106) }, + { ParallelMath::MakeSInt16(-183), ParallelMath::MakeSInt16(-47), ParallelMath::MakeSInt16(47), ParallelMath::MakeSInt16(183) }, + }; + + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + int minD = punchthrough ? 
1 : 0; + + for (int flip = 0; flip < 2; flip++) + { + drs.diffNumAttempts[0] = drs.diffNumAttempts[1] = zeroU15; + + MFloat bestIndError[2] = { ParallelMath::MakeFloat(FLT_MAX), ParallelMath::MakeFloat(FLT_MAX) }; + MUInt16 bestIndSelectors[2] = { ParallelMath::MakeUInt16(0), ParallelMath::MakeUInt16(0) }; + MUInt15 bestIndColors[2] = { zeroU15, zeroU15 }; + MUInt15 bestIndTable[2] = { zeroU15, zeroU15 }; + + for (int d = minD; d < 2; d++) + { + for (int sector = 0; sector < 2; sector++) + { + const int16_t *potentialOffsets = cvtt::Tables::ETC1::g_potentialOffsets4; + + for (int table = 0; table < 8; table++) + { + int16_t numOffsets = *potentialOffsets++; + + MUInt15 possibleColors[cvtt::Tables::ETC1::g_maxPotentialOffsets]; + + MUInt15 quantized[3]; + for (int oi = 0; oi < numOffsets; oi++) + { + if (!isFakeBT709) + { + for (int ch = 0; ch < 3; ch++) + { + // cu is in range 0..2040 + MUInt15 cu15 = ParallelMath::Min( + ParallelMath::MakeUInt15(2040), + ParallelMath::ToUInt15( + ParallelMath::Max( + ParallelMath::MakeSInt16(0), + ParallelMath::LosslessCast<MSInt16>::Cast(sectorCumulative[flip][sector][ch]) + ParallelMath::MakeSInt16(potentialOffsets[oi]) + ) + ) + ); + + if (d == 1) + { + //quantized[ch] = (cu * 31 + (cu >> 3) + 1024) >> 11; + quantized[ch] = ParallelMath::ToUInt15( + ParallelMath::RightShift( + (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3)) + ParallelMath::MakeUInt16(1024) + , 11) + ); + } + else + { + //quantized[ch] = (cu * 30 + (cu >> 3) + 2048) >> 12; + quantized[ch] = ParallelMath::ToUInt15( + ParallelMath::RightShift( + (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15 << 1) + ParallelMath::LosslessCast<MUInt16>::Cast(ParallelMath::RightShift(cu15, 3)) + ParallelMath::MakeUInt16(2048) + , 12) + ); + } + } + } + else + { + MUInt15 offsetCumulative[3]; + 
for (int ch = 0; ch < 3; ch++) + { + // cu is in range 0..2040 + MUInt15 cu15 = ParallelMath::Min( + ParallelMath::MakeUInt15(2040), + ParallelMath::ToUInt15( + ParallelMath::Max( + ParallelMath::MakeSInt16(0), + ParallelMath::LosslessCast<MSInt16>::Cast(sectorCumulative[flip][sector][ch]) + ParallelMath::MakeSInt16(potentialOffsets[oi]) + ) + ) + ); + + offsetCumulative[ch] = cu15; + } + + if ((options.flags & cvtt::Flags::ETC_FakeBT709Accurate) != 0) + ResolveHalfBlockFakeBT709RoundingAccurate(quantized, offsetCumulative, d == 1); + else + ResolveHalfBlockFakeBT709RoundingFast(quantized, offsetCumulative, d == 1); + } + + possibleColors[oi] = quantized[0] | (quantized[1] << 5) | (quantized[2] << 10); + } + + potentialOffsets += numOffsets; + + ParallelMath::UInt15 numUniqueColors; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t blockNumUniqueColors = 1; + for (int i = 1; i < numOffsets; i++) + { + uint16_t color = ParallelMath::Extract(possibleColors[i], block); + if (color != ParallelMath::Extract(possibleColors[blockNumUniqueColors - 1], block)) + ParallelMath::PutUInt15(possibleColors[blockNumUniqueColors++], block, color); + } + + ParallelMath::PutUInt15(numUniqueColors, block, blockNumUniqueColors); + } + + int maxUniqueColors = ParallelMath::Extract(numUniqueColors, 0); + for (int block = 1; block < ParallelMath::ParallelSize; block++) + maxUniqueColors = std::max<int>(maxUniqueColors, ParallelMath::Extract(numUniqueColors, block)); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t fillColor = ParallelMath::Extract(possibleColors[0], block); + for (int i = ParallelMath::Extract(numUniqueColors, block); i < maxUniqueColors; i++) + ParallelMath::PutUInt15(possibleColors[i], block, fillColor); + } + + for (int i = 0; i < maxUniqueColors; i++) + { + MFloat error = ParallelMath::MakeFloatZero(); + MUInt16 selectors = ParallelMath::MakeUInt16(0); + MUInt15 quantized = possibleColors[i]; + 
TestHalfBlock(error, selectors, quantized, sectorPixels[flip][sector], sectorPreWeightedPixels[flip][sector], modifierTables[table], d == 1, options); + + if (d == 0) + { + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(error, bestIndError[sector])); + if (ParallelMath::AnySet(errorBetter)) + { + bestIndError[sector] = ParallelMath::Min(error, bestIndError[sector]); + ParallelMath::ConditionalSet(bestIndSelectors[sector], errorBetter, selectors); + ParallelMath::ConditionalSet(bestIndColors[sector], errorBetter, quantized); + ParallelMath::ConditionalSet(bestIndTable[sector], errorBetter, ParallelMath::MakeUInt15(table)); + } + } + else + { + ParallelMath::Int16CompFlag isInBounds = ParallelMath::Less(ParallelMath::MakeUInt15(i), numUniqueColors); + + MUInt15 storageIndexes = drs.diffNumAttempts[sector]; + drs.diffNumAttempts[sector] = drs.diffNumAttempts[sector] + ParallelMath::SelectOrZero(isInBounds, ParallelMath::MakeUInt15(1)); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int storageIndex = ParallelMath::Extract(storageIndexes, block); + + ParallelMath::PutFloat(drs.diffErrors[sector][storageIndex], block, ParallelMath::Extract(error, block)); + ParallelMath::PutUInt16(drs.diffSelectors[sector][storageIndex], block, ParallelMath::Extract(selectors, block)); + ParallelMath::PutUInt15(drs.diffColors[sector][storageIndex], block, ParallelMath::Extract(quantized, block)); + ParallelMath::PutUInt15(drs.diffTables[sector][storageIndex], block, table); + } + } + } + } + } + + if (d == 0) + { + MFloat bestIndErrorTotal = bestIndError[0] + bestIndError[1]; + ParallelMath::Int16CompFlag errorBetter = ParallelMath::FloatFlagToInt16(ParallelMath::Less(bestIndErrorTotal, bestTotalError)); + if (ParallelMath::AnySet(errorBetter)) + { + bestIsThisMode = bestIsThisMode | errorBetter; + + bestTotalError = ParallelMath::Min(bestTotalError, bestIndErrorTotal); + ParallelMath::ConditionalSet(bestFlip, 
errorBetter, ParallelMath::MakeUInt15(flip)); + ParallelMath::ConditionalSet(bestD, errorBetter, ParallelMath::MakeUInt15(d)); + for (int sector = 0; sector < 2; sector++) + { + ParallelMath::ConditionalSet(bestColors[sector], errorBetter, bestIndColors[sector]); + ParallelMath::ConditionalSet(bestSelectors[sector], errorBetter, bestIndSelectors[sector]); + ParallelMath::ConditionalSet(bestTables[sector], errorBetter, bestIndTable[sector]); + } + } + } + else + { + ParallelMath::Int16CompFlag canIgnoreSector[2] = { ParallelMath::MakeBoolInt16(false), ParallelMath::MakeBoolInt16(false) }; + FindBestDifferentialCombination(flip, d, canIgnoreSector, bestIsThisMode, bestTotalError, bestFlip, bestD, bestColors, bestSelectors, bestTables, drs); + } + } + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + if (!ParallelMath::Extract(bestIsThisMode, block)) + continue; + + uint32_t highBits = 0; + uint32_t lowBits = 0; + + int blockBestFlip = ParallelMath::Extract(bestFlip, block); + int blockBestD = ParallelMath::Extract(bestD, block); + int blockBestTables[2] = { ParallelMath::Extract(bestTables[0], block), ParallelMath::Extract(bestTables[1], block) }; + ParallelMath::ScalarUInt16 blockBestSelectors[2] = { ParallelMath::Extract(bestSelectors[0], block), ParallelMath::Extract(bestSelectors[1], block) }; + + int colors[2][3]; + for (int sector = 0; sector < 2; sector++) + { + int sectorColor = ParallelMath::Extract(bestColors[sector], block); + for (int ch = 0; ch < 3; ch++) + colors[sector][ch] = (sectorColor >> (ch * 5)) & 31; + } + + EmitETC1Block(outputBuffer + block * 8, blockBestFlip, blockBestD, colors, blockBestTables, blockBestSelectors, false); + } +} + + +void cvtt::Internal::ETCComputer::CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage &drs, const Options 
&options) +{ + int numTries = 0; + + MUInt15 zeroU15 = ParallelMath::MakeUInt15(0); + MUInt16 zeroU16 = ParallelMath::MakeUInt16(0); + + MUInt15 bestColors[2] = { zeroU15, zeroU15 }; + MUInt16 bestSelectors[2] = { zeroU16, zeroU16 }; + MUInt15 bestTables[2] = { zeroU15, zeroU15 }; + MUInt15 bestFlip = zeroU15; + + MUInt15 sectorPixels[2][2][8][3]; + ParallelMath::Int16CompFlag sectorTransparent[2][2][8]; + MFloat sectorPreWeightedPixels[2][2][8][3]; + MUInt15 sectorCumulative[2][2][3]; + + ParallelMath::Int16CompFlag bestIsThisMode = ParallelMath::MakeBoolInt16(false); + + for (int flip = 0; flip < 2; flip++) + { + for (int sector = 0; sector < 2; sector++) + { + for (int ch = 0; ch < 3; ch++) + sectorCumulative[flip][sector][ch] = zeroU15; + + for (int px = 0; px < 8; px++) + { + for (int ch = 0; ch < 3; ch++) + { + MUInt15 pixelChannelValue = pixels[g_flipTables[flip][sector][px]][ch]; + sectorPixels[flip][sector][px][ch] = pixelChannelValue; + sectorPreWeightedPixels[flip][sector][px][ch] = preWeightedPixels[g_flipTables[flip][sector][px]][ch]; + sectorCumulative[flip][sector][ch] = sectorCumulative[flip][sector][ch] + pixelChannelValue; + } + + sectorTransparent[flip][sector][px] = isTransparent[g_flipTables[flip][sector][px]]; + } + } + } + + static const MUInt15 modifiers[8] = + { + ParallelMath::MakeUInt15(8), + ParallelMath::MakeUInt15(17), + ParallelMath::MakeUInt15(29), + ParallelMath::MakeUInt15(42), + ParallelMath::MakeUInt15(60), + ParallelMath::MakeUInt15(80), + ParallelMath::MakeUInt15(106), + ParallelMath::MakeUInt15(183), + }; + + bool isFakeBT709 = ((options.flags & cvtt::Flags::ETC_UseFakeBT709) != 0); + + const int maxSectorCumulativeOffsets = 17; + + for (int flip = 0; flip < 2; flip++) + { + ParallelMath::Int16CompFlag canIgnoreSector[2] = { ParallelMath::MakeBoolInt16(true), ParallelMath::MakeBoolInt16(false) }; + + for (int sector = 0; sector < 2; sector++) + for (int px = 0; px < 8; px++) + canIgnoreSector[sector] = canIgnoreSector[sector] 
& sectorTransparent[flip][sector][px]; + + drs.diffNumAttempts[0] = drs.diffNumAttempts[1] = zeroU15; + + for (int sector = 0; sector < 2; sector++) + { + MUInt15 sectorNumOpaque = ParallelMath::MakeUInt15(0); + for (int px = 0; px < 8; px++) + sectorNumOpaque = sectorNumOpaque + ParallelMath::SelectOrZero(sectorTransparent[flip][sector][px], ParallelMath::MakeUInt15(1)); + + int sectorMaxOpaque = 0; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + sectorMaxOpaque = std::max<int>(sectorMaxOpaque, ParallelMath::Extract(sectorNumOpaque, block)); + + int sectorNumOpaqueMultipliers = sectorMaxOpaque * 2 + 1; + + MUInt15 sectorNumOpaqueDenominator = ParallelMath::Max(ParallelMath::MakeUInt15(1), sectorNumOpaque) << 8; + MUInt15 sectorNumOpaqueAddend = sectorNumOpaque << 7; + + MSInt16 sectorNumOpaqueSigned = ParallelMath::LosslessCast<MSInt16>::Cast(sectorNumOpaque); + MSInt16 negSectorNumOpaqueSigned = ParallelMath::MakeSInt16(0) - sectorNumOpaqueSigned; + + MUInt15 sectorCumulativeMax = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(ParallelMath::MakeUInt15(255), sectorNumOpaque)); + + for (int table = 0; table < 8; table++) + { + MUInt15 possibleColors[maxSectorCumulativeOffsets]; + + MUInt15 quantized[3]; + for (int om = -sectorMaxOpaque; om <= sectorMaxOpaque; om++) + { + MSInt16 clampedOffsetMult = ParallelMath::Max(ParallelMath::Min(ParallelMath::MakeSInt16(om), sectorNumOpaqueSigned), negSectorNumOpaqueSigned); + MSInt16 offset = ParallelMath::CompactMultiply(clampedOffsetMult, modifiers[table]); + + for (int ch = 0; ch < 3; ch++) + { + // cu is in range 0..255*numOpaque (at most 0..2040) + MUInt15 cu15 = ParallelMath::Min( + sectorCumulativeMax, + ParallelMath::ToUInt15( + ParallelMath::Max( + ParallelMath::MakeSInt16(0), + ParallelMath::LosslessCast<MSInt16>::Cast(sectorCumulative[flip][sector][ch]) + offset + ) + ) + ); + + //quantized[ch] = (cu * 31 + (cu >> 3) + (numOpaque * 128)) / (numOpaque * 256) + MUInt16 
cuTimes31 = (ParallelMath::LosslessCast<MUInt16>::Cast(cu15) << 5) - ParallelMath::LosslessCast<MUInt16>::Cast(cu15); + MUInt15 cuDiv8 = ParallelMath::RightShift(cu15, 3); + MUInt16 numerator = cuTimes31 + ParallelMath::LosslessCast<MUInt16>::Cast(cuDiv8 + sectorNumOpaqueAddend); + for (int block = 0; block < ParallelMath::ParallelSize; block++) + ParallelMath::PutUInt15(quantized[ch], block, ParallelMath::Extract(numerator, block) / ParallelMath::Extract(sectorNumOpaqueDenominator, block)); + } + + possibleColors[om + sectorMaxOpaque] = quantized[0] | (quantized[1] << 5) | (quantized[2] << 10); + } + + ParallelMath::UInt15 numUniqueColors; + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t blockNumUniqueColors = 1; + for (int i = 1; i < sectorNumOpaqueMultipliers; i++) + { + uint16_t color = ParallelMath::Extract(possibleColors[i], block); + if (color != ParallelMath::Extract(possibleColors[blockNumUniqueColors - 1], block)) + ParallelMath::PutUInt15(possibleColors[blockNumUniqueColors++], block, color); + } + + ParallelMath::PutUInt15(numUniqueColors, block, blockNumUniqueColors); + } + + int maxUniqueColors = ParallelMath::Extract(numUniqueColors, 0); + for (int block = 1; block < ParallelMath::ParallelSize; block++) + maxUniqueColors = std::max<int>(maxUniqueColors, ParallelMath::Extract(numUniqueColors, block)); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + uint16_t fillColor = ParallelMath::Extract(possibleColors[0], block); + for (int i = ParallelMath::Extract(numUniqueColors, block); i < maxUniqueColors; i++) + ParallelMath::PutUInt15(possibleColors[i], block, fillColor); + } + + for (int i = 0; i < maxUniqueColors; i++) + { + MFloat error = ParallelMath::MakeFloatZero(); + MUInt16 selectors = ParallelMath::MakeUInt16(0); + MUInt15 quantized = possibleColors[i]; + TestHalfBlockPunchthrough(error, selectors, quantized, sectorPixels[flip][sector], sectorPreWeightedPixels[flip][sector], 
sectorTransparent[flip][sector], modifiers[table], options); + + ParallelMath::Int16CompFlag isInBounds = ParallelMath::Less(ParallelMath::MakeUInt15(i), numUniqueColors); + + MUInt15 storageIndexes = drs.diffNumAttempts[sector]; + drs.diffNumAttempts[sector] = drs.diffNumAttempts[sector] + ParallelMath::SelectOrZero(isInBounds, ParallelMath::MakeUInt15(1)); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int storageIndex = ParallelMath::Extract(storageIndexes, block); + + ParallelMath::PutFloat(drs.diffErrors[sector][storageIndex], block, ParallelMath::Extract(error, block)); + ParallelMath::PutUInt16(drs.diffSelectors[sector][storageIndex], block, ParallelMath::Extract(selectors, block)); + ParallelMath::PutUInt15(drs.diffColors[sector][storageIndex], block, ParallelMath::Extract(quantized, block)); + ParallelMath::PutUInt15(drs.diffTables[sector][storageIndex], block, table); + } + } + } + } + + MUInt15 bestDDummy = ParallelMath::MakeUInt15(0); + FindBestDifferentialCombination(flip, 1, canIgnoreSector, bestIsThisMode, bestTotalError, bestFlip, bestDDummy, bestColors, bestSelectors, bestTables, drs); + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + if (!ParallelMath::Extract(bestIsThisMode, block)) + continue; + + int blockBestColors[2][3]; + int blockBestTables[2]; + ParallelMath::ScalarUInt16 blockBestSelectors[2]; + for (int sector = 0; sector < 2; sector++) + { + int sectorColor = ParallelMath::Extract(bestColors[sector], block); + for (int ch = 0; ch < 3; ch++) + blockBestColors[sector][ch] = (sectorColor >> (ch * 5)) & 31; + + blockBestTables[sector] = ParallelMath::Extract(bestTables[sector], block); + blockBestSelectors[sector] = ParallelMath::Extract(bestSelectors[sector], block); + } + + EmitETC1Block(outputBuffer + block * 8, ParallelMath::Extract(bestFlip, block), 1, blockBestColors, blockBestTables, blockBestSelectors, true); + } +} + + +cvtt::ETC1CompressionData 
*cvtt::Internal::ETCComputer::AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context) +{ + void *buffer = allocFunc(context, sizeof(cvtt::Internal::ETCComputer::ETC1CompressionDataInternal)); + if (!buffer) + return NULL; + new (buffer) cvtt::Internal::ETCComputer::ETC1CompressionDataInternal(context); + return static_cast<ETC1CompressionData*>(buffer); +} + +void cvtt::Internal::ETCComputer::ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc) +{ + cvtt::Internal::ETCComputer::ETC1CompressionDataInternal* internalData = static_cast<cvtt::Internal::ETCComputer::ETC1CompressionDataInternal*>(compressionData); + void *context = internalData->m_context; + internalData->~ETC1CompressionDataInternal(); + freeFunc(context, compressionData, sizeof(cvtt::Internal::ETCComputer::ETC1CompressionDataInternal)); +} + +cvtt::ETC2CompressionData *cvtt::Internal::ETCComputer::AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options) +{ + void *buffer = allocFunc(context, sizeof(cvtt::Internal::ETCComputer::ETC2CompressionDataInternal)); + if (!buffer) + return NULL; + new (buffer) cvtt::Internal::ETCComputer::ETC2CompressionDataInternal(context, options); + return static_cast<ETC2CompressionData*>(buffer); +} + +void cvtt::Internal::ETCComputer::ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc) +{ + cvtt::Internal::ETCComputer::ETC2CompressionDataInternal* internalData = static_cast<cvtt::Internal::ETCComputer::ETC2CompressionDataInternal*>(compressionData); + void *context = internalData->m_context; + internalData->~ETC2CompressionDataInternal(); + freeFunc(context, compressionData, sizeof(cvtt::Internal::ETCComputer::ETC2CompressionDataInternal)); +} + +cvtt::Internal::ETCComputer::ETC2CompressionDataInternal::ETC2CompressionDataInternal(void *context, const cvtt::Options &options) + : m_context(context) +{ + const float cd[3] = { options.redWeight, 
options.greenWeight, options.blueWeight }; + const float rotCD[3] = { cd[1], cd[2], cd[0] }; + + const float offs = -(rotCD[0] * cd[0] + rotCD[1] * cd[1] + rotCD[2] * cd[2]) / (cd[0] * cd[0] + cd[1] * cd[1] + cd[2] * cd[2]); + + const float chromaAxis0[3] = { rotCD[0] + cd[0] * offs, rotCD[1] + cd[1] * offs, rotCD[2] + cd[2] * offs }; + + const float chromaAxis1Unnormalized[3] = + { + chromaAxis0[1] * cd[2] - chromaAxis0[2] * cd[1], + chromaAxis0[2] * cd[0] - chromaAxis0[0] * cd[2], + chromaAxis0[0] * cd[1] - chromaAxis0[1] * cd[0] + }; + + const float ca0LengthSq = (chromaAxis0[0] * chromaAxis0[0] + chromaAxis0[1] * chromaAxis0[1] + chromaAxis0[2] * chromaAxis0[2]); + const float ca1UNLengthSq = (chromaAxis1Unnormalized[0] * chromaAxis1Unnormalized[0] + chromaAxis1Unnormalized[1] * chromaAxis1Unnormalized[1] + chromaAxis1Unnormalized[2] * chromaAxis1Unnormalized[2]); + const float lengthRatio = static_cast<float>(std::sqrt(ca0LengthSq / ca1UNLengthSq)); + + const float chromaAxis1[3] = { chromaAxis1Unnormalized[0] * lengthRatio, chromaAxis1Unnormalized[1] * lengthRatio, chromaAxis1Unnormalized[2] * lengthRatio }; + + for (int i = 0; i < 3; i++) + { + m_chromaSideAxis0[i] = chromaAxis0[i]; + m_chromaSideAxis1[i] = chromaAxis1[i]; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_ETC.h b/thirdparty/cvtt/ConvectionKernels_ETC.h new file mode 100644 index 0000000000..5e3c4d74fd --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_ETC.h @@ -0,0 +1,126 @@ +#pragma once +#ifndef __CVTT_CONVECTIONKERNELS_ETC_H__ +#define __CVTT_CONVECTIONKERNELS_ETC_H__ + +#include "ConvectionKernels.h" +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + struct Options; + + namespace Internal + { + class ETCComputer + { + public: + static void CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options); + static void CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 
*inputBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha); + static void CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, const Options &options); + static void CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options); + + static ETC2CompressionData *AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options); + static void ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc); + + static ETC1CompressionData *AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context); + static void ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc); + + private: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::SInt32 MSInt32; + typedef ParallelMath::UInt31 MUInt31; + + struct DifferentialResolveStorage + { + static const unsigned int MaxAttemptsPerSector = 57 + 81 + 81 + 81 + 81 + 81 + 81 + 81; + + MUInt15 diffNumAttempts[2]; + MFloat diffErrors[2][MaxAttemptsPerSector]; + MUInt16 diffSelectors[2][MaxAttemptsPerSector]; + MUInt15 diffColors[2][MaxAttemptsPerSector]; + MUInt15 diffTables[2][MaxAttemptsPerSector]; + + uint16_t attemptSortIndexes[2][MaxAttemptsPerSector]; + }; + + struct HModeEval + { + MFloat errors[62][16]; + MUInt16 signBits[62]; + MUInt15 uniqueQuantizedColors[62]; + MUInt15 numUniqueColors[2]; + }; + + struct ETC1CompressionDataInternal : public cvtt::ETC1CompressionData + { + explicit ETC1CompressionDataInternal(void *context) + : m_context(context) + { + } + + DifferentialResolveStorage m_drs; + void *m_context; + }; + + struct ETC2CompressionDataInternal : public cvtt::ETC2CompressionData + { + explicit ETC2CompressionDataInternal(void *context, const cvtt::Options &options); + + 
HModeEval m_h; + DifferentialResolveStorage m_drs; + + void *m_context; + float m_chromaSideAxis0[3]; + float m_chromaSideAxis1[3]; + }; + + static MFloat ComputeErrorUniform(const MUInt15 pixelA[3], const MUInt15 pixelB[3]); + static MFloat ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat pixelB[3], const Options options); + static MFloat ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat pixelB[3]); + + static void TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options); + static void TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options); + static void FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs); + + static ParallelMath::Int16CompFlag ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b); + static ParallelMath::Int16CompFlag ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b); + static bool ETCDifferentialIsLegalForChannelScalar(const uint16_t &a, const uint16_t &b); + static bool ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b); + + static void EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options); + static void EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const 
MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options); + + static void EncodeVirtualTModePunchthrough(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options); + + static MUInt15 DecodePlanarCoeff(const MUInt15 &coeff, int ch); + static void EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options); + + static void CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage& compressionData, const Options &options, bool punchthrough); + static void CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage& compressionData, const Options &options); + static void CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options); + + static void ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options); + + static void ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential); + static void ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential); + static void ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 target[3], const MUInt15 &granularity); + static void ConvertToFakeBT709(MFloat yuv[3], const MUInt15 
color[3]); + static void ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3]); + static void ConvertToFakeBT709(MFloat yuv[3], const MFloat &r, const MFloat &g, const MFloat &b); + static void ConvertFromFakeBT709(MFloat rgb[3], const MFloat yuv[3]); + + static void QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues); + + static void EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque); + static void EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque); + static void EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent); + + static const int g_flipTables[2][2][8]; + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_ETC1.h b/thirdparty/cvtt/ConvectionKernels_ETC1.h new file mode 100644 index 0000000000..775e41669f --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_ETC1.h @@ -0,0 +1,29 @@ +#include <stdint.h> + +namespace cvtt +{ + namespace Tables + { + namespace ETC1 + { + const int16_t g_potentialOffsets4[] = + { + 57, -64, -58, -54, -52, -48, -46, -44, -42, -40, -38, -36, -34, -32, -30, -28, -26, -24, -22, -20, -18, -16, -14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 52, 54, 58, 64, + 81, -136, -124, -114, -112, -102, -100, -92, -90, -88, -80, -78, -76, -70, -68, -66, -64, -58, -56, -54, -52, -48, -46, -44, -42, -40, -36, -34, -32, -30, -26, -24, -22, -20, -18, -14, -12, 
-10, -8, -4, -2, 0, 2, 4, 8, 10, 12, 14, 18, 20, 22, 24, 26, 30, 32, 34, 36, 40, 42, 44, 46, 48, 52, 54, 56, 58, 64, 66, 68, 70, 76, 78, 80, 88, 90, 92, 100, 102, 112, 114, 124, 136, + 81, -232, -212, -194, -192, -174, -172, -156, -154, -152, -136, -134, -132, -118, -116, -114, -112, -98, -96, -94, -92, -80, -78, -76, -74, -72, -60, -58, -56, -54, -42, -40, -38, -36, -34, -22, -20, -18, -16, -4, -2, 0, 2, 4, 16, 18, 20, 22, 34, 36, 38, 40, 42, 54, 56, 58, 60, 72, 74, 76, 78, 80, 92, 94, 96, 98, 112, 114, 116, 118, 132, 134, 136, 152, 154, 156, 172, 174, 192, 194, 212, 232, + 81, -336, -307, -281, -278, -252, -249, -226, -223, -220, -197, -194, -191, -171, -168, -165, -162, -142, -139, -136, -133, -116, -113, -110, -107, -104, -87, -84, -81, -78, -61, -58, -55, -52, -49, -32, -29, -26, -23, -6, -3, 0, 3, 6, 23, 26, 29, 32, 49, 52, 55, 58, 61, 78, 81, 84, 87, 104, 107, 110, 113, 116, 133, 136, 139, 142, 162, 165, 168, 171, 191, 194, 197, 220, 223, 226, 249, 252, 278, 281, 307, 336, + 81, -480, -438, -402, -396, -360, -354, -324, -318, -312, -282, -276, -270, -246, -240, -234, -228, -204, -198, -192, -186, -168, -162, -156, -150, -144, -126, -120, -114, -108, -90, -84, -78, -72, -66, -48, -42, -36, -30, -12, -6, 0, 6, 12, 30, 36, 42, 48, 66, 72, 78, 84, 90, 108, 114, 120, 126, 144, 150, 156, 162, 168, 186, 192, 198, 204, 228, 234, 240, 246, 270, 276, 282, 312, 318, 324, 354, 360, 396, 402, 438, 480, + 81, -640, -584, -536, -528, -480, -472, -432, -424, -416, -376, -368, -360, -328, -320, -312, -304, -272, -264, -256, -248, -224, -216, -208, -200, -192, -168, -160, -152, -144, -120, -112, -104, -96, -88, -64, -56, -48, -40, -16, -8, 0, 8, 16, 40, 48, 56, 64, 88, 96, 104, 112, 120, 144, 152, 160, 168, 192, 200, 208, 216, 224, 248, 256, 264, 272, 304, 312, 320, 328, 360, 368, 376, 416, 424, 432, 472, 480, 528, 536, 584, 640, + 81, -848, -775, -709, -702, -636, -629, -570, -563, -556, -497, -490, -483, -431, -424, -417, -410, -358, -351, -344, -337, -292, -285, -278, 
-271, -264, -219, -212, -205, -198, -153, -146, -139, -132, -125, -80, -73, -66, -59, -14, -7, 0, 7, 14, 59, 66, 73, 80, 125, 132, 139, 146, 153, 198, 205, 212, 219, 264, 271, 278, 285, 292, 337, 344, 351, 358, 410, 417, 424, 431, 483, 490, 497, 556, 563, 570, 629, 636, 702, 709, 775, 848, + 81, -1464, -1328, -1234, -1192, -1098, -1056, -1004, -962, -920, -868, -826, -784, -774, -732, -690, -648, -638, -596, -554, -544, -512, -502, -460, -418, -408, -376, -366, -324, -314, -282, -272, -230, -188, -178, -146, -136, -94, -84, -52, -42, 0, 42, 52, 84, 94, 136, 146, 178, 188, 230, 272, 282, 314, 324, 366, 376, 408, 418, 460, 502, 512, 544, 554, 596, 638, 648, 690, 732, 774, 784, 826, 868, 920, 962, 1004, 1056, 1098, 1192, 1234, 1328, 1464 + }; + + const unsigned int g_maxPotentialOffsets = 81; + + const int16_t g_thModifierTable[8] = + { + 3, 6, 11, 16, 23, 32, 41, 64 + }; + } + } +} diff --git a/thirdparty/cvtt/ConvectionKernels_ETC2.h b/thirdparty/cvtt/ConvectionKernels_ETC2.h new file mode 100644 index 0000000000..4befc8e8c2 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_ETC2.h @@ -0,0 +1,35 @@ +#include <stdint.h> + +namespace cvtt +{ + namespace Tables + { + namespace ETC2 + { + const int16_t g_thModifierTable[8] = + { + 3, 6, 11, 16, 23, 32, 41, 64 + }; + + const int16_t g_alphaModifierTablePositive[16][4] = + { + { 2, 5, 8, 14, }, + { 2, 6, 9, 12, }, + { 1, 4, 7, 12, }, + { 1, 3, 5, 12, }, + { 2, 5, 7, 11, }, + { 2, 6, 8, 10, }, + { 3, 6, 7, 10, }, + { 2, 4, 7, 10, }, + { 1, 5, 7, 9, }, + { 1, 4, 7, 9, }, + { 1, 3, 7, 9, }, + { 1, 4, 6, 9, }, + { 2, 3, 6, 9, }, + { 0, 1, 2, 9, }, + { 3, 5, 7, 8, }, + { 2, 4, 6, 8, }, + }; + } + } +} diff --git a/thirdparty/cvtt/ConvectionKernels_ETC2_Rounding.h b/thirdparty/cvtt/ConvectionKernels_ETC2_Rounding.h new file mode 100644 index 0000000000..a4f5a3ddfa --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_ETC2_Rounding.h @@ -0,0 +1,27 @@ +#pragma once +#include <stdint.h> + +// This file is generated by the 
MakeTables app. Do not edit this file manually. + +namespace cvtt { namespace Tables { namespace ETC2 { + const int g_alphaRoundingTableWidth = 13; + const uint8_t g_alphaRoundingTables[16][13] = + { + { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3 }, + { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 }, + { 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3 }, + { 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 }, + { 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 }, + { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3 }, + { 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 }, + { 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3 }, + { 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 }, + { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }, + { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }, + { 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3 }, + { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3 }, + { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3 }, + { 0, 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3 }, + { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3 }, + }; +}}} diff --git a/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h b/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h new file mode 100644 index 0000000000..c1276553b2 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_EndpointRefiner.h @@ -0,0 +1,181 @@ +#pragma once +#ifndef __CVTT_ENDPOINTREFINER_H__ +#define __CVTT_ENDPOINTREFINER_H__ + +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + namespace Internal + { + // Solve for a, b where v = a*t + b + // This allows endpoints to be mapped to where T=0 and T=1 + // Least squares from totals: + // a = (tv - t*v/w)/(tt - t*t/w) + // b = (v - a*t)/w + template<int TVectorSize> + class EndpointRefiner + { + public: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::AInt16 MAInt16; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::SInt32 MSInt32; + + MFloat m_tv[TVectorSize]; + MFloat m_v[TVectorSize]; + MFloat m_tt; + MFloat m_t; + MFloat m_w; + int m_wu; + + float m_rcpMaxIndex; + 
float m_channelWeights[TVectorSize]; + float m_rcpChannelWeights[TVectorSize]; + + void Init(int indexRange, const float channelWeights[TVectorSize]) + { + for (int ch = 0; ch < TVectorSize; ch++) + { + m_tv[ch] = ParallelMath::MakeFloatZero(); + m_v[ch] = ParallelMath::MakeFloatZero(); + } + m_tt = ParallelMath::MakeFloatZero(); + m_t = ParallelMath::MakeFloatZero(); + m_w = ParallelMath::MakeFloatZero(); + + m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1); + + for (int ch = 0; ch < TVectorSize; ch++) + { + m_channelWeights[ch] = channelWeights[ch]; + m_rcpChannelWeights[ch] = 1.0f; + if (m_channelWeights[ch] != 0.0f) + m_rcpChannelWeights[ch] = 1.0f / channelWeights[ch]; + } + + m_wu = 0; + } + + void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight) + { + MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; + + for (int ch = 0; ch < TVectorSize; ch++) + { + MFloat v = pwFloatPixel[ch] * weight; + + m_tv[ch] = m_tv[ch] + t * v; + m_v[ch] = m_v[ch] + v; + } + m_tt = m_tt + weight * t * t; + m_t = m_t + weight * t; + m_w = m_w + weight; + } + + void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels) + { + MFloat t = ParallelMath::ToFloat(index) * m_rcpMaxIndex; + + for (int ch = 0; ch < numRealChannels; ch++) + { + MFloat v = pwFloatPixel[ch]; + + m_tv[ch] = m_tv[ch] + t * v; + m_v[ch] = m_v[ch] + v; + } + m_tt = m_tt + t * t; + m_t = m_t + t; + m_wu++; + } + + void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index) + { + ContributeUnweightedPW(floatPixel, index, TVectorSize); + } + + void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize]) + { + // a = (tv - t*v/w)/(tt - t*t/w) + // b = (v - a*t)/w + MFloat w = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu)); + + ParallelMath::MakeSafeDenominator(w); + MFloat wRcp = ParallelMath::Reciprocal(w); + + MFloat adenom = (m_tt * w - m_t * m_t) * wRcp; + + ParallelMath::FloatCompFlag adenomZero = 
ParallelMath::Equal(adenom, ParallelMath::MakeFloatZero()); + ParallelMath::ConditionalSet(adenom, adenomZero, ParallelMath::MakeFloat(1.0f)); + + for (int ch = 0; ch < TVectorSize; ch++) + { + /* + if (adenom == 0.0) + p1 = p2 = er.v / er.w; + else + { + float4 a = (er.tv - er.t*er.v / er.w) / adenom; + float4 b = (er.v - a * er.t) / er.w; + p1 = b; + p2 = a + b; + } + */ + + MFloat a = (m_tv[ch] - m_t * m_v[ch] * wRcp) / adenom; + MFloat b = (m_v[ch] - a * m_t) * wRcp; + + MFloat p1 = b; + MFloat p2 = a + b; + + ParallelMath::ConditionalSet(p1, adenomZero, (m_v[ch] * wRcp)); + ParallelMath::ConditionalSet(p2, adenomZero, p1); + + // Unweight + float inverseWeight = m_rcpChannelWeights[ch]; + + endPoint[0][ch] = p1 * inverseWeight; + endPoint[1][ch] = p2 * inverseWeight; + } + } + + void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode) + { + MFloat floatEndPoint[2][TVectorSize]; + GetRefinedEndpoints(floatEndPoint); + + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < TVectorSize; ch++) + endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(floatEndPoint[epi][ch], 0.0f, 255.0f), roundingMode); + } + + void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode) + { + GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode); + } + + void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode) + { + MFloat floatEndPoint[2][TVectorSize]; + GetRefinedEndpoints(floatEndPoint); + + for (int epi = 0; epi < 2; epi++) + { + for (int ch = 0; ch < TVectorSize; ch++) + { + MFloat f = floatEndPoint[epi][ch]; + if (isSigned) + endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(f, -31743.0f, 31743.0f), roundingMode)); + else + endPoint[epi][ch] = 
ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(f, 0.0f, 31743.0f), roundingMode)); + } + } + } + }; + } +} + +#endif + diff --git a/thirdparty/cvtt/ConvectionKernels_EndpointSelector.h b/thirdparty/cvtt/ConvectionKernels_EndpointSelector.h new file mode 100644 index 0000000000..e09dfd248c --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_EndpointSelector.h @@ -0,0 +1,153 @@ +#pragma once +#ifndef __CVTT_ENDPOINTSELECTOR_H__ +#define __CVTT_ENDPOINTSELECTOR_H__ + +#include "ConvectionKernels_ParallelMath.h" +#include "ConvectionKernels_UnfinishedEndpoints.h" +#include "ConvectionKernels_PackedCovarianceMatrix.h" + +namespace cvtt +{ + namespace Internal + { + static const int NumEndpointSelectorPasses = 3; + + template<int TVectorSize, int TIterationCount> + class EndpointSelector + { + public: + typedef ParallelMath::Float MFloat; + + EndpointSelector() + { + for (int ch = 0; ch < TVectorSize; ch++) + { + m_centroid[ch] = ParallelMath::MakeFloatZero(); + m_direction[ch] = ParallelMath::MakeFloatZero(); + } + m_weightTotal = ParallelMath::MakeFloatZero(); + m_minDist = ParallelMath::MakeFloat(FLT_MAX); + m_maxDist = ParallelMath::MakeFloat(-FLT_MAX); + } + + void ContributePass(const MFloat *value, int pass, const MFloat &weight) + { + if (pass == 0) + ContributeCentroid(value, weight); + else if (pass == 1) + ContributeDirection(value, weight); + else if (pass == 2) + ContributeMinMax(value); + } + + void FinishPass(int pass) + { + if (pass == 0) + FinishCentroid(); + else if (pass == 1) + FinishDirection(); + } + + UnfinishedEndpoints<TVectorSize> GetEndpoints(const float channelWeights[TVectorSize]) const + { + MFloat unweightedBase[TVectorSize]; + MFloat unweightedOffset[TVectorSize]; + + for (int ch = 0; ch < TVectorSize; ch++) + { + MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist; + MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist; + + float safeWeight = channelWeights[ch]; + if 
(safeWeight == 0.f) + safeWeight = 1.0f; + + /* Divide by safeWeight, not the raw channel weight, so that a channel weight of 0 does not cause a division by zero. */ + unweightedBase[ch] = min / safeWeight; + unweightedOffset[ch] = (max - min) / safeWeight; + } + + return UnfinishedEndpoints<TVectorSize>(unweightedBase, unweightedOffset); + } + + private: + /* Adds value * weight to the running centroid sum and weight to the running weight total. */ + void ContributeCentroid(const MFloat *value, const MFloat &weight) + { + for (int ch = 0; ch < TVectorSize; ch++) + m_centroid[ch] = m_centroid[ch] + value[ch] * weight; + m_weightTotal = m_weightTotal + weight; + } + + /* Turns the accumulated weighted sum into the centroid by dividing by the total weight, after passing that total through MakeSafeDenominator. */ + void FinishCentroid() + { + MFloat denom = m_weightTotal; + ParallelMath::MakeSafeDenominator(denom); + + for (int ch = 0; ch < TVectorSize; ch++) + m_centroid[ch] = m_centroid[ch] / denom; + } + + /* Adds the offset of value from the centroid to the covariance matrix with the given weight. */ + void ContributeDirection(const MFloat *value, const MFloat &weight) + { + MFloat diff[TVectorSize]; + for (int ch = 0; ch < TVectorSize; ch++) + diff[ch] = value[ch] - m_centroid[ch]; + + m_covarianceMatrix.Add(diff, weight); + } + + /* Starts from an all-ones vector, multiplies it by the covariance matrix TIterationCount times (rescaling by the largest component each time), then normalizes the result to unit length and stores it in m_direction. */ + void FinishDirection() + { + MFloat approx[TVectorSize]; + for (int ch = 0; ch < TVectorSize; ch++) + approx[ch] = ParallelMath::MakeFloat(1.0f); + + for (int i = 0; i < TIterationCount; i++) + { + MFloat product[TVectorSize]; + m_covarianceMatrix.Product(product, approx); + + MFloat largestComponent = product[0]; + for (int ch = 1; ch < TVectorSize; ch++) + largestComponent = ParallelMath::Max(largestComponent, product[ch]); + + // product = largestComponent*newApprox + ParallelMath::MakeSafeDenominator(largestComponent); + for (int ch = 0; ch < TVectorSize; ch++) + approx[ch] = product[ch] / largestComponent; + } + + // Normalize + MFloat approxLen = ParallelMath::MakeFloatZero(); + for (int ch = 0; ch < TVectorSize; ch++) + approxLen = approxLen + approx[ch] * approx[ch]; + + approxLen = ParallelMath::Sqrt(approxLen); + + ParallelMath::MakeSafeDenominator(approxLen); + + for (int ch = 0; ch < TVectorSize; ch++) + m_direction[ch] = approx[ch] / approxLen; + } + + void ContributeMinMax(const MFloat *value) + { + MFloat dist = ParallelMath::MakeFloatZero(); + for (int ch = 0; 
ch < TVectorSize; ch++) + dist = dist + m_direction[ch] * (value[ch] - m_centroid[ch]); + + m_minDist = ParallelMath::Min(m_minDist, dist); + m_maxDist = ParallelMath::Max(m_maxDist, dist); + } + + ParallelMath::Float m_centroid[TVectorSize]; + ParallelMath::Float m_direction[TVectorSize]; + PackedCovarianceMatrix<TVectorSize> m_covarianceMatrix; + ParallelMath::Float m_weightTotal; + + ParallelMath::Float m_minDist; + ParallelMath::Float m_maxDist; + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_FakeBT709_Rounding.h b/thirdparty/cvtt/ConvectionKernels_FakeBT709_Rounding.h new file mode 100644 index 0000000000..1eb924befe --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_FakeBT709_Rounding.h @@ -0,0 +1,282 @@ +#pragma once +#include <stdint.h> + +// This file is generated by the MakeTables app. Do not edit this file manually. + +namespace cvtt { namespace Tables { namespace FakeBT709 { + const uint8_t g_rounding16[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 1, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, + 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 
1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, + + }; +}}} diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelector.cpp b/thirdparty/cvtt/ConvectionKernels_IndexSelector.cpp new file mode 100644 index 0000000000..b3d1b5497e --- /dev/null +++ 
b/thirdparty/cvtt/ConvectionKernels_IndexSelector.cpp @@ -0,0 +1,66 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels_IndexSelector.h" + +namespace cvtt +{ + namespace Internal + { + const ParallelMath::UInt16 g_weightReciprocals[17] = + { + ParallelMath::MakeUInt16(0), // -1 + ParallelMath::MakeUInt16(0), // 0 + ParallelMath::MakeUInt16(32768), // 1 + ParallelMath::MakeUInt16(16384), // 2 + ParallelMath::MakeUInt16(10923), // 3 + ParallelMath::MakeUInt16(8192), // 4 + ParallelMath::MakeUInt16(6554), // 5 + ParallelMath::MakeUInt16(5461), // 6 + ParallelMath::MakeUInt16(4681), // 7 + ParallelMath::MakeUInt16(4096), // 8 + ParallelMath::MakeUInt16(3641), // 9 + ParallelMath::MakeUInt16(3277), // 10 + ParallelMath::MakeUInt16(2979), // 11 + ParallelMath::MakeUInt16(2731), // 12 + ParallelMath::MakeUInt16(2521), // 13 + ParallelMath::MakeUInt16(2341), // 14 + ParallelMath::MakeUInt16(2185), // 15 + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelector.h b/thirdparty/cvtt/ConvectionKernels_IndexSelector.h new file mode 100644 index 0000000000..0f9d209183 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_IndexSelector.h @@ -0,0 +1,147 @@ +#pragma once +#ifndef __CVTT_INDEXSELECTOR_H__ +#define __CVTT_INDEXSELECTOR_H__ + +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + namespace Internal + { + extern const ParallelMath::UInt16 g_weightReciprocals[17]; + + template<int TVectorSize> + class IndexSelector + { + public: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::AInt16 MAInt16; + typedef ParallelMath::SInt32 MSInt32; + typedef ParallelMath::UInt31 MUInt31; + + + template<class TInterpolationEPType, class TColorEPType> + void Init(const float *channelWeights, const TInterpolationEPType 
interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range) + { + // In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space. + // We need to select indexes using the color-space endpoints. + + m_isUniform = true; + for (int ch = 1; ch < TVectorSize; ch++) + { + if (channelWeights[ch] != channelWeights[0]) + m_isUniform = false; + } + + // To work with channel weights, we need something where: + // pxDiff = px - ep[0] + // epDiff = ep[1] - ep[0] + // + // weightedEPDiff = epDiff * channelWeights + // normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff) + // normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff) + // index = normalizedIndex * maxValue + // + // Equivalent to: + // axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights) + // index = dot(axis, pxDiff) + + for (int ep = 0; ep < 2; ep++) + for (int ch = 0; ch < TVectorSize; ch++) + m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]); + + m_range = range; + m_maxValue = static_cast<float>(range - 1); + + MFloat epDiffWeighted[TVectorSize]; + for (int ch = 0; ch < TVectorSize; ch++) + { + m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]); + MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]); + epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch]; + } + + MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0]; + for (int ch = 1; ch < TVectorSize; ch++) + lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch]; + + ParallelMath::MakeSafeDenominator(lenSquared); + + MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared; + + for (int ch = 0; ch < TVectorSize; ch++) + m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared; + } + + template<bool TSigned> + void 
Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range) + { + MAInt16 converted[2][TVectorSize]; + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < TVectorSize; ch++) + converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]); + + Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range); + } + + void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels) + { + MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); + + for (int ch = 0; ch < numRealChannels; ch++) + { + MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); + MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); + pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6)); + } + } + + void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels) + { + MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7)); + + for (int ch = 0; ch < numRealChannels; ch++) + { + MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch]))); + MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch]))); + pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8)); + } + } + + void 
ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel) + { + ReconstructLDR_BC7(index, pixel, TVectorSize); + } + + void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel) + { + ReconstructLDRPrecise(index, pixel, TVectorSize); + } + + MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const + { + MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0]; + for (int ch = 1; ch < TVectorSize; ch++) + dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch]; + + return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn); + } + + protected: + MAInt16 m_endPoint[2][TVectorSize]; + + private: + MFloat m_origin[TVectorSize]; + MFloat m_axis[TVectorSize]; + int m_range; + float m_maxValue; + bool m_isUniform; + }; + } +} + +#endif + diff --git a/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h b/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h new file mode 100644 index 0000000000..84795cd689 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_IndexSelectorHDR.h @@ -0,0 +1,155 @@ +#pragma once +#ifndef __CVTT_INDEXSELECTORHDR_H__ +#define __CVTT_INDEXSELECTORHDR_H__ + +#include "ConvectionKernels_ParallelMath.h" +#include "ConvectionKernels_IndexSelector.h" + +namespace cvtt +{ + namespace Internal + { + ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v); + ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v); + + template<int TVectorSize> + class IndexSelectorHDR : public IndexSelector<TVectorSize> + { + public: + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt31 MUInt31; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::SInt32 MSInt32; + typedef ParallelMath::Float MFloat; + + private: + + MUInt15 InvertSingle(const MUInt15& anIndex) const + { + MUInt15 inverted = m_maxValueMinusOne - anIndex; + return ParallelMath::Select(m_isInverted, inverted, anIndex); + } + + 
void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const + { + MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); + + for (int ch = 0; ch < TVectorSize; ch++) + { + MSInt16 ep0 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]); + MSInt16 ep1 = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]); + + MSInt32 pixel32 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); + + pixel32 = ParallelMath::RightShift(pixel32 + ParallelMath::MakeSInt32(32), 6); + + pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(pixel32)); + } + } + + void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const + { + MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9)); + + for (int ch = 0; ch < TVectorSize; ch++) + { + MUInt16 ep0 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]); + MUInt16 ep1 = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]); + + MUInt31 pixel31 = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), ep0) + ParallelMath::XMultiply(weight, ep1); + + pixel31 = ParallelMath::RightShift(pixel31 + ParallelMath::MakeUInt31(32), 6); + + pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(pixel31))); + } + } + + MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const + { + MFloat diff = pixel[ch] - m_reconstructedInterpolators[index][ch]; + return diff * diff; + } + + MFloat ErrorForInterpolator(int index, const MFloat *pixel) const + { + MFloat error = ErrorForInterpolatorComponent(index, 0, pixel); + for (int ch = 1; ch < TVectorSize; ch++) + error = error + ErrorForInterpolatorComponent(index, ch, 
pixel); + return error; + } + + public: + + void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights) + { + assert(range <= 16); + + m_range = range; + + m_isInverted = ParallelMath::MakeBoolInt16(false); + m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1)); + + if (!fastIndexing) + { + for (int i = 0; i < range; i++) + { + MSInt16 recon2CL[TVectorSize]; + + if (isSigned) + ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL); + else + ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), recon2CL); + + for (int ch = 0; ch < TVectorSize; ch++) + m_reconstructedInterpolators[i][ch] = ParallelMath::TwosCLHalfToFloat(recon2CL[ch]) * channelWeights[ch]; + } + } + } + + void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const + { + ReconstructHDRSignedUninverted(InvertSingle(index), pixel); + } + + void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const + { + ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel); + } + + void ConditionalInvert(const ParallelMath::Int16CompFlag &invert) + { + m_isInverted = invert; + } + + MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const + { + MUInt15 index = ParallelMath::MakeUInt15(0); + + MFloat bestError = ErrorForInterpolator(0, pixel); + for (int i = 1; i < m_range; i++) + { + MFloat error = ErrorForInterpolator(i, pixel); + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); + ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(errorBetter), ParallelMath::MakeUInt15(static_cast<uint16_t>(i))); + bestError = ParallelMath::Min(bestError, error); + } + + return InvertSingle(index); + } + + MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const + { + return InvertSingle(this->SelectIndexLDR(pixel, rtn)); + } + + private: + 
MFloat m_reconstructedInterpolators[16][TVectorSize]; + ParallelMath::Int16CompFlag m_isInverted; + MUInt15 m_maxValueMinusOne; + int m_range; + }; + } +} +#endif + diff --git a/thirdparty/cvtt/ConvectionKernels_PackedCovarianceMatrix.h b/thirdparty/cvtt/ConvectionKernels_PackedCovarianceMatrix.h new file mode 100644 index 0000000000..7ac3d4fdda --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_PackedCovarianceMatrix.h @@ -0,0 +1,68 @@ +#pragma once +#ifndef __CVTT_COVARIANCEMATRIX_H__ +#define __CVTT_COVARIANCEMATRIX_H__ + +namespace cvtt +{ + namespace Internal + { + + template<int TMatrixSize> + class PackedCovarianceMatrix + { + public: + // 0: xx, + // 1: xy, yy + // 3: xz, yz, zz + // 6: xw, yw, zw, ww + // ... etc. + static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2; + + typedef ParallelMath::Float MFloat; + + PackedCovarianceMatrix() + { + for (int i = 0; i < PyramidSize; i++) + m_values[i] = ParallelMath::MakeFloatZero(); + } + + void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight) + { + int index = 0; + for (int row = 0; row < TMatrixSize; row++) + { + for (int col = 0; col <= row; col++) + { + m_values[index] = m_values[index] + vec[row] * vec[col] * weight; + index++; + } + } + } + + void Product(MFloat *outVec, const MFloat *inVec) + { + for (int row = 0; row < TMatrixSize; row++) + { + MFloat sum = ParallelMath::MakeFloatZero(); + + int index = (row * (row + 1)) >> 1; + for (int col = 0; col < TMatrixSize; col++) + { + sum = sum + inVec[col] * m_values[index]; + if (col >= row) + index += col + 1; + else + index++; + } + + outVec[row] = sum; + } + } + + private: + ParallelMath::Float m_values[PyramidSize]; + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_ParallelMath.h b/thirdparty/cvtt/ConvectionKernels_ParallelMath.h new file mode 100644 index 0000000000..9e25280f45 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_ParallelMath.h @@ -0,0 +1,1816 @@ +/* +Convection Texture Tools 
+Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +*/ +#pragma once +#ifndef __CVTT_PARALLELMATH_H__ +#define __CVTT_PARALLELMATH_H__ + +#include "ConvectionKernels.h" +#include "ConvectionKernels_Config.h" + +#ifdef CVTT_USE_SSE2 +#include <emmintrin.h> +#endif + +#include <float.h> +#include <assert.h> +#include <string.h> +#include <algorithm> +#include <math.h> + +#define UNREFERENCED_PARAMETER(n) ((void)n) + +// Parallel math implementation +// +// After preprocessor defs are handled, what this should do is expose the following types: +// SInt16 - Signed 16-bit integer +// UInt16 - Unsigned 16-bit integer +// UInt15 - Unsigned 15-bit integer +// SInt32 - Signed 32-bit integer +// UInt31 - Unsigned 31-bit integer +// AInt16 - 16-bit integer of unknown signedness (only used for storage) +// Int16CompFlag - Comparison flags from comparing 16-bit integers +// Int32CompFlag - Comparison flags from comparing 32-bit integers +// FloatCompFlag - Comparison flags from comparing 32-bit floats +// +// The reason for these distinctions is that depending on the instruction set, signed or unsigned versions of certain ops +// (particularly max, min, compares, and right shift) may not be available. In cases where ops are not available, it's +// necessary to do high bit manipulations to accomplish the operation with 16-bit numbers. The 15-bit and 31-bit uint types +// can elide the bit flips if unsigned versions are not available. 
+ +namespace cvtt +{ +#ifdef CVTT_USE_SSE2 + // SSE2 version + struct ParallelMath + { + typedef uint16_t ScalarUInt16; + typedef int16_t ScalarSInt16; + + template<unsigned int TRoundingMode> + struct RoundForScope + { + unsigned int m_oldCSR; + + RoundForScope() + { + m_oldCSR = _mm_getcsr(); + _mm_setcsr((m_oldCSR & ~_MM_ROUND_MASK) | (TRoundingMode)); + } + + ~RoundForScope() + { + _mm_setcsr(m_oldCSR); + } + }; + + struct RoundTowardZeroForScope : RoundForScope<_MM_ROUND_TOWARD_ZERO> + { + }; + + struct RoundTowardNearestForScope : RoundForScope<_MM_ROUND_NEAREST> + { + }; + + struct RoundUpForScope : RoundForScope<_MM_ROUND_UP> + { + }; + + struct RoundDownForScope : RoundForScope<_MM_ROUND_DOWN> + { + }; + + static const int ParallelSize = 8; + + enum Int16Subtype + { + IntSubtype_Signed, + IntSubtype_UnsignedFull, + IntSubtype_UnsignedTruncated, + IntSubtype_Abstract, + }; + + template<int TSubtype> + struct VInt16 + { + __m128i m_value; + + inline VInt16 operator+(int16_t other) const + { + VInt16 result; + result.m_value = _mm_add_epi16(m_value, _mm_set1_epi16(static_cast<int16_t>(other))); + return result; + } + + inline VInt16 operator+(const VInt16 &other) const + { + VInt16 result; + result.m_value = _mm_add_epi16(m_value, other.m_value); + return result; + } + + inline VInt16 operator|(const VInt16 &other) const + { + VInt16 result; + result.m_value = _mm_or_si128(m_value, other.m_value); + return result; + } + + inline VInt16 operator&(const VInt16 &other) const + { + VInt16 result; + result.m_value = _mm_and_si128(m_value, other.m_value); + return result; + } + + inline VInt16 operator-(const VInt16 &other) const + { + VInt16 result; + result.m_value = _mm_sub_epi16(m_value, other.m_value); + return result; + } + + inline VInt16 operator<<(int bits) const + { + VInt16 result; + result.m_value = _mm_slli_epi16(m_value, bits); + return result; + } + + inline VInt16 operator^(const VInt16 &other) const + { + VInt16 result; + result.m_value = 
_mm_xor_si128(m_value, other.m_value); + return result; + } + }; + + typedef VInt16<IntSubtype_Signed> SInt16; + typedef VInt16<IntSubtype_UnsignedFull> UInt16; + typedef VInt16<IntSubtype_UnsignedTruncated> UInt15; + typedef VInt16<IntSubtype_Abstract> AInt16; + + template<int TSubtype> + struct VInt32 + { + __m128i m_values[2]; + + inline VInt32 operator+(const VInt32& other) const + { + VInt32 result; + result.m_values[0] = _mm_add_epi32(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_add_epi32(m_values[1], other.m_values[1]); + return result; + } + + inline VInt32 operator-(const VInt32& other) const + { + VInt32 result; + result.m_values[0] = _mm_sub_epi32(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_sub_epi32(m_values[1], other.m_values[1]); + return result; + } + + inline VInt32 operator<<(const int other) const + { + VInt32 result; + result.m_values[0] = _mm_slli_epi32(m_values[0], other); + result.m_values[1] = _mm_slli_epi32(m_values[1], other); + return result; + } + + inline VInt32 operator|(const VInt32& other) const + { + VInt32 result; + result.m_values[0] = _mm_or_si128(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_or_si128(m_values[1], other.m_values[1]); + return result; + } + }; + + typedef VInt32<IntSubtype_Signed> SInt32; + typedef VInt32<IntSubtype_UnsignedTruncated> UInt31; + typedef VInt32<IntSubtype_UnsignedFull> UInt32; + typedef VInt32<IntSubtype_Abstract> AInt32; + + template<class TTargetType> + struct LosslessCast + { +#ifdef CVTT_PERMIT_ALIASING + template<int TSrcSubtype> + static const TTargetType& Cast(const VInt32<TSrcSubtype> &src) + { + return reinterpret_cast<VInt32<TSubtype>&>(src); + } + + template<int TSrcSubtype> + static const TTargetType& Cast(const VInt16<TSrcSubtype> &src) + { + return reinterpret_cast<VInt16<TSubtype>&>(src); + } +#else + template<int TSrcSubtype> + static TTargetType Cast(const VInt32<TSrcSubtype> &src) + { + TTargetType result; + result.m_values[0] = 
src.m_values[0]; + result.m_values[1] = src.m_values[1]; + return result; + } + + template<int TSrcSubtype> + static TTargetType Cast(const VInt16<TSrcSubtype> &src) + { + TTargetType result; + result.m_value = src.m_value; + return result; + } +#endif + }; + + struct Int64 + { + __m128i m_values[4]; + }; + + struct Float + { + __m128 m_values[2]; + + inline Float operator+(const Float &other) const + { + Float result; + result.m_values[0] = _mm_add_ps(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_add_ps(m_values[1], other.m_values[1]); + return result; + } + + inline Float operator+(float other) const + { + Float result; + result.m_values[0] = _mm_add_ps(m_values[0], _mm_set1_ps(other)); + result.m_values[1] = _mm_add_ps(m_values[1], _mm_set1_ps(other)); + return result; + } + + inline Float operator-(const Float& other) const + { + Float result; + result.m_values[0] = _mm_sub_ps(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_sub_ps(m_values[1], other.m_values[1]); + return result; + } + + inline Float operator-() const + { + Float result; + result.m_values[0] = _mm_sub_ps(_mm_setzero_ps(), m_values[0]); + result.m_values[1] = _mm_sub_ps(_mm_setzero_ps(), m_values[1]); + return result; + } + + inline Float operator*(const Float& other) const + { + Float result; + result.m_values[0] = _mm_mul_ps(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_mul_ps(m_values[1], other.m_values[1]); + return result; + } + + inline Float operator*(float other) const + { + Float result; + result.m_values[0] = _mm_mul_ps(m_values[0], _mm_set1_ps(other)); + result.m_values[1] = _mm_mul_ps(m_values[1], _mm_set1_ps(other)); + return result; + } + + inline Float operator/(const Float &other) const + { + Float result; + result.m_values[0] = _mm_div_ps(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_div_ps(m_values[1], other.m_values[1]); + return result; + } + + inline Float operator/(float other) const + { + Float result; + 
result.m_values[0] = _mm_div_ps(m_values[0], _mm_set1_ps(other)); + result.m_values[1] = _mm_div_ps(m_values[1], _mm_set1_ps(other)); + return result; + } + }; + + struct Int16CompFlag + { + __m128i m_value; + + inline Int16CompFlag operator&(const Int16CompFlag &other) const + { + Int16CompFlag result; + result.m_value = _mm_and_si128(m_value, other.m_value); + return result; + } + + inline Int16CompFlag operator|(const Int16CompFlag &other) const + { + Int16CompFlag result; + result.m_value = _mm_or_si128(m_value, other.m_value); + return result; + } + }; + + struct Int32CompFlag + { + __m128i m_values[2]; + + inline Int32CompFlag operator&(const Int32CompFlag &other) const + { + Int32CompFlag result; + result.m_values[0] = _mm_and_si128(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_and_si128(m_values[1], other.m_values[1]); + return result; + } + + inline Int32CompFlag operator|(const Int32CompFlag &other) const + { + Int32CompFlag result; + result.m_values[0] = _mm_or_si128(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_or_si128(m_values[1], other.m_values[1]); + return result; + } + }; + + struct FloatCompFlag + { + __m128 m_values[2]; + + inline FloatCompFlag operator&(const FloatCompFlag &other) const + { + FloatCompFlag result; + result.m_values[0] = _mm_and_ps(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_and_ps(m_values[1], other.m_values[1]); + return result; + } + + inline FloatCompFlag operator|(const FloatCompFlag &other) const + { + FloatCompFlag result; + result.m_values[0] = _mm_or_ps(m_values[0], other.m_values[0]); + result.m_values[1] = _mm_or_ps(m_values[1], other.m_values[1]); + return result; + } + }; + + template<int TSubtype> + static VInt16<TSubtype> AbstractAdd(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) + { + VInt16<TSubtype> result; + result.m_value = _mm_add_epi16(a.m_value, b.m_value); + return result; + } + + template<int TSubtype> + static VInt16<TSubtype> AbstractSubtract(const 
VInt16<TSubtype> &a, const VInt16<TSubtype> &b) + { + VInt16<TSubtype> result; + result.m_value = _mm_sub_epi16(a.m_value, b.m_value); + return result; + } + + static Float Select(const FloatCompFlag &flag, const Float &a, const Float &b) + { + Float result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_or_ps(_mm_and_ps(flag.m_values[i], a.m_values[i]), _mm_andnot_ps(flag.m_values[i], b.m_values[i])); + return result; + } + + template<int TSubtype> + static VInt16<TSubtype> Select(const Int16CompFlag &flag, const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) + { + VInt16<TSubtype> result; + result.m_value = _mm_or_si128(_mm_and_si128(flag.m_value, a.m_value), _mm_andnot_si128(flag.m_value, b.m_value)); + return result; + } + + template<int TSubtype> + static VInt16<TSubtype> SelectOrZero(const Int16CompFlag &flag, const VInt16<TSubtype> &a) + { + VInt16<TSubtype> result; + result.m_value = _mm_and_si128(flag.m_value, a.m_value); + return result; + } + + template<int TSubtype> + static void ConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src) + { + dest.m_value = _mm_or_si128(_mm_andnot_si128(flag.m_value, dest.m_value), _mm_and_si128(flag.m_value, src.m_value)); + } + + template<int TSubtype> + static void ConditionalSet(VInt32<TSubtype> &dest, const Int16CompFlag &flag, const VInt32<TSubtype> &src) + { + __m128i lowFlags = _mm_unpacklo_epi16(flag.m_value, flag.m_value); + __m128i highFlags = _mm_unpackhi_epi16(flag.m_value, flag.m_value); + dest.m_values[0] = _mm_or_si128(_mm_andnot_si128(lowFlags, dest.m_values[0]), _mm_and_si128(lowFlags, src.m_values[0])); + dest.m_values[1] = _mm_or_si128(_mm_andnot_si128(highFlags, dest.m_values[1]), _mm_and_si128(highFlags, src.m_values[1])); + } + + static void ConditionalSet(ParallelMath::Int16CompFlag &dest, const Int16CompFlag &flag, const ParallelMath::Int16CompFlag &src) + { + dest.m_value = _mm_or_si128(_mm_andnot_si128(flag.m_value, dest.m_value), 
_mm_and_si128(flag.m_value, src.m_value)); + } + + static SInt16 ConditionalNegate(const Int16CompFlag &flag, const SInt16 &v) + { + SInt16 result; + result.m_value = _mm_add_epi16(_mm_xor_si128(flag.m_value, v.m_value), _mm_srli_epi16(flag.m_value, 15)); + return result; + } + + template<int TSubtype> + static void NotConditionalSet(VInt16<TSubtype> &dest, const Int16CompFlag &flag, const VInt16<TSubtype> &src) + { + dest.m_value = _mm_or_si128(_mm_and_si128(flag.m_value, dest.m_value), _mm_andnot_si128(flag.m_value, src.m_value)); + } + + static void ConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src) + { + for (int i = 0; i < 2; i++) + dest.m_values[i] = _mm_or_ps(_mm_andnot_ps(flag.m_values[i], dest.m_values[i]), _mm_and_ps(flag.m_values[i], src.m_values[i])); + } + + static void NotConditionalSet(Float &dest, const FloatCompFlag &flag, const Float &src) + { + for (int i = 0; i < 2; i++) + dest.m_values[i] = _mm_or_ps(_mm_and_ps(flag.m_values[i], dest.m_values[i]), _mm_andnot_ps(flag.m_values[i], src.m_values[i])); + } + + static void MakeSafeDenominator(Float& v) + { + ConditionalSet(v, Equal(v, MakeFloatZero()), MakeFloat(1.0f)); + } + + static SInt16 TruncateToPrecisionSigned(const SInt16 &v, int precision) + { + int lostBits = 16 - precision; + if (lostBits == 0) + return v; + + SInt16 result; + result.m_value = _mm_srai_epi16(_mm_slli_epi16(v.m_value, lostBits), lostBits); + return result; + } + + static UInt16 TruncateToPrecisionUnsigned(const UInt16 &v, int precision) + { + int lostBits = 16 - precision; + if (lostBits == 0) + return v; + + UInt16 result; + result.m_value = _mm_srli_epi16(_mm_slli_epi16(v.m_value, lostBits), lostBits); + return result; + } + + static UInt16 Min(const UInt16 &a, const UInt16 &b) + { + __m128i bitFlip = _mm_set1_epi16(-32768); + + UInt16 result; + result.m_value = _mm_xor_si128(_mm_min_epi16(_mm_xor_si128(a.m_value, bitFlip), _mm_xor_si128(b.m_value, bitFlip)), bitFlip); + return result; + } + + static 
SInt16 Min(const SInt16 &a, const SInt16 &b) + { + SInt16 result; + result.m_value = _mm_min_epi16(a.m_value, b.m_value); + return result; + } + + static UInt15 Min(const UInt15 &a, const UInt15 &b) + { + UInt15 result; + result.m_value = _mm_min_epi16(a.m_value, b.m_value); + return result; + } + + static Float Min(const Float &a, const Float &b) + { + Float result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_min_ps(a.m_values[i], b.m_values[i]); + return result; + } + + static UInt16 Max(const UInt16 &a, const UInt16 &b) + { + __m128i bitFlip = _mm_set1_epi16(-32768); + + UInt16 result; + result.m_value = _mm_xor_si128(_mm_max_epi16(_mm_xor_si128(a.m_value, bitFlip), _mm_xor_si128(b.m_value, bitFlip)), bitFlip); + return result; + } + + static SInt16 Max(const SInt16 &a, const SInt16 &b) + { + SInt16 result; + result.m_value = _mm_max_epi16(a.m_value, b.m_value); + return result; + } + + static UInt15 Max(const UInt15 &a, const UInt15 &b) + { + UInt15 result; + result.m_value = _mm_max_epi16(a.m_value, b.m_value); + return result; + } + + static Float Max(const Float &a, const Float &b) + { + Float result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_max_ps(a.m_values[i], b.m_values[i]); + return result; + } + + static Float Clamp(const Float &v, float min, float max) + { + Float result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_max_ps(_mm_min_ps(v.m_values[i], _mm_set1_ps(max)), _mm_set1_ps(min)); + return result; + } + + static Float Reciprocal(const Float &v) + { + Float result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_rcp_ps(v.m_values[i]); + return result; + } + + static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, UInt15 &chOut) + { + int16_t values[8]; + for (int i = 0; i < 8; i++) + values[i] = inputBlocks[i].m_pixels[pxOffset][channel]; + + chOut.m_value = _mm_set_epi16(values[7], values[6], values[5], values[4], values[3], values[2], values[1], values[0]); + } + 
+ static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, SInt16 &chOut) + { + int16_t values[8]; + for (int i = 0; i < 8; i++) + values[i] = inputBlocks[i].m_pixels[pxOffset][channel]; + + chOut.m_value = _mm_set_epi16(values[7], values[6], values[5], values[4], values[3], values[2], values[1], values[0]); + } + + static Float MakeFloat(float v) + { + Float f; + f.m_values[0] = f.m_values[1] = _mm_set1_ps(v); + return f; + } + + static Float MakeFloatZero() + { + Float f; + f.m_values[0] = f.m_values[1] = _mm_setzero_ps(); + return f; + } + + static UInt16 MakeUInt16(uint16_t v) + { + UInt16 result; + result.m_value = _mm_set1_epi16(static_cast<short>(v)); + return result; + } + + static SInt16 MakeSInt16(int16_t v) + { + SInt16 result; + result.m_value = _mm_set1_epi16(static_cast<short>(v)); + return result; + } + + static AInt16 MakeAInt16(int16_t v) + { + AInt16 result; + result.m_value = _mm_set1_epi16(static_cast<short>(v)); + return result; + } + + static UInt15 MakeUInt15(uint16_t v) + { + UInt15 result; + result.m_value = _mm_set1_epi16(static_cast<short>(v)); + return result; + } + + static SInt32 MakeSInt32(int32_t v) + { + SInt32 result; + result.m_values[0] = _mm_set1_epi32(v); + result.m_values[1] = _mm_set1_epi32(v); + return result; + } + + static UInt31 MakeUInt31(uint32_t v) + { + UInt31 result; + result.m_values[0] = _mm_set1_epi32(v); + result.m_values[1] = _mm_set1_epi32(v); + return result; + } + + static uint16_t Extract(const UInt16 &v, int offset) + { + return reinterpret_cast<const uint16_t*>(&v.m_value)[offset]; + } + + static int16_t Extract(const SInt16 &v, int offset) + { + return reinterpret_cast<const int16_t*>(&v.m_value)[offset]; + } + + static uint16_t Extract(const UInt15 &v, int offset) + { + return reinterpret_cast<const uint16_t*>(&v.m_value)[offset]; + } + + static int16_t Extract(const AInt16 &v, int offset) + { + return reinterpret_cast<const int16_t*>(&v.m_value)[offset]; + } + + static 
int32_t Extract(const SInt32 &v, int offset) + { + return reinterpret_cast<const int32_t*>(&v.m_values[offset >> 2])[offset & 3]; + } + + static float Extract(const Float &v, int offset) + { + return reinterpret_cast<const float*>(&v.m_values[offset >> 2])[offset & 3]; + } + + static bool Extract(const ParallelMath::Int16CompFlag &v, int offset) + { + return reinterpret_cast<const int16_t*>(&v.m_value)[offset] != 0; + } + + static void PutUInt16(UInt16 &dest, int offset, uint16_t v) + { + reinterpret_cast<uint16_t*>(&dest)[offset] = v; + } + + static void PutUInt15(UInt15 &dest, int offset, uint16_t v) + { + reinterpret_cast<uint16_t*>(&dest)[offset] = v; + } + + static void PutSInt16(SInt16 &dest, int offset, int16_t v) + { + reinterpret_cast<int16_t*>(&dest)[offset] = v; + } + + static float ExtractFloat(const Float& v, int offset) + { + return reinterpret_cast<const float*>(&v)[offset]; + } + + static void PutFloat(Float &dest, int offset, float v) + { + reinterpret_cast<float*>(&dest)[offset] = v; + } + + static void PutBoolInt16(Int16CompFlag &dest, int offset, bool v) + { + reinterpret_cast<int16_t*>(&dest)[offset] = v ? 
-1 : 0; + } + + static Int32CompFlag Less(const UInt31 &a, const UInt31 &b) + { + Int32CompFlag result; + result.m_values[0] = _mm_cmplt_epi32(a.m_values[0], b.m_values[0]); + result.m_values[1] = _mm_cmplt_epi32(a.m_values[1], b.m_values[1]); + return result; + } + + static Int16CompFlag Less(const SInt16 &a, const SInt16 &b) + { + Int16CompFlag result; + result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value); + return result; + } + + static Int16CompFlag Less(const UInt15 &a, const UInt15 &b) + { + Int16CompFlag result; + result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value); + return result; + } + + static Int16CompFlag LessOrEqual(const UInt15 &a, const UInt15 &b) + { + Int16CompFlag result; + result.m_value = _mm_cmplt_epi16(a.m_value, b.m_value); + return result; + } + + static FloatCompFlag Less(const Float &a, const Float &b) + { + FloatCompFlag result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_cmplt_ps(a.m_values[i], b.m_values[i]); + return result; + } + + static FloatCompFlag LessOrEqual(const Float &a, const Float &b) + { + FloatCompFlag result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_cmple_ps(a.m_values[i], b.m_values[i]); + return result; + } + + template<int TSubtype> + static Int16CompFlag Equal(const VInt16<TSubtype> &a, const VInt16<TSubtype> &b) + { + Int16CompFlag result; + result.m_value = _mm_cmpeq_epi16(a.m_value, b.m_value); + return result; + } + + static FloatCompFlag Equal(const Float &a, const Float &b) + { + FloatCompFlag result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_cmpeq_ps(a.m_values[i], b.m_values[i]); + return result; + } + + static Int16CompFlag Equal(const Int16CompFlag &a, const Int16CompFlag &b) + { + Int16CompFlag notResult; + notResult.m_value = _mm_xor_si128(a.m_value, b.m_value); + return Not(notResult); + } + + static Float ToFloat(const UInt16 &v) + { + Float result; + result.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v.m_value, _mm_setzero_si128())); + 
result.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v.m_value, _mm_setzero_si128())); + return result; + } + + static UInt31 ToUInt31(const UInt16 &v) + { + UInt31 result; + result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()); + result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()); + return result; + } + + static SInt32 ToInt32(const UInt16 &v) + { + SInt32 result; + result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()); + result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()); + return result; + } + + static SInt32 ToInt32(const UInt15 &v) + { + SInt32 result; + result.m_values[0] = _mm_unpacklo_epi16(v.m_value, _mm_setzero_si128()); + result.m_values[1] = _mm_unpackhi_epi16(v.m_value, _mm_setzero_si128()); + return result; + } + + static SInt32 ToInt32(const SInt16 &v) + { + SInt32 result; + result.m_values[0] = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v.m_value), 16); + result.m_values[1] = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v.m_value), 16); + return result; + } + + static Float ToFloat(const SInt16 &v) + { + Float result; + result.m_values[0] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), v.m_value), 16)); + result.m_values[1] = _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), v.m_value), 16)); + return result; + } + + static Float ToFloat(const UInt15 &v) + { + Float result; + result.m_values[0] = _mm_cvtepi32_ps(_mm_unpacklo_epi16(v.m_value, _mm_setzero_si128())); + result.m_values[1] = _mm_cvtepi32_ps(_mm_unpackhi_epi16(v.m_value, _mm_setzero_si128())); + return result; + } + + static Float ToFloat(const UInt31 &v) + { + Float result; + result.m_values[0] = _mm_cvtepi32_ps(v.m_values[0]); + result.m_values[1] = _mm_cvtepi32_ps(v.m_values[1]); + return result; + } + + static Int16CompFlag FloatFlagToInt16(const FloatCompFlag &v) + { + __m128i lo = _mm_castps_si128(v.m_values[0]); + __m128i hi = 
_mm_castps_si128(v.m_values[1]); + + Int16CompFlag result; + result.m_value = _mm_packs_epi32(lo, hi); + return result; + } + + static FloatCompFlag Int16FlagToFloat(const Int16CompFlag &v) + { + __m128i lo = _mm_unpacklo_epi16(v.m_value, v.m_value); + __m128i hi = _mm_unpackhi_epi16(v.m_value, v.m_value); + + FloatCompFlag result; + result.m_values[0] = _mm_castsi128_ps(lo); + result.m_values[1] = _mm_castsi128_ps(hi); + return result; + } + + static Int16CompFlag Int32FlagToInt16(const Int32CompFlag &v) + { + __m128i lo = v.m_values[0]; + __m128i hi = v.m_values[1]; + + Int16CompFlag result; + result.m_value = _mm_packs_epi32(lo, hi); + return result; + } + + static Int16CompFlag MakeBoolInt16(bool b) + { + Int16CompFlag result; + if (b) + result.m_value = _mm_set1_epi16(-1); + else + result.m_value = _mm_setzero_si128(); + return result; + } + + static FloatCompFlag MakeBoolFloat(bool b) + { + FloatCompFlag result; + if (b) + result.m_values[0] = result.m_values[1] = _mm_castsi128_ps(_mm_set1_epi32(-1)); + else + result.m_values[0] = result.m_values[1] = _mm_setzero_ps(); + return result; + } + + static Int16CompFlag AndNot(const Int16CompFlag &a, const Int16CompFlag &b) + { + Int16CompFlag result; + result.m_value = _mm_andnot_si128(b.m_value, a.m_value); + return result; + } + + static Int16CompFlag Not(const Int16CompFlag &b) + { + Int16CompFlag result; + result.m_value = _mm_xor_si128(b.m_value, _mm_set1_epi32(-1)); + return result; + } + + static Int32CompFlag Not(const Int32CompFlag &b) + { + Int32CompFlag result; + result.m_values[0] = _mm_xor_si128(b.m_values[0], _mm_set1_epi32(-1)); + result.m_values[1] = _mm_xor_si128(b.m_values[1], _mm_set1_epi32(-1)); + return result; + } + + static UInt16 RoundAndConvertToU16(const Float &v, const void* /*roundingMode*/) + { + __m128i lo = _mm_cvtps_epi32(_mm_add_ps(v.m_values[0], _mm_set1_ps(-32768))); + __m128i hi = _mm_cvtps_epi32(_mm_add_ps(v.m_values[1], _mm_set1_ps(-32768))); + + __m128i packed = 
_mm_packs_epi32(lo, hi); + + UInt16 result; + result.m_value = _mm_xor_si128(packed, _mm_set1_epi16(-32768)); + return result; + } + + static UInt15 RoundAndConvertToU15(const Float &v, const void* /*roundingMode*/) + { + __m128i lo = _mm_cvtps_epi32(v.m_values[0]); + __m128i hi = _mm_cvtps_epi32(v.m_values[1]); + + __m128i packed = _mm_packs_epi32(lo, hi); + + UInt15 result; + result.m_value = _mm_packs_epi32(lo, hi); + return result; + } + + static SInt16 RoundAndConvertToS16(const Float &v, const void* /*roundingMode*/) + { + __m128i lo = _mm_cvtps_epi32(v.m_values[0]); + __m128i hi = _mm_cvtps_epi32(v.m_values[1]); + + __m128i packed = _mm_packs_epi32(lo, hi); + + SInt16 result; + result.m_value = _mm_packs_epi32(lo, hi); + return result; + } + + static Float Sqrt(const Float &f) + { + Float result; + for (int i = 0; i < 2; i++) + result.m_values[i] = _mm_sqrt_ps(f.m_values[i]); + return result; + } + + static UInt16 Abs(const SInt16 &a) + { + __m128i signBitsXor = _mm_srai_epi16(a.m_value, 15); + __m128i signBitsAdd = _mm_srli_epi16(a.m_value, 15); + + UInt16 result; + result.m_value = _mm_add_epi16(_mm_xor_si128(a.m_value, signBitsXor), signBitsAdd); + return result; + } + + static Float Abs(const Float& a) + { + __m128 invMask = _mm_set1_ps(-0.0f); + + Float result; + result.m_values[0] = _mm_andnot_ps(invMask, a.m_values[0]); + result.m_values[1] = _mm_andnot_ps(invMask, a.m_values[1]); + return result; + } + + static UInt16 SqDiffUInt8(const UInt15 &a, const UInt15 &b) + { + __m128i diff = _mm_sub_epi16(a.m_value, b.m_value); + + UInt16 result; + result.m_value = _mm_mullo_epi16(diff, diff); + return result; + } + + static Float SqDiffSInt16(const SInt16 &a, const SInt16 &b) + { + __m128i diffU = _mm_sub_epi16(_mm_max_epi16(a.m_value, b.m_value), _mm_min_epi16(a.m_value, b.m_value)); + + __m128i mulHi = _mm_mulhi_epu16(diffU, diffU); + __m128i mulLo = _mm_mullo_epi16(diffU, diffU); + __m128i sqDiffHi = _mm_unpackhi_epi16(mulLo, mulHi); + __m128i sqDiffLo = 
_mm_unpacklo_epi16(mulLo, mulHi); + + Float result; + result.m_values[0] = _mm_cvtepi32_ps(sqDiffLo); + result.m_values[1] = _mm_cvtepi32_ps(sqDiffHi); + + return result; + } + + static Float TwosCLHalfToFloat(const SInt16 &v) + { + __m128i absV = _mm_add_epi16(_mm_xor_si128(v.m_value, _mm_srai_epi16(v.m_value, 15)), _mm_srli_epi16(v.m_value, 15)); + + __m128i signBits = _mm_and_si128(v.m_value, _mm_set1_epi16(-32768)); + __m128i mantissa = _mm_and_si128(v.m_value, _mm_set1_epi16(0x03ff)); + __m128i exponent = _mm_and_si128(v.m_value, _mm_set1_epi16(0x7c00)); + + __m128i isDenormal = _mm_cmpeq_epi16(exponent, _mm_setzero_si128()); + + // Convert exponent to high-bits + exponent = _mm_add_epi16(_mm_srli_epi16(exponent, 3), _mm_set1_epi16(14336)); + + __m128i denormalCorrectionHigh = _mm_and_si128(isDenormal, _mm_or_si128(signBits, _mm_set1_epi16(14336))); + + __m128i highBits = _mm_or_si128(signBits, _mm_or_si128(exponent, _mm_srli_epi16(mantissa, 3))); + __m128i lowBits = _mm_slli_epi16(mantissa, 13); + + __m128i flow = _mm_unpacklo_epi16(lowBits, highBits); + __m128i fhigh = _mm_unpackhi_epi16(lowBits, highBits); + + __m128i correctionLow = _mm_unpacklo_epi16(_mm_setzero_si128(), denormalCorrectionHigh); + __m128i correctionHigh = _mm_unpackhi_epi16(_mm_setzero_si128(), denormalCorrectionHigh); + + Float result; + result.m_values[0] = _mm_sub_ps(_mm_castsi128_ps(flow), _mm_castsi128_ps(correctionLow)); + result.m_values[1] = _mm_sub_ps(_mm_castsi128_ps(fhigh), _mm_castsi128_ps(correctionHigh)); + + return result; + } + + static Float SqDiff2CLFloat(const SInt16 &a, const Float &b) + { + Float fa = TwosCLHalfToFloat(a); + + Float diff = fa - b; + return diff * diff; + } + + static Float SqDiff2CL(const SInt16 &a, const SInt16 &b) + { + Float fa = TwosCLHalfToFloat(a); + Float fb = TwosCLHalfToFloat(b); + + Float diff = fa - fb; + return diff * diff; + } + + static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b) + { + Float fa = 
TwosCLHalfToFloat(a) * aWeight; + + Float diff = fa - b; + return diff * diff; + } + + static UInt16 RightShift(const UInt16 &v, int bits) + { + UInt16 result; + result.m_value = _mm_srli_epi16(v.m_value, bits); + return result; + } + + static UInt31 RightShift(const UInt31 &v, int bits) + { + UInt31 result; + result.m_values[0] = _mm_srli_epi32(v.m_values[0], bits); + result.m_values[1] = _mm_srli_epi32(v.m_values[1], bits); + return result; + } + + static SInt16 RightShift(const SInt16 &v, int bits) + { + SInt16 result; + result.m_value = _mm_srai_epi16(v.m_value, bits); + return result; + } + + static UInt15 RightShift(const UInt15 &v, int bits) + { + UInt15 result; + result.m_value = _mm_srli_epi16(v.m_value, bits); + return result; + } + + static SInt32 RightShift(const SInt32 &v, int bits) + { + SInt32 result; + result.m_values[0] = _mm_srai_epi32(v.m_values[0], bits); + result.m_values[1] = _mm_srai_epi32(v.m_values[1], bits); + return result; + } + + static SInt16 ToSInt16(const SInt32 &v) + { + SInt16 result; + result.m_value = _mm_packs_epi32(v.m_values[0], v.m_values[1]); + return result; + } + + static SInt16 ToSInt16(const UInt16 &v) + { + SInt16 result; + result.m_value = v.m_value; + return result; + } + + static SInt16 ToSInt16(const UInt15 &v) + { + SInt16 result; + result.m_value = v.m_value; + return result; + } + + static UInt16 ToUInt16(const UInt32 &v) + { + __m128i low = _mm_srai_epi32(_mm_slli_epi32(v.m_values[0], 16), 16); + __m128i high = _mm_srai_epi32(_mm_slli_epi32(v.m_values[1], 16), 16); + + UInt16 result; + result.m_value = _mm_packs_epi32(low, high); + return result; + } + + static UInt16 ToUInt16(const UInt31 &v) + { + __m128i low = _mm_srai_epi32(_mm_slli_epi32(v.m_values[0], 16), 16); + __m128i high = _mm_srai_epi32(_mm_slli_epi32(v.m_values[1], 16), 16); + + UInt16 result; + result.m_value = _mm_packs_epi32(low, high); + return result; + } + + static UInt15 ToUInt15(const UInt31 &v) + { + UInt15 result; + result.m_value = 
_mm_packs_epi32(v.m_values[0], v.m_values[1]); + return result; + } + + static UInt15 ToUInt15(const SInt16 &v) + { + UInt15 result; + result.m_value = v.m_value; + return result; + } + + static UInt15 ToUInt15(const UInt16 &v) + { + UInt15 result; + result.m_value = v.m_value; + return result; + } + + static SInt32 XMultiply(const SInt16 &a, const SInt16 &b) + { + __m128i high = _mm_mulhi_epi16(a.m_value, b.m_value); + __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); + + SInt32 result; + result.m_values[0] = _mm_unpacklo_epi16(low, high); + result.m_values[1] = _mm_unpackhi_epi16(low, high); + return result; + } + + static SInt32 XMultiply(const SInt16 &a, const UInt15 &b) + { + __m128i high = _mm_mulhi_epi16(a.m_value, b.m_value); + __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); + + SInt32 result; + result.m_values[0] = _mm_unpacklo_epi16(low, high); + result.m_values[1] = _mm_unpackhi_epi16(low, high); + return result; + } + + static SInt32 XMultiply(const UInt15 &a, const SInt16 &b) + { + return XMultiply(b, a); + } + + static UInt32 XMultiply(const UInt16 &a, const UInt16 &b) + { + __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value); + __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); + + UInt32 result; + result.m_values[0] = _mm_unpacklo_epi16(low, high); + result.m_values[1] = _mm_unpackhi_epi16(low, high); + return result; + } + + static UInt16 CompactMultiply(const UInt16 &a, const UInt15 &b) + { + UInt16 result; + result.m_value = _mm_mullo_epi16(a.m_value, b.m_value); + return result; + } + + static UInt16 CompactMultiply(const UInt15 &a, const UInt15 &b) + { + UInt16 result; + result.m_value = _mm_mullo_epi16(a.m_value, b.m_value); + return result; + } + + static SInt16 CompactMultiply(const SInt16 &a, const UInt15 &b) + { + SInt16 result; + result.m_value = _mm_mullo_epi16(a.m_value, b.m_value); + return result; + } + + static SInt16 CompactMultiply(const SInt16 &a, const SInt16 &b) + { + SInt16 result; + result.m_value = 
_mm_mullo_epi16(a.m_value, b.m_value); + return result; + } + + static UInt31 XMultiply(const UInt15 &a, const UInt15 &b) + { + __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value); + __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); + + UInt31 result; + result.m_values[0] = _mm_unpacklo_epi16(low, high); + result.m_values[1] = _mm_unpackhi_epi16(low, high); + return result; + } + + static UInt31 XMultiply(const UInt16 &a, const UInt15 &b) + { + __m128i high = _mm_mulhi_epu16(a.m_value, b.m_value); + __m128i low = _mm_mullo_epi16(a.m_value, b.m_value); + + UInt31 result; + result.m_values[0] = _mm_unpacklo_epi16(low, high); + result.m_values[1] = _mm_unpackhi_epi16(low, high); + return result; + } + + static UInt31 XMultiply(const UInt15 &a, const UInt16 &b) + { + return XMultiply(b, a); + } + + static bool AnySet(const Int16CompFlag &v) + { + return _mm_movemask_epi8(v.m_value) != 0; + } + + static bool AllSet(const Int16CompFlag &v) + { + return _mm_movemask_epi8(v.m_value) == 0xffff; + } + + static bool AnySet(const FloatCompFlag &v) + { + return _mm_movemask_ps(v.m_values[0]) != 0 || _mm_movemask_ps(v.m_values[1]) != 0; + } + + static bool AllSet(const FloatCompFlag &v) + { + return _mm_movemask_ps(v.m_values[0]) == 0xf && _mm_movemask_ps(v.m_values[1]) == 0xf; + } + }; + +#else + // Scalar version + struct ParallelMath + { + struct RoundTowardZeroForScope + { + }; + + struct RoundTowardNearestForScope + { + }; + + struct RoundUpForScope + { + }; + + struct RoundDownForScope + { + }; + + static const int ParallelSize = 1; + + enum Int16Subtype + { + IntSubtype_Signed, + IntSubtype_UnsignedFull, + IntSubtype_UnsignedTruncated, + IntSubtype_Abstract, + }; + + typedef int32_t SInt16; + typedef int32_t UInt15; + typedef int32_t UInt16; + typedef int32_t AInt16; + + typedef int32_t SInt32; + typedef int32_t UInt31; + typedef int32_t UInt32; + typedef int32_t AInt32; + + typedef int32_t ScalarUInt16; + typedef int32_t ScalarSInt16; + + typedef float Float; + + 
template<class TTargetType> + struct LosslessCast + { + static const int32_t& Cast(const int32_t &src) + { + return src; + } + }; + + typedef bool Int16CompFlag; + typedef bool FloatCompFlag; + + static int32_t AbstractAdd(const int32_t &a, const int32_t &b) + { + return a + b; + } + + static int32_t AbstractSubtract(const int32_t &a, const int32_t &b) + { + return a - b; + } + + static float Select(bool flag, float a, float b) + { + return flag ? a : b; + } + + static int32_t Select(bool flag, int32_t a, int32_t b) + { + return flag ? a : b; + } + + static int32_t SelectOrZero(bool flag, int32_t a) + { + return flag ? a : 0; + } + + static void ConditionalSet(int32_t& dest, bool flag, int32_t src) + { + if (flag) + dest = src; + } + + static void ConditionalSet(bool& dest, bool flag, bool src) + { + if (flag) + dest = src; + } + + static int32_t ConditionalNegate(bool flag, int32_t v) + { + return (flag) ? -v : v; + } + + static void NotConditionalSet(int32_t& dest, bool flag, int32_t src) + { + if (!flag) + dest = src; + } + + static void ConditionalSet(float& dest, bool flag, float src) + { + if (flag) + dest = src; + } + + static void NotConditionalSet(float& dest, bool flag, float src) + { + if (!flag) + dest = src; + } + + static void MakeSafeDenominator(float& v) + { + if (v == 0.0f) + v = 1.0f; + } + + static int32_t SignedRightShift(int32_t v, int bits) + { + return v >> bits; + } + + static int32_t TruncateToPrecisionSigned(int32_t v, int precision) + { + v = (v << (32 - precision)) & 0xffffffff; + return SignedRightShift(v, 32 - precision); + } + + static int32_t TruncateToPrecisionUnsigned(int32_t v, int precision) + { + return v & ((1 << precision) - 1); + } + + static int32_t Min(int32_t a, int32_t b) + { + if (a < b) + return a; + return b; + } + + static float Min(float a, float b) + { + if (a < b) + return a; + return b; + } + + static int32_t Max(int32_t a, int32_t b) + { + if (a > b) + return a; + return b; + } + + static float Max(float a, float 
b) + { + if (a > b) + return a; + return b; + } + + static float Abs(float a) + { + return fabsf(a); + } + + static int32_t Abs(int32_t a) + { + if (a < 0) + return -a; + return a; + } + + static float Clamp(float v, float min, float max) + { + if (v < min) + return min; + if (v > max) + return max; + return v; + } + + static float Reciprocal(float v) + { + return 1.0f / v; + } + + static void ConvertLDRInputs(const PixelBlockU8* inputBlocks, int pxOffset, int channel, int32_t& chOut) + { + chOut = inputBlocks[0].m_pixels[pxOffset][channel]; + } + + static void ConvertHDRInputs(const PixelBlockF16* inputBlocks, int pxOffset, int channel, int32_t& chOut) + { + chOut = inputBlocks[0].m_pixels[pxOffset][channel]; + } + + static float MakeFloat(float v) + { + return v; + } + + static float MakeFloatZero() + { + return 0.0f; + } + + static int32_t MakeUInt16(uint16_t v) + { + return v; + } + + static int32_t MakeSInt16(int16_t v) + { + return v; + } + + static int32_t MakeAInt16(int16_t v) + { + return v; + } + + static int32_t MakeUInt15(uint16_t v) + { + return v; + } + + static int32_t MakeSInt32(int32_t v) + { + return v; + } + + static int32_t MakeUInt31(int32_t v) + { + return v; + } + + static int32_t Extract(int32_t v, int offset) + { + UNREFERENCED_PARAMETER(offset); + return v; + } + + static bool Extract(bool v, int offset) + { + UNREFERENCED_PARAMETER(offset); + return v; + } + + static float Extract(float v, int offset) + { + UNREFERENCED_PARAMETER(offset); + return v; + } + + static void PutUInt16(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v) + { + UNREFERENCED_PARAMETER(offset); + dest = v; + } + + static void PutUInt15(int32_t &dest, int offset, ParallelMath::ScalarUInt16 v) + { + UNREFERENCED_PARAMETER(offset); + dest = v; + } + + static void PutSInt16(int32_t &dest, int offset, ParallelMath::ScalarSInt16 v) + { + UNREFERENCED_PARAMETER(offset); + dest = v; + } + + static float ExtractFloat(float v, int offset) + { + 
UNREFERENCED_PARAMETER(offset); + return v; + } + + static void PutFloat(float &dest, int offset, float v) + { + UNREFERENCED_PARAMETER(offset); + dest = v; + } + + static void PutBoolInt16(bool &dest, int offset, bool v) + { + UNREFERENCED_PARAMETER(offset); + dest = v; + } + + static bool Less(int32_t a, int32_t b) + { + return a < b; + } + + static bool Less(float a, float b) + { + return a < b; + } + + static bool LessOrEqual(int32_t a, int32_t b) + { + return a < b; + } + + static bool LessOrEqual(float a, float b) + { + return a < b; + } + + static bool Equal(int32_t a, int32_t b) + { + return a == b; + } + + static bool Equal(float a, float b) + { + return a == b; + } + + static float ToFloat(int32_t v) + { + return static_cast<float>(v); + } + + static int32_t ToUInt31(int32_t v) + { + return v; + } + + static int32_t ToInt32(int32_t v) + { + return v; + } + + static bool FloatFlagToInt16(bool v) + { + return v; + } + + static bool Int32FlagToInt16(bool v) + { + return v; + } + + static bool Int16FlagToFloat(bool v) + { + return v; + } + + static bool MakeBoolInt16(bool b) + { + return b; + } + + static bool MakeBoolFloat(bool b) + { + return b; + } + + static bool AndNot(bool a, bool b) + { + return a && !b; + } + + static bool Not(bool b) + { + return !b; + } + + static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardZeroForScope *rtz) + { + UNREFERENCED_PARAMETER(rtz); + return static_cast<int>(v); + } + + static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundUpForScope *ru) + { + UNREFERENCED_PARAMETER(ru); + return static_cast<int>(ceilf(v)); + } + + static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundDownForScope *rd) + { + UNREFERENCED_PARAMETER(rd); + return static_cast<int>(floorf(v)); + } + + static int32_t RoundAndConvertToInt(float v, const ParallelMath::RoundTowardNearestForScope *rtn) + { + UNREFERENCED_PARAMETER(rtn); + return static_cast<int>(floorf(v + 0.5f)); + } + + template<class 
TRoundMode> + static int32_t RoundAndConvertToU16(float v, const TRoundMode *roundingMode) + { + return RoundAndConvertToInt(v, roundingMode); + } + + template<class TRoundMode> + static int32_t RoundAndConvertToU15(float v, const TRoundMode *roundingMode) + { + return RoundAndConvertToInt(v, roundingMode); + } + + template<class TRoundMode> + static int32_t RoundAndConvertToS16(float v, const TRoundMode *roundingMode) + { + return RoundAndConvertToInt(v, roundingMode); + } + + static float Sqrt(float f) + { + return sqrtf(f); + } + + static int32_t SqDiffUInt8(int32_t a, int32_t b) + { + int32_t delta = a - b; + return delta * delta; + } + + static int32_t SqDiffInt16(int32_t a, int32_t b) + { + int32_t delta = a - b; + return delta * delta; + } + + static int32_t SqDiffSInt16(int32_t a, int32_t b) + { + int32_t delta = a - b; + return delta * delta; + } + + static float TwosCLHalfToFloat(int32_t v) + { + int32_t absV = (v < 0) ? -v : v; + + int32_t signBits = (absV & -32768); + int32_t mantissa = (absV & 0x03ff); + int32_t exponent = (absV & 0x7c00); + + bool isDenormal = (exponent == 0); + + // Convert exponent to high-bits + exponent = (exponent >> 3) + 14336; + + int32_t denormalCorrection = (isDenormal ? 
(signBits | 14336) : 0) << 16; + + int32_t fBits = ((exponent | signBits) << 16) | (mantissa << 13); + + float f, correction; + memcpy(&f, &fBits, 4); + memcpy(&correction, &denormalCorrection, 4); + + return f - correction; + } + + static Float SqDiff2CLFloat(const SInt16 &a, const Float &b) + { + Float fa = TwosCLHalfToFloat(a); + + Float diff = fa - b; + return diff * diff; + } + + static Float SqDiff2CL(const SInt16 &a, const SInt16 &b) + { + Float fa = TwosCLHalfToFloat(a); + Float fb = TwosCLHalfToFloat(b); + + Float diff = fa - fb; + return diff * diff; + } + + static Float SqDiff2CLFloat(const SInt16 &a, float aWeight, const Float &b) + { + Float fa = TwosCLHalfToFloat(a) * aWeight; + + Float diff = fa - b; + return diff * diff; + } + + static int32_t RightShift(int32_t v, int bits) + { + return SignedRightShift(v, bits); + } + + static int32_t ToSInt16(int32_t v) + { + return v; + } + + static int32_t ToUInt16(int32_t v) + { + return v; + } + + static int32_t ToUInt15(int32_t v) + { + return v; + } + + static int32_t XMultiply(int32_t a, int32_t b) + { + return a * b; + } + + static int32_t CompactMultiply(int32_t a, int32_t b) + { + return a * b; + } + + static bool AnySet(bool v) + { + return v; + } + + static bool AllSet(bool v) + { + return v; + } + }; + +#endif +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_S3TC.cpp b/thirdparty/cvtt/ConvectionKernels_S3TC.cpp new file mode 100644 index 0000000000..23f1bd3314 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_S3TC.cpp @@ -0,0 +1,1054 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is 
furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. + +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels_S3TC.h" + +#include "ConvectionKernels_AggregatedError.h" +#include "ConvectionKernels_BCCommon.h" +#include "ConvectionKernels_EndpointRefiner.h" +#include "ConvectionKernels_EndpointSelector.h" +#include "ConvectionKernels_IndexSelector.h" +#include "ConvectionKernels_UnfinishedEndpoints.h" +#include "ConvectionKernels_S3TC_SingleColor.h" + +void cvtt::Internal::S3TCComputer::Init(MFloat& error) +{ + error = ParallelMath::MakeFloat(FLT_MAX); +} + +void cvtt::Internal::S3TCComputer::QuantizeTo6Bits(MUInt15& v) +{ + MUInt15 reduced = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(253)) + ParallelMath::MakeUInt16(512), 10)); + v = (reduced << 2) | ParallelMath::RightShift(reduced, 4); +} + +void cvtt::Internal::S3TCComputer::QuantizeTo5Bits(MUInt15& v) +{ + MUInt15 reduced = 
ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(v, ParallelMath::MakeUInt15(249)) + ParallelMath::MakeUInt16(1024), 11)); + v = (reduced << 3) | ParallelMath::RightShift(reduced, 2); +} + +void cvtt::Internal::S3TCComputer::QuantizeTo565(MUInt15 endPoint[3]) +{ + QuantizeTo5Bits(endPoint[0]); + QuantizeTo6Bits(endPoint[1]); + QuantizeTo5Bits(endPoint[2]); +} + +cvtt::ParallelMath::Float cvtt::Internal::S3TCComputer::ParanoidFactorForSpan(const MSInt16& span) +{ + return ParallelMath::Abs(ParallelMath::ToFloat(span)) * 0.03f; +} + +cvtt::ParallelMath::Float cvtt::Internal::S3TCComputer::ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d) +{ + MFloat absDiff = ParallelMath::Abs(ParallelMath::ToFloat(ParallelMath::LosslessCast<MSInt16>::Cast(a) - ParallelMath::LosslessCast<MSInt16>::Cast(b))); + absDiff = absDiff + d; + return absDiff * absDiff; +} + +void cvtt::Internal::S3TCComputer::TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights, + MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn) +{ + float channelWeightsSq[3]; + + for (int ch = 0; ch < 3; ch++) + channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; + + MUInt15 totals[3] = { ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(0) }; + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + totals[ch] = totals[ch] + pixels[px][ch]; + } + + MUInt15 average[3]; + for (int ch = 0; ch < 3; ch++) + average[ch] = ParallelMath::RightShift(totals[ch] + ParallelMath::MakeUInt15(8), 4); + + const Tables::S3TCSC::TableEntry* rbTable = NULL; + const Tables::S3TCSC::TableEntry* gTable = NULL; + if (flags & cvtt::Flags::S3TC_Paranoid) + { + if (range == 4) + { + rbTable = Tables::S3TCSC::g_singleColor5_3_p; + gTable = 
Tables::S3TCSC::g_singleColor6_3_p; + } + else + { + assert(range == 3); + rbTable = Tables::S3TCSC::g_singleColor5_2_p; + gTable = Tables::S3TCSC::g_singleColor6_2_p; + } + } + else + { + if (range == 4) + { + rbTable = Tables::S3TCSC::g_singleColor5_3; + gTable = Tables::S3TCSC::g_singleColor6_3; + } + else + { + assert(range == 3); + rbTable = Tables::S3TCSC::g_singleColor5_2; + gTable = Tables::S3TCSC::g_singleColor6_2; + } + } + + MUInt15 interpolated[3]; + MUInt15 eps[2][3]; + MSInt16 spans[3]; + for (int i = 0; i < ParallelMath::ParallelSize; i++) + { + for (int ch = 0; ch < 3; ch++) + { + uint16_t avg = ParallelMath::Extract(average[ch], i); + const Tables::S3TCSC::TableEntry& tableEntry = ((ch == 1) ? gTable[avg] : rbTable[avg]); + ParallelMath::PutUInt15(eps[0][ch], i, tableEntry.m_min); + ParallelMath::PutUInt15(eps[1][ch], i, tableEntry.m_max); + ParallelMath::PutUInt15(interpolated[ch], i, tableEntry.m_actualColor); + ParallelMath::PutSInt16(spans[ch], i, tableEntry.m_span); + } + } + + MFloat error = ParallelMath::MakeFloatZero(); + if (flags & cvtt::Flags::S3TC_Paranoid) + { + MFloat spanParanoidFactors[3]; + for (int ch = 0; ch < 3; ch++) + spanParanoidFactors[ch] = ParanoidFactorForSpan(spans[ch]); + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + error = error + ParanoidDiff(interpolated[ch], pixels[px][ch], spanParanoidFactors[ch]) * channelWeightsSq[ch]; + } + } + else + { + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 3; ch++) + error = error + ParallelMath::ToFloat(ParallelMath::SqDiffUInt8(interpolated[ch], pixels[px][ch])) * channelWeightsSq[ch]; + } + } + + ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError); + ParallelMath::Int16CompFlag better16 = ParallelMath::FloatFlagToInt16(better); + + if (ParallelMath::AnySet(better16)) + { + bestError = ParallelMath::Min(bestError, error); + for (int epi = 0; epi < 2; epi++) + for (int ch = 0; ch < 3; ch++) + 
ParallelMath::ConditionalSet(bestEndpoints[epi][ch], better16, eps[epi][ch]); + + MUInt15 vindexes = ParallelMath::MakeUInt15(1); + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestIndexes[px], better16, vindexes); + + ParallelMath::ConditionalSet(bestRange, better16, ParallelMath::MakeUInt15(range)); + } +} + +void cvtt::Internal::S3TCComputer::TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights, + MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn) +{ + float channelWeightsSq[3]; + + for (int ch = 0; ch < 3; ch++) + channelWeightsSq[ch] = channelWeights[ch] * channelWeights[ch]; + + MUInt15 endPoints[2][3]; + + for (int ep = 0; ep < 2; ep++) + for (int ch = 0; ch < 3; ch++) + endPoints[ep][ch] = unquantizedEndPoints[ep][ch]; + + QuantizeTo565(endPoints[0]); + QuantizeTo565(endPoints[1]); + + IndexSelector<3> selector; + selector.Init<false>(channelWeights, endPoints, range); + + MUInt15 indexes[16]; + + MFloat paranoidFactors[3]; + for (int ch = 0; ch < 3; ch++) + paranoidFactors[ch] = ParanoidFactorForSpan(ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[0][ch]) - ParallelMath::LosslessCast<MSInt16>::Cast(endPoints[1][ch])); + + MFloat error = ParallelMath::MakeFloatZero(); + AggregatedError<3> aggError; + for (int px = 0; px < 16; px++) + { + MUInt15 index = selector.SelectIndexLDR(floatPixels[px], rtn); + indexes[px] = index; + + if (refiner) + refiner->ContributeUnweightedPW(preWeightedPixels[px], index); + + MUInt15 reconstructed[3]; + selector.ReconstructLDRPrecise(index, reconstructed); + + if (flags & Flags::S3TC_Paranoid) + { + for (int ch = 0; ch < 3; ch++) + error = error + ParanoidDiff(reconstructed[ch], pixels[px][ch], paranoidFactors[ch]) * 
channelWeightsSq[ch]; + } + else + BCCommon::ComputeErrorLDR<3>(flags, reconstructed, pixels[px], aggError); + } + + if (!(flags & Flags::S3TC_Paranoid)) + error = aggError.Finalize(flags, channelWeightsSq); + + ParallelMath::FloatCompFlag better = ParallelMath::Less(error, bestError); + + if (ParallelMath::AnySet(better)) + { + ParallelMath::Int16CompFlag betterInt16 = ParallelMath::FloatFlagToInt16(better); + + ParallelMath::ConditionalSet(bestError, better, error); + + for (int ep = 0; ep < 2; ep++) + for (int ch = 0; ch < 3; ch++) + ParallelMath::ConditionalSet(bestEndpoints[ep][ch], betterInt16, endPoints[ep][ch]); + + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestIndexes[px], betterInt16, indexes[px]); + + ParallelMath::ConditionalSet(bestRange, betterInt16, ParallelMath::MakeUInt15(static_cast<uint16_t>(range))); + } +} + +void cvtt::Internal::S3TCComputer::TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest, + const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, + const ParallelMath::RoundTowardNearestForScope* rtn) +{ + UNREFERENCED_PARAMETER(alphaTest); + UNREFERENCED_PARAMETER(flags); + + EndpointRefiner<3> refiner; + + refiner.Init(nCounts, channelWeights); + + bool escape = false; + int e = 0; + for (int i = 0; i < nCounts; i++) + { + for (int n = 0; n < counts[i]; n++) + { + ParallelMath::Int16CompFlag valid = ParallelMath::Less(ParallelMath::MakeUInt15(static_cast<uint16_t>(n)), numElements); + if (!ParallelMath::AnySet(valid)) + { + escape = true; + break; + } + + if (ParallelMath::AllSet(valid)) + refiner.ContributeUnweightedPW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i))); + else + { 
+ MFloat weight = ParallelMath::Select(ParallelMath::Int16FlagToFloat(valid), ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloat(0.0f)); + refiner.ContributePW(preWeightedFloatSortedInputs[e++], ParallelMath::MakeUInt15(static_cast<uint16_t>(i)), weight); + } + } + + if (escape) + break; + } + + MUInt15 endPoints[2][3]; + refiner.GetRefinedEndpointsLDR(endPoints, rtn); + + TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, nCounts, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, NULL, rtn); +} + +void cvtt::Internal::S3TCComputer::PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride) +{ + UNREFERENCED_PARAMETER(flags); + ParallelMath::RoundTowardNearestForScope rtn; + + float weights[1] = { 1.0f }; + + MUInt15 pixels[16]; + MFloat floatPixels[16]; + + for (int px = 0; px < 16; px++) + { + ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]); + floatPixels[px] = ParallelMath::ToFloat(pixels[px]); + } + + MUInt15 ep[2][1] = { { ParallelMath::MakeUInt15(0) },{ ParallelMath::MakeUInt15(255) } }; + + IndexSelector<1> selector; + selector.Init<false>(weights, ep, 16); + + MUInt15 indexes[16]; + + for (int px = 0; px < 16; px++) + indexes[px] = selector.SelectIndexLDR(&floatPixels[px], &rtn); + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + for (int px = 0; px < 16; px += 2) + { + int index0 = ParallelMath::Extract(indexes[px], block); + int index1 = ParallelMath::Extract(indexes[px + 1], block); + + packedBlocks[px / 2] = static_cast<uint8_t>(index0 | (index1 << 4)); + } + + packedBlocks += packedBlockStride; + } +} + +void cvtt::Internal::S3TCComputer::PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds) +{ + if (maxTweakRounds < 1) + maxTweakRounds = 1; + + if (numRefineRounds < 
1) + numRefineRounds = 1; + + ParallelMath::RoundTowardNearestForScope rtn; + + float oneWeight[1] = { 1.0f }; + + MUInt15 pixels[16]; + MFloat floatPixels[16]; + + MUInt15 highTerminal = isSigned ? ParallelMath::MakeUInt15(254) : ParallelMath::MakeUInt15(255); + MUInt15 highTerminalMinusOne = highTerminal - ParallelMath::MakeUInt15(1); + + for (int px = 0; px < 16; px++) + { + ParallelMath::ConvertLDRInputs(inputs, px, inputChannel, pixels[px]); + + if (isSigned) + pixels[px] = ParallelMath::Min(pixels[px], highTerminal); + + floatPixels[px] = ParallelMath::ToFloat(pixels[px]); + } + + MUInt15 sortedPixels[16]; + for (int px = 0; px < 16; px++) + sortedPixels[px] = pixels[px]; + + for (int sortEnd = 15; sortEnd > 0; sortEnd--) + { + for (int sortOffset = 0; sortOffset < sortEnd; sortOffset++) + { + MUInt15 a = sortedPixels[sortOffset]; + MUInt15 b = sortedPixels[sortOffset + 1]; + + sortedPixels[sortOffset] = ParallelMath::Min(a, b); + sortedPixels[sortOffset + 1] = ParallelMath::Max(a, b); + } + } + + MUInt15 zero = ParallelMath::MakeUInt15(0); + MUInt15 one = ParallelMath::MakeUInt15(1); + + MUInt15 bestIsFullRange = zero; + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + MUInt15 bestEP[2] = { zero, zero }; + MUInt15 bestIndexes[16] = { + zero, zero, zero, zero, + zero, zero, zero, zero, + zero, zero, zero, zero, + zero, zero, zero, zero + }; + + // Full-precision + { + MUInt15 minEP = sortedPixels[0]; + MUInt15 maxEP = sortedPixels[15]; + + MFloat base[1] = { ParallelMath::ToFloat(minEP) }; + MFloat offset[1] = { ParallelMath::ToFloat(maxEP - minEP) }; + + UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset); + + int numTweakRounds = BCCommon::TweakRoundsForRange(8); + if (numTweakRounds > maxTweakRounds) + numTweakRounds = maxTweakRounds; + + for (int tweak = 0; tweak < numTweakRounds; tweak++) + { + MUInt15 ep[2][1]; + + ufep.FinishLDR(tweak, 8, ep[0], ep[1]); + + for (int refinePass = 0; refinePass < numRefineRounds; refinePass++) + { + 
EndpointRefiner<1> refiner; + refiner.Init(8, oneWeight); + + if (isSigned) + for (int epi = 0; epi < 2; epi++) + ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal); + + IndexSelector<1> indexSelector; + indexSelector.Init<false>(oneWeight, ep, 8); + + MUInt15 indexes[16]; + + AggregatedError<1> aggError; + for (int px = 0; px < 16; px++) + { + MUInt15 index = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn); + + MUInt15 reconstructedPixel; + + indexSelector.ReconstructLDRPrecise(index, &reconstructedPixel); + BCCommon::ComputeErrorLDR<1>(flags, &reconstructedPixel, &pixels[px], aggError); + + if (refinePass != numRefineRounds - 1) + refiner.ContributeUnweightedPW(&floatPixels[px], index); + + indexes[px] = index; + } + MFloat error = aggError.Finalize(flags | Flags::Uniform, oneWeight); + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); + ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); + + if (ParallelMath::AnySet(errorBetter16)) + { + bestError = ParallelMath::Min(error, bestError); + ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, one); + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]); + + for (int epi = 0; epi < 2; epi++) + ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]); + } + + if (refinePass != numRefineRounds - 1) + refiner.GetRefinedEndpointsLDR(ep, &rtn); + } + } + } + + // Reduced precision with special endpoints + { + MUInt15 bestHeuristicMin = sortedPixels[0]; + MUInt15 bestHeuristicMax = sortedPixels[15]; + + ParallelMath::Int16CompFlag canTryClipping; + + // In reduced precision, we want try putting endpoints at the reserved indexes at the ends. + // The heuristic we use is to assign indexes to the end as long as they aren't off by more than half of the index range. + // This will usually not find anything, but it's cheap to check. 
+ + { + MUInt15 largestPossibleRange = bestHeuristicMax - bestHeuristicMin; // Max: 255 + MUInt15 lowestPossibleClearance = ParallelMath::Min(bestHeuristicMin, static_cast<MUInt15>(highTerminal - bestHeuristicMax)); + + MUInt15 lowestPossibleClearanceTimes10 = (lowestPossibleClearance << 2) + (lowestPossibleClearance << 4); + canTryClipping = ParallelMath::LessOrEqual(lowestPossibleClearanceTimes10, largestPossibleRange); + } + + if (ParallelMath::AnySet(canTryClipping)) + { + MUInt15 lowClearances[16]; + MUInt15 highClearances[16]; + MUInt15 bestSkipCount = ParallelMath::MakeUInt15(0); + + lowClearances[0] = highClearances[0] = ParallelMath::MakeUInt15(0); + + for (int px = 1; px < 16; px++) + { + lowClearances[px] = sortedPixels[px - 1]; + highClearances[px] = highTerminal - sortedPixels[16 - px]; + } + + for (uint16_t firstIndex = 0; firstIndex < 16; firstIndex++) + { + uint16_t numSkippedLow = firstIndex; + + MUInt15 lowClearance = lowClearances[firstIndex]; + + for (uint16_t lastIndex = firstIndex; lastIndex < 16; lastIndex++) + { + uint16_t numSkippedHigh = 15 - lastIndex; + uint16_t numSkipped = numSkippedLow + numSkippedHigh; + + MUInt15 numSkippedV = ParallelMath::MakeUInt15(numSkipped); + + ParallelMath::Int16CompFlag areMoreSkipped = ParallelMath::Less(bestSkipCount, numSkippedV); + + if (!ParallelMath::AnySet(areMoreSkipped)) + continue; + + MUInt15 clearance = ParallelMath::Max(highClearances[numSkippedHigh], lowClearance); + MUInt15 clearanceTimes10 = (clearance << 2) + (clearance << 4); + + MUInt15 range = sortedPixels[lastIndex] - sortedPixels[firstIndex]; + + ParallelMath::Int16CompFlag isBetter = (areMoreSkipped & ParallelMath::LessOrEqual(clearanceTimes10, range)); + ParallelMath::ConditionalSet(bestHeuristicMin, isBetter, sortedPixels[firstIndex]); + ParallelMath::ConditionalSet(bestHeuristicMax, isBetter, sortedPixels[lastIndex]); + } + } + } + + MUInt15 bestSimpleMin = one; + MUInt15 bestSimpleMax = highTerminalMinusOne; + + for (int px = 0; 
px < 16; px++) + { + ParallelMath::ConditionalSet(bestSimpleMin, ParallelMath::Less(zero, sortedPixels[15 - px]), sortedPixels[15 - px]); + ParallelMath::ConditionalSet(bestSimpleMax, ParallelMath::Less(sortedPixels[px], highTerminal), sortedPixels[px]); + } + + MUInt15 minEPs[2] = { bestSimpleMin, bestHeuristicMin }; + MUInt15 maxEPs[2] = { bestSimpleMax, bestHeuristicMax }; + + int minEPRange = 2; + if (ParallelMath::AllSet(ParallelMath::Equal(minEPs[0], minEPs[1]))) + minEPRange = 1; + + int maxEPRange = 2; + if (ParallelMath::AllSet(ParallelMath::Equal(maxEPs[0], maxEPs[1]))) + maxEPRange = 1; + + for (int minEPIndex = 0; minEPIndex < minEPRange; minEPIndex++) + { + for (int maxEPIndex = 0; maxEPIndex < maxEPRange; maxEPIndex++) + { + MFloat base[1] = { ParallelMath::ToFloat(minEPs[minEPIndex]) }; + MFloat offset[1] = { ParallelMath::ToFloat(maxEPs[maxEPIndex] - minEPs[minEPIndex]) }; + + UnfinishedEndpoints<1> ufep = UnfinishedEndpoints<1>(base, offset); + + int numTweakRounds = BCCommon::TweakRoundsForRange(6); + if (numTweakRounds > maxTweakRounds) + numTweakRounds = maxTweakRounds; + + for (int tweak = 0; tweak < numTweakRounds; tweak++) + { + MUInt15 ep[2][1]; + + ufep.FinishLDR(tweak, 8, ep[0], ep[1]); + + for (int refinePass = 0; refinePass < numRefineRounds; refinePass++) + { + EndpointRefiner<1> refiner; + refiner.Init(6, oneWeight); + + if (isSigned) + for (int epi = 0; epi < 2; epi++) + ep[epi][0] = ParallelMath::Min(ep[epi][0], highTerminal); + + IndexSelector<1> indexSelector; + indexSelector.Init<false>(oneWeight, ep, 6); + + MUInt15 indexes[16]; + MFloat error = ParallelMath::MakeFloatZero(); + + for (int px = 0; px < 16; px++) + { + MUInt15 selectedIndex = indexSelector.SelectIndexLDR(&floatPixels[px], &rtn); + + MUInt15 reconstructedPixel; + + indexSelector.ReconstructLDRPrecise(selectedIndex, &reconstructedPixel); + + MFloat zeroError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &zero, &pixels[px], 1, oneWeight); + MFloat 
highTerminalError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &highTerminal, &pixels[px], 1, oneWeight); + MFloat selectedIndexError = BCCommon::ComputeErrorLDRSimple<1>(flags | Flags::Uniform, &reconstructedPixel, &pixels[px], 1, oneWeight); + + MFloat bestPixelError = zeroError; + MUInt15 index = ParallelMath::MakeUInt15(6); + + ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(ParallelMath::Less(highTerminalError, bestPixelError)), ParallelMath::MakeUInt15(7)); + bestPixelError = ParallelMath::Min(bestPixelError, highTerminalError); + + ParallelMath::FloatCompFlag selectedIndexBetter = ParallelMath::Less(selectedIndexError, bestPixelError); + + if (ParallelMath::AllSet(selectedIndexBetter)) + { + if (refinePass != numRefineRounds - 1) + refiner.ContributeUnweightedPW(&floatPixels[px], selectedIndex); + } + else + { + MFloat refineWeight = ParallelMath::Select(selectedIndexBetter, ParallelMath::MakeFloat(1.0f), ParallelMath::MakeFloatZero()); + + if (refinePass != numRefineRounds - 1) + refiner.ContributePW(&floatPixels[px], selectedIndex, refineWeight); + } + + ParallelMath::ConditionalSet(index, ParallelMath::FloatFlagToInt16(selectedIndexBetter), selectedIndex); + bestPixelError = ParallelMath::Min(bestPixelError, selectedIndexError); + + error = error + bestPixelError; + + indexes[px] = index; + } + + ParallelMath::FloatCompFlag errorBetter = ParallelMath::Less(error, bestError); + ParallelMath::Int16CompFlag errorBetter16 = ParallelMath::FloatFlagToInt16(errorBetter); + + if (ParallelMath::AnySet(errorBetter16)) + { + bestError = ParallelMath::Min(error, bestError); + ParallelMath::ConditionalSet(bestIsFullRange, errorBetter16, zero); + for (int px = 0; px < 16; px++) + ParallelMath::ConditionalSet(bestIndexes[px], errorBetter16, indexes[px]); + + for (int epi = 0; epi < 2; epi++) + ParallelMath::ConditionalSet(bestEP[epi], errorBetter16, ep[epi][0]); + } + + if (refinePass != numRefineRounds - 1) + 
refiner.GetRefinedEndpointsLDR(ep, &rtn); + } + } + } + } + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + int ep0 = ParallelMath::Extract(bestEP[0], block); + int ep1 = ParallelMath::Extract(bestEP[1], block); + int isFullRange = ParallelMath::Extract(bestIsFullRange, block); + + if (isSigned) + { + ep0 -= 127; + ep1 -= 127; + + assert(ep0 >= -127 && ep0 <= 127); + assert(ep1 >= -127 && ep1 <= 127); + } + + + bool swapEndpoints = (isFullRange != 0) != (ep0 > ep1); + + if (swapEndpoints) + std::swap(ep0, ep1); + + uint16_t dumpBits = 0; + int dumpBitsOffset = 0; + int dumpByteOffset = 2; + packedBlocks[0] = static_cast<uint8_t>(ep0 & 0xff); + packedBlocks[1] = static_cast<uint8_t>(ep1 & 0xff); + + int maxValue = (isFullRange != 0) ? 7 : 5; + + for (int px = 0; px < 16; px++) + { + int index = ParallelMath::Extract(bestIndexes[px], block); + + if (swapEndpoints && index <= maxValue) + index = maxValue - index; + + if (index != 0) + { + if (index == maxValue) + index = 1; + else if (index < maxValue) + index++; + } + + assert(index >= 0 && index < 8); + + dumpBits |= static_cast<uint16_t>(index << dumpBitsOffset); + dumpBitsOffset += 3; + + if (dumpBitsOffset >= 8) + { + assert(dumpByteOffset < 8); + packedBlocks[dumpByteOffset] = static_cast<uint8_t>(dumpBits & 0xff); + dumpBits >>= 8; + dumpBitsOffset -= 8; + dumpByteOffset++; + } + } + + assert(dumpBitsOffset == 0); + assert(dumpByteOffset == 8); + + packedBlocks += packedBlockStride; + } +} + +void cvtt::Internal::S3TCComputer::PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds) +{ + ParallelMath::RoundTowardNearestForScope rtn; + + if (numRefineRounds < 1) + numRefineRounds = 1; + + if (maxTweakRounds < 1) + maxTweakRounds = 1; + + EndpointSelector<3, 8> endpointSelector; + + MUInt15 pixels[16][4]; + MFloat 
floatPixels[16][4]; + + MFloat preWeightedPixels[16][4]; + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 4; ch++) + ParallelMath::ConvertLDRInputs(inputs, px, ch, pixels[px][ch]); + } + + for (int px = 0; px < 16; px++) + { + for (int ch = 0; ch < 4; ch++) + floatPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]); + } + + if (alphaTest) + { + MUInt15 threshold = ParallelMath::MakeUInt15(static_cast<uint16_t>(floor(alphaThreshold * 255.0f + 0.5f))); + + for (int px = 0; px < 16; px++) + { + ParallelMath::Int16CompFlag belowThreshold = ParallelMath::Less(pixels[px][3], threshold); + pixels[px][3] = ParallelMath::Select(belowThreshold, ParallelMath::MakeUInt15(0), ParallelMath::MakeUInt15(255)); + } + } + + BCCommon::PreWeightPixelsLDR<4>(preWeightedPixels, pixels, channelWeights); + + MUInt15 minAlpha = ParallelMath::MakeUInt15(255); + + for (int px = 0; px < 16; px++) + minAlpha = ParallelMath::Min(minAlpha, pixels[px][3]); + + MFloat pixelWeights[16]; + for (int px = 0; px < 16; px++) + { + pixelWeights[px] = ParallelMath::MakeFloat(1.0f); + if (alphaTest) + { + ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255)); + + ParallelMath::ConditionalSet(pixelWeights[px], ParallelMath::Int16FlagToFloat(isTransparent), ParallelMath::MakeFloatZero()); + } + } + + for (int pass = 0; pass < NumEndpointSelectorPasses; pass++) + { + for (int px = 0; px < 16; px++) + endpointSelector.ContributePass(preWeightedPixels[px], pass, pixelWeights[px]); + + endpointSelector.FinishPass(pass); + } + + UnfinishedEndpoints<3> ufep = endpointSelector.GetEndpoints(channelWeights); + + MUInt15 bestEndpoints[2][3]; + MUInt15 bestIndexes[16]; + MUInt15 bestRange = ParallelMath::MakeUInt15(0); + MFloat bestError = ParallelMath::MakeFloat(FLT_MAX); + + for (int px = 0; px < 16; px++) + bestIndexes[px] = ParallelMath::MakeUInt15(0); + + for (int ep = 0; ep < 2; ep++) + for (int ch = 0; ch < 3; ch++) + bestEndpoints[ep][ch] 
= ParallelMath::MakeUInt15(0); + + if (exhaustive) + { + MSInt16 sortBins[16]; + + { + // Compute an 11-bit index, change it to signed, stuff it in the high bits of the sort bins, + // and pack the original indexes into the low bits. + + MUInt15 sortEP[2][3]; + ufep.FinishLDR(0, 11, sortEP[0], sortEP[1]); + + IndexSelector<3> sortSelector; + sortSelector.Init<false>(channelWeights, sortEP, 1 << 11); + + for (int16_t px = 0; px < 16; px++) + { + MSInt16 sortBin = ParallelMath::LosslessCast<MSInt16>::Cast(sortSelector.SelectIndexLDR(floatPixels[px], &rtn) << 4); + + if (alphaTest) + { + ParallelMath::Int16CompFlag isTransparent = ParallelMath::Less(pixels[px][3], ParallelMath::MakeUInt15(255)); + + ParallelMath::ConditionalSet(sortBin, isTransparent, ParallelMath::MakeSInt16(-16)); // 0xfff0 + } + + sortBin = sortBin + ParallelMath::MakeSInt16(px); + + sortBins[px] = sortBin; + } + } + + // Sort bins + for (int sortEnd = 1; sortEnd < 16; sortEnd++) + { + for (int sortLoc = sortEnd; sortLoc > 0; sortLoc--) + { + MSInt16 a = sortBins[sortLoc]; + MSInt16 b = sortBins[sortLoc - 1]; + + sortBins[sortLoc] = ParallelMath::Max(a, b); + sortBins[sortLoc - 1] = ParallelMath::Min(a, b); + } + } + + MUInt15 firstElement = ParallelMath::MakeUInt15(0); + for (uint16_t e = 0; e < 16; e++) + { + ParallelMath::Int16CompFlag isInvalid = ParallelMath::Less(sortBins[e], ParallelMath::MakeSInt16(0)); + ParallelMath::ConditionalSet(firstElement, isInvalid, ParallelMath::MakeUInt15(e + 1)); + if (!ParallelMath::AnySet(isInvalid)) + break; + } + + MUInt15 numElements = ParallelMath::MakeUInt15(16) - firstElement; + + MUInt15 sortedInputs[16][4]; + MFloat floatSortedInputs[16][4]; + MFloat pwFloatSortedInputs[16][4]; + + for (int e = 0; e < 16; e++) + { + for (int ch = 0; ch < 4; ch++) + sortedInputs[e][ch] = ParallelMath::MakeUInt15(0); + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + for (int e = ParallelMath::Extract(firstElement, block); e < 16; e++) + { + 
ParallelMath::ScalarUInt16 sortBin = ParallelMath::Extract(sortBins[e], block); + int originalIndex = (sortBin & 15); + + for (int ch = 0; ch < 4; ch++) + ParallelMath::PutUInt15(sortedInputs[15 - e][ch], block, ParallelMath::Extract(pixels[originalIndex][ch], block)); + } + } + + for (int e = 0; e < 16; e++) + { + for (int ch = 0; ch < 4; ch++) + { + MFloat f = ParallelMath::ToFloat(sortedInputs[e][ch]); + floatSortedInputs[e][ch] = f; + pwFloatSortedInputs[e][ch] = f * channelWeights[ch]; + } + } + + for (int n0 = 0; n0 <= 15; n0++) + { + int remainingFor1 = 16 - n0; + if (remainingFor1 == 16) + remainingFor1 = 15; + + for (int n1 = 0; n1 <= remainingFor1; n1++) + { + int remainingFor2 = 16 - n1 - n0; + if (remainingFor2 == 16) + remainingFor2 = 15; + + for (int n2 = 0; n2 <= remainingFor2; n2++) + { + int n3 = 16 - n2 - n1 - n0; + + if (n3 == 16) + continue; + + int counts[4] = { n0, n1, n2, n3 }; + + TestCounts(flags, counts, 4, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); + } + } + } + + TestSingleColor(flags, pixels, floatPixels, 4, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); + + if (alphaTest) + { + for (int n0 = 0; n0 <= 15; n0++) + { + int remainingFor1 = 16 - n0; + if (remainingFor1 == 16) + remainingFor1 = 15; + + for (int n1 = 0; n1 <= remainingFor1; n1++) + { + int n2 = 16 - n1 - n0; + + if (n2 == 16) + continue; + + int counts[3] = { n0, n1, n2 }; + + TestCounts(flags, counts, 3, numElements, pixels, floatPixels, preWeightedPixels, alphaTest, floatSortedInputs, pwFloatSortedInputs, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); + } + } + + TestSingleColor(flags, pixels, floatPixels, 3, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &rtn); + } + } + else + { + int minRange = alphaTest ? 
3 : 4; + + for (int range = minRange; range <= 4; range++) + { + int tweakRounds = BCCommon::TweakRoundsForRange(range); + if (tweakRounds > maxTweakRounds) + tweakRounds = maxTweakRounds; + + for (int tweak = 0; tweak < tweakRounds; tweak++) + { + MUInt15 endPoints[2][3]; + + ufep.FinishLDR(tweak, range, endPoints[0], endPoints[1]); + + for (int refine = 0; refine < numRefineRounds; refine++) + { + EndpointRefiner<3> refiner; + refiner.Init(range, channelWeights); + + TestEndpoints(flags, pixels, floatPixels, preWeightedPixels, endPoints, range, channelWeights, bestError, bestEndpoints, bestIndexes, bestRange, &refiner, &rtn); + + if (refine != numRefineRounds - 1) + refiner.GetRefinedEndpointsLDR(endPoints, &rtn); + } + } + } + } + + for (int block = 0; block < ParallelMath::ParallelSize; block++) + { + ParallelMath::ScalarUInt16 range = ParallelMath::Extract(bestRange, block); + assert(range == 3 || range == 4); + + ParallelMath::ScalarUInt16 compressedEP[2]; + for (int ep = 0; ep < 2; ep++) + { + ParallelMath::ScalarUInt16 endPoint[3]; + for (int ch = 0; ch < 3; ch++) + endPoint[ch] = ParallelMath::Extract(bestEndpoints[ep][ch], block); + + int compressed = (endPoint[0] & 0xf8) << 8; + compressed |= (endPoint[1] & 0xfc) << 3; + compressed |= (endPoint[2] & 0xf8) >> 3; + + compressedEP[ep] = static_cast<ParallelMath::ScalarUInt16>(compressed); + } + + int indexOrder[4]; + + if (range == 4) + { + if (compressedEP[0] == compressedEP[1]) + { + indexOrder[0] = 0; + indexOrder[1] = 0; + indexOrder[2] = 0; + indexOrder[3] = 0; + } + else if (compressedEP[0] < compressedEP[1]) + { + std::swap(compressedEP[0], compressedEP[1]); + indexOrder[0] = 1; + indexOrder[1] = 3; + indexOrder[2] = 2; + indexOrder[3] = 0; + } + else + { + indexOrder[0] = 0; + indexOrder[1] = 2; + indexOrder[2] = 3; + indexOrder[3] = 1; + } + } + else + { + assert(range == 3); + + if (compressedEP[0] > compressedEP[1]) + { + std::swap(compressedEP[0], compressedEP[1]); + indexOrder[0] = 1; + 
indexOrder[1] = 2; + indexOrder[2] = 0; + } + else + { + indexOrder[0] = 0; + indexOrder[1] = 2; + indexOrder[2] = 1; + } + indexOrder[3] = 3; + } + + packedBlocks[0] = static_cast<uint8_t>(compressedEP[0] & 0xff); + packedBlocks[1] = static_cast<uint8_t>((compressedEP[0] >> 8) & 0xff); + packedBlocks[2] = static_cast<uint8_t>(compressedEP[1] & 0xff); + packedBlocks[3] = static_cast<uint8_t>((compressedEP[1] >> 8) & 0xff); + + for (int i = 0; i < 16; i += 4) + { + int packedIndexes = 0; + for (int subi = 0; subi < 4; subi++) + { + ParallelMath::ScalarUInt16 index = ParallelMath::Extract(bestIndexes[i + subi], block); + packedIndexes |= (indexOrder[index] << (subi * 2)); + } + + packedBlocks[4 + i / 4] = static_cast<uint8_t>(packedIndexes); + } + + packedBlocks += packedBlockStride; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_S3TC.h b/thirdparty/cvtt/ConvectionKernels_S3TC.h new file mode 100644 index 0000000000..aa197229c2 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_S3TC.h @@ -0,0 +1,51 @@ +#pragma once +#ifndef __CVTT_S3TC_H__ +#define __CVTT_S3TC_H__ + +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + namespace Internal + { + template<int TVectorSize> + class EndpointRefiner; + } + + struct PixelBlockU8; +} + +namespace cvtt +{ + namespace Internal + { + class S3TCComputer + { + public: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::SInt32 MSInt32; + + static void Init(MFloat& error); + static void QuantizeTo6Bits(MUInt15& v); + static void QuantizeTo5Bits(MUInt15& v); + static void QuantizeTo565(MUInt15 endPoint[3]); + static MFloat ParanoidFactorForSpan(const MSInt16& span); + static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d); + static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const 
float* channelWeights, + MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn); + static void TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights, + MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn); + static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest, + const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, + const ParallelMath::RoundTowardNearestForScope* rtn); + static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride); + static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds); + static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds); + }; + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_S3TC_SingleColor.h b/thirdparty/cvtt/ConvectionKernels_S3TC_SingleColor.h new file mode 100644 index 0000000000..c772b163c2 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_S3TC_SingleColor.h @@ -0,0 +1,304 @@ +#pragma once +#include <stdint.h> + +// This file is 
generated by the MakeTables app. Do not edit this file manually. + +namespace cvtt { namespace Tables { namespace S3TCSC { + +struct TableEntry +{ + uint8_t m_min; + uint8_t m_max; + uint8_t m_actualColor; + uint8_t m_span; +}; + +TableEntry g_singleColor5_3[256] = +{ + { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 2, 8 }, { 0, 8, 2, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 10, 8 }, { 0, 33, 11, 33 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 18, 8 }, { 8, 41, 19, 33 }, { 24, 16, 21, 8 }, { 24, 16, 21, 8 }, { 33, 0, 22, 33 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 41, 29, 17 }, { 33, 24, 30, 9 }, { 33, 24, 30, 9 }, + { 24, 49, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 33, 41, 35, 8 }, { 33, 41, 35, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, + { 49, 24, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 43, 8 }, { 33, 66, 44, 33 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, + { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 51, 8 }, { 41, 74, 52, 33 }, { 57, 49, 54, 8 }, { 57, 49, 54, 8 }, { 66, 33, 55, 33 }, + { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 74, 62, 17 }, { 66, 57, 63, 9 }, + { 66, 57, 63, 9 }, { 57, 82, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 66, 74, 68, 8 }, { 66, 74, 68, 8 }, { 74, 66, 71, 8 }, { 74, 66, 71, 8 }, + { 74, 66, 71, 8 }, { 82, 57, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 76, 8 }, { 66, 99, 77, 33 }, { 82, 74, 79, 8 }, { 82, 74, 79, 8 }, + { 82, 74, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 84, 8 }, { 74, 107, 85, 33 }, { 90, 82, 87, 8 }, { 90, 82, 87, 8 }, + { 99, 66, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 
}, { 90, 90, 90, 0 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 107, 95, 17 }, + { 99, 90, 96, 9 }, { 99, 90, 96, 9 }, { 90, 115, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 99, 107, 101, 8 }, { 99, 107, 101, 8 }, { 107, 99, 104, 8 }, + { 107, 99, 104, 8 }, { 107, 99, 104, 8 }, { 115, 90, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 109, 8 }, { 99, 132, 110, 33 }, { 115, 107, 112, 8 }, + { 115, 107, 112, 8 }, { 115, 107, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 117, 8 }, { 107, 140, 118, 33 }, { 123, 115, 120, 8 }, + { 123, 115, 120, 8 }, { 132, 99, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, + { 123, 140, 128, 17 }, { 132, 123, 129, 9 }, { 132, 123, 129, 9 }, { 123, 148, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 132, 140, 134, 8 }, { 132, 140, 134, 8 }, + { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 148, 123, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 142, 8 }, { 132, 165, 143, 33 }, + { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 150, 8 }, { 140, 173, 151, 33 }, + { 156, 148, 153, 8 }, { 156, 148, 153, 8 }, { 165, 132, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 159, 9 }, { 156, 165, 159, 9 }, + { 156, 165, 159, 9 }, { 156, 173, 161, 17 }, { 165, 156, 162, 9 }, { 165, 156, 162, 9 }, { 156, 181, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 165, 173, 167, 8 }, + { 165, 173, 167, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 181, 156, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 181, 175, 8 }, + { 165, 198, 176, 33 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 
181, 0 }, { 181, 181, 181, 0 }, { 181, 189, 183, 8 }, + { 173, 206, 184, 33 }, { 189, 181, 186, 8 }, { 189, 181, 186, 8 }, { 198, 165, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 198, 192, 9 }, + { 189, 198, 192, 9 }, { 189, 198, 192, 9 }, { 189, 206, 194, 17 }, { 198, 189, 195, 9 }, { 198, 189, 195, 9 }, { 189, 214, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, + { 198, 206, 200, 8 }, { 198, 206, 200, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 214, 189, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, + { 206, 214, 208, 8 }, { 198, 231, 209, 33 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, + { 214, 222, 216, 8 }, { 206, 239, 217, 33 }, { 222, 214, 219, 8 }, { 222, 214, 219, 8 }, { 231, 198, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, + { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 239, 227, 17 }, { 231, 222, 228, 9 }, { 231, 222, 228, 9 }, { 222, 247, 230, 25 }, { 231, 231, 231, 0 }, + { 231, 231, 231, 0 }, { 231, 239, 233, 8 }, { 231, 239, 233, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 247, 222, 238, 25 }, { 239, 239, 239, 0 }, + { 239, 239, 239, 0 }, { 239, 247, 241, 8 }, { 239, 247, 241, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 247, 247, 0 }, { 247, 255, 249, 8 }, { 247, 255, 249, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor6_3[256] = +{ + { 0, 0, 0, 0 }, { 0, 4, 1, 4 }, { 4, 0, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 5, 4 }, { 8, 4, 6, 4 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 12, 9, 4 }, { 12, 8, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 13, 4 }, { 16, 12, 14, 4 }, { 16, 16, 16, 0 }, + { 16, 
16, 16, 0 }, { 16, 20, 17, 4 }, { 20, 16, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 21, 4 }, { 24, 20, 22, 4 }, { 0, 69, 23, 69 }, + { 24, 24, 24, 0 }, { 24, 28, 25, 4 }, { 28, 24, 26, 4 }, { 8, 65, 27, 57 }, { 28, 28, 28, 0 }, { 28, 32, 29, 4 }, { 32, 28, 30, 4 }, { 12, 69, 31, 57 }, + { 32, 32, 32, 0 }, { 32, 36, 33, 4 }, { 36, 32, 34, 4 }, { 20, 65, 35, 45 }, { 36, 36, 36, 0 }, { 36, 40, 37, 4 }, { 40, 36, 38, 4 }, { 24, 69, 39, 45 }, + { 40, 40, 40, 0 }, { 40, 44, 41, 4 }, { 44, 40, 42, 4 }, { 32, 65, 43, 33 }, { 44, 44, 44, 0 }, { 44, 48, 45, 4 }, { 48, 44, 46, 4 }, { 36, 69, 47, 33 }, + { 48, 48, 48, 0 }, { 48, 52, 49, 4 }, { 52, 48, 50, 4 }, { 44, 65, 51, 21 }, { 52, 52, 52, 0 }, { 52, 56, 53, 4 }, { 56, 52, 54, 4 }, { 48, 69, 55, 21 }, + { 56, 56, 56, 0 }, { 56, 60, 57, 4 }, { 60, 56, 58, 4 }, { 56, 65, 59, 9 }, { 60, 60, 60, 0 }, { 60, 65, 61, 5 }, { 65, 56, 62, 9 }, { 65, 60, 63, 5 }, + { 60, 73, 64, 13 }, { 65, 65, 65, 0 }, { 65, 69, 66, 4 }, { 69, 65, 67, 4 }, { 73, 60, 68, 13 }, { 69, 69, 69, 0 }, { 69, 73, 70, 4 }, { 73, 69, 71, 4 }, + { 81, 56, 72, 25 }, { 73, 73, 73, 0 }, { 73, 77, 74, 4 }, { 77, 73, 75, 4 }, { 85, 60, 76, 25 }, { 77, 77, 77, 0 }, { 77, 81, 78, 4 }, { 81, 77, 79, 4 }, + { 93, 56, 80, 37 }, { 81, 81, 81, 0 }, { 81, 85, 82, 4 }, { 85, 81, 83, 4 }, { 97, 60, 84, 37 }, { 85, 85, 85, 0 }, { 85, 89, 86, 4 }, { 89, 85, 87, 4 }, + { 105, 56, 88, 49 }, { 89, 89, 89, 0 }, { 89, 93, 90, 4 }, { 93, 89, 91, 4 }, { 109, 60, 92, 49 }, { 93, 93, 93, 0 }, { 93, 97, 94, 4 }, { 97, 93, 95, 4 }, + { 77, 134, 96, 57 }, { 97, 97, 97, 0 }, { 97, 101, 98, 4 }, { 101, 97, 99, 4 }, { 85, 130, 100, 45 }, { 101, 101, 101, 0 }, { 101, 105, 102, 4 }, { 105, 101, 103, 4 }, + { 89, 134, 104, 45 }, { 105, 105, 105, 0 }, { 105, 109, 106, 4 }, { 109, 105, 107, 4 }, { 97, 130, 108, 33 }, { 109, 109, 109, 0 }, { 109, 113, 110, 4 }, { 113, 109, 111, 4 }, + { 101, 134, 112, 33 }, { 113, 113, 113, 0 }, { 113, 117, 114, 4 }, { 117, 113, 115, 4 }, { 109, 130, 
116, 21 }, { 117, 117, 117, 0 }, { 117, 121, 118, 4 }, { 121, 117, 119, 4 }, + { 113, 134, 120, 21 }, { 121, 121, 121, 0 }, { 121, 125, 122, 4 }, { 125, 121, 123, 4 }, { 121, 130, 124, 9 }, { 125, 125, 125, 0 }, { 125, 130, 126, 5 }, { 130, 121, 127, 9 }, + { 130, 125, 128, 5 }, { 125, 138, 129, 13 }, { 130, 130, 130, 0 }, { 130, 134, 131, 4 }, { 134, 130, 132, 4 }, { 138, 125, 133, 13 }, { 134, 134, 134, 0 }, { 134, 138, 135, 4 }, + { 138, 134, 136, 4 }, { 146, 121, 137, 25 }, { 138, 138, 138, 0 }, { 138, 142, 139, 4 }, { 142, 138, 140, 4 }, { 150, 125, 141, 25 }, { 142, 142, 142, 0 }, { 142, 146, 143, 4 }, + { 146, 142, 144, 4 }, { 158, 121, 145, 37 }, { 146, 146, 146, 0 }, { 146, 150, 147, 4 }, { 150, 146, 148, 4 }, { 162, 125, 149, 37 }, { 150, 150, 150, 0 }, { 150, 154, 151, 4 }, + { 154, 150, 152, 4 }, { 170, 121, 153, 49 }, { 154, 154, 154, 0 }, { 154, 158, 155, 4 }, { 158, 154, 156, 4 }, { 174, 125, 157, 49 }, { 158, 158, 158, 0 }, { 158, 162, 159, 4 }, + { 162, 158, 160, 4 }, { 142, 199, 161, 57 }, { 162, 162, 162, 0 }, { 162, 166, 163, 4 }, { 166, 162, 164, 4 }, { 150, 195, 165, 45 }, { 166, 166, 166, 0 }, { 166, 170, 167, 4 }, + { 170, 166, 168, 4 }, { 154, 199, 169, 45 }, { 170, 170, 170, 0 }, { 170, 174, 171, 4 }, { 174, 170, 172, 4 }, { 162, 195, 173, 33 }, { 174, 174, 174, 0 }, { 174, 178, 175, 4 }, + { 178, 174, 176, 4 }, { 166, 199, 177, 33 }, { 178, 178, 178, 0 }, { 178, 182, 179, 4 }, { 182, 178, 180, 4 }, { 174, 195, 181, 21 }, { 182, 182, 182, 0 }, { 182, 186, 183, 4 }, + { 186, 182, 184, 4 }, { 178, 199, 185, 21 }, { 186, 186, 186, 0 }, { 186, 190, 187, 4 }, { 190, 186, 188, 4 }, { 186, 195, 189, 9 }, { 190, 190, 190, 0 }, { 190, 195, 191, 5 }, + { 195, 186, 192, 9 }, { 195, 190, 193, 5 }, { 190, 203, 194, 13 }, { 195, 195, 195, 0 }, { 195, 199, 196, 4 }, { 199, 195, 197, 4 }, { 203, 190, 198, 13 }, { 199, 199, 199, 0 }, + { 199, 203, 200, 4 }, { 203, 199, 201, 4 }, { 211, 186, 202, 25 }, { 203, 203, 203, 0 }, { 203, 207, 204, 4 }, { 207, 203, 
205, 4 }, { 215, 190, 206, 25 }, { 207, 207, 207, 0 }, + { 207, 211, 208, 4 }, { 211, 207, 209, 4 }, { 223, 186, 210, 37 }, { 211, 211, 211, 0 }, { 211, 215, 212, 4 }, { 215, 211, 213, 4 }, { 227, 190, 214, 37 }, { 215, 215, 215, 0 }, + { 215, 219, 216, 4 }, { 219, 215, 217, 4 }, { 235, 186, 218, 49 }, { 219, 219, 219, 0 }, { 219, 223, 220, 4 }, { 223, 219, 221, 4 }, { 239, 190, 222, 49 }, { 223, 223, 223, 0 }, + { 223, 227, 224, 4 }, { 227, 223, 225, 4 }, { 247, 186, 226, 61 }, { 227, 227, 227, 0 }, { 227, 231, 228, 4 }, { 231, 227, 229, 4 }, { 251, 190, 230, 61 }, { 231, 231, 231, 0 }, + { 231, 235, 232, 4 }, { 235, 231, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 236, 4 }, { 239, 235, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, + { 239, 243, 240, 4 }, { 243, 239, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 244, 4 }, { 247, 243, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 251, 248, 4 }, { 251, 247, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 252, 4 }, { 255, 251, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor5_2[256] = +{ + { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 }, + { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 }, + { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 
49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 }, + { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 }, + { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, + { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 }, + { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, + { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, + { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, + { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 }, + { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 }, + { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 }, + { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, + { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 }, + { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 
140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 }, + { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 }, + { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 }, + { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 }, + { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, + { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, + { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, + { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, + { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, + { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, + { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, + { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 
239, 230, 17 }, { 231, 231, 231, 0 }, + { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 }, + { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor6_2[256] = +{ + { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 }, + { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 }, + { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 }, + { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 }, + { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 }, + { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 }, + { 60, 85, 72, 25 
}, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 60, 97, 78, 37 }, { 77, 81, 79, 4 }, + { 60, 101, 80, 41 }, { 81, 81, 81, 0 }, { 60, 105, 82, 45 }, { 81, 85, 83, 4 }, { 60, 109, 84, 49 }, { 85, 85, 85, 0 }, { 60, 113, 86, 53 }, { 85, 89, 87, 4 }, + { 60, 117, 88, 57 }, { 89, 89, 89, 0 }, { 60, 121, 90, 61 }, { 89, 93, 91, 4 }, { 60, 125, 92, 65 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 }, + { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 }, + { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 }, + { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 }, + { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 }, + { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 }, + { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 125, 162, 143, 37 }, + { 142, 146, 144, 4 }, { 125, 166, 145, 41 }, { 146, 146, 146, 0 }, { 125, 170, 147, 45 }, { 146, 150, 148, 4 }, { 125, 174, 149, 49 }, { 150, 150, 150, 0 }, { 125, 178, 151, 53 }, + { 150, 154, 152, 4 }, { 125, 182, 153, 57 }, { 154, 154, 154, 0 }, { 125, 186, 155, 61 }, { 154, 158, 156, 4 }, { 125, 190, 157, 65 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, + { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, 
{ 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, + { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, + { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, + { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, + { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 }, + { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 }, + { 190, 227, 208, 37 }, { 207, 211, 209, 4 }, { 190, 231, 210, 41 }, { 211, 211, 211, 0 }, { 190, 235, 212, 45 }, { 211, 215, 213, 4 }, { 190, 239, 214, 49 }, { 215, 215, 215, 0 }, + { 190, 243, 216, 53 }, { 215, 219, 217, 4 }, { 190, 247, 218, 57 }, { 219, 219, 219, 0 }, { 190, 251, 220, 61 }, { 219, 223, 221, 4 }, { 190, 255, 222, 65 }, { 223, 223, 223, 0 }, + { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 }, + { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, + { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 
255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor5_3_p[256] = +{ + { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 2, 8 }, { 0, 8, 2, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 10, 8 }, { 0, 33, 11, 33 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 18, 8 }, { 8, 41, 19, 33 }, { 24, 16, 21, 8 }, { 24, 16, 21, 8 }, { 33, 0, 22, 33 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 41, 29, 17 }, { 33, 24, 30, 9 }, { 33, 24, 30, 9 }, + { 24, 49, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 33, 41, 35, 8 }, { 33, 41, 35, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, + { 49, 24, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 43, 8 }, { 33, 66, 44, 33 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, + { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 51, 8 }, { 41, 74, 52, 33 }, { 57, 49, 54, 8 }, { 57, 49, 54, 8 }, { 66, 33, 55, 33 }, + { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 74, 62, 17 }, { 66, 57, 63, 9 }, + { 66, 57, 63, 9 }, { 57, 82, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 66, 74, 68, 8 }, { 66, 74, 68, 8 }, { 74, 66, 71, 8 }, { 74, 66, 71, 8 }, + { 74, 66, 71, 8 }, { 82, 57, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 76, 8 }, { 66, 99, 77, 33 }, { 82, 74, 79, 8 }, { 82, 74, 79, 8 }, + { 82, 74, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 84, 8 }, { 74, 107, 85, 33 }, { 90, 82, 87, 8 }, { 90, 82, 87, 8 }, + { 99, 66, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 107, 95, 17 }, + { 99, 90, 96, 9 }, { 99, 90, 96, 9 }, { 90, 115, 98, 25 }, { 99, 99, 99, 0 }, { 
99, 99, 99, 0 }, { 99, 107, 101, 8 }, { 99, 107, 101, 8 }, { 107, 99, 104, 8 }, + { 107, 99, 104, 8 }, { 107, 99, 104, 8 }, { 115, 90, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 109, 8 }, { 99, 132, 110, 33 }, { 115, 107, 112, 8 }, + { 115, 107, 112, 8 }, { 115, 107, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 117, 8 }, { 107, 140, 118, 33 }, { 123, 115, 120, 8 }, + { 123, 115, 120, 8 }, { 132, 99, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, + { 123, 140, 128, 17 }, { 132, 123, 129, 9 }, { 132, 123, 129, 9 }, { 123, 148, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 132, 140, 134, 8 }, { 132, 140, 134, 8 }, + { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 148, 123, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 142, 8 }, { 132, 165, 143, 33 }, + { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 150, 8 }, { 140, 173, 151, 33 }, + { 156, 148, 153, 8 }, { 156, 148, 153, 8 }, { 165, 132, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 159, 9 }, { 156, 165, 159, 9 }, + { 156, 165, 159, 9 }, { 156, 173, 161, 17 }, { 165, 156, 162, 9 }, { 165, 156, 162, 9 }, { 156, 181, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 165, 173, 167, 8 }, + { 165, 173, 167, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 181, 156, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 181, 175, 8 }, + { 165, 198, 176, 33 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 189, 183, 8 }, + { 173, 206, 184, 33 }, { 189, 181, 186, 8 }, { 189, 181, 186, 8 }, { 198, 165, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 
0 }, { 189, 189, 189, 0 }, { 189, 198, 192, 9 }, + { 189, 198, 192, 9 }, { 189, 198, 192, 9 }, { 189, 206, 194, 17 }, { 198, 189, 195, 9 }, { 198, 189, 195, 9 }, { 189, 214, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, + { 198, 206, 200, 8 }, { 198, 206, 200, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 214, 189, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, + { 206, 214, 208, 8 }, { 198, 231, 209, 33 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, + { 214, 222, 216, 8 }, { 206, 239, 217, 33 }, { 222, 214, 219, 8 }, { 222, 214, 219, 8 }, { 231, 198, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, + { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 239, 227, 17 }, { 231, 222, 228, 9 }, { 231, 222, 228, 9 }, { 222, 247, 230, 25 }, { 231, 231, 231, 0 }, + { 231, 231, 231, 0 }, { 231, 239, 233, 8 }, { 231, 239, 233, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 247, 222, 238, 25 }, { 239, 239, 239, 0 }, + { 239, 239, 239, 0 }, { 239, 247, 241, 8 }, { 239, 247, 241, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 247, 247, 0 }, { 247, 255, 249, 8 }, { 247, 255, 249, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor6_3_p[256] = +{ + { 0, 0, 0, 0 }, { 0, 4, 1, 4 }, { 4, 0, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 5, 4 }, { 8, 4, 6, 4 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 12, 9, 4 }, { 12, 8, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 13, 4 }, { 16, 12, 14, 4 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 20, 17, 4 }, { 20, 16, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 21, 4 }, { 24, 20, 22, 4 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 28, 25, 4 
}, { 28, 24, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 29, 4 }, { 32, 28, 30, 4 }, { 32, 32, 32, 0 }, + { 32, 32, 32, 0 }, { 32, 36, 33, 4 }, { 36, 32, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 37, 4 }, { 40, 36, 38, 4 }, { 40, 40, 40, 0 }, + { 40, 40, 40, 0 }, { 40, 44, 41, 4 }, { 44, 40, 42, 4 }, { 32, 65, 43, 33 }, { 44, 44, 44, 0 }, { 44, 48, 45, 4 }, { 48, 44, 46, 4 }, { 36, 69, 47, 33 }, + { 48, 48, 48, 0 }, { 48, 52, 49, 4 }, { 52, 48, 50, 4 }, { 44, 65, 51, 21 }, { 52, 52, 52, 0 }, { 52, 56, 53, 4 }, { 56, 52, 54, 4 }, { 48, 69, 55, 21 }, + { 56, 56, 56, 0 }, { 56, 60, 57, 4 }, { 60, 56, 58, 4 }, { 56, 65, 59, 9 }, { 60, 60, 60, 0 }, { 60, 65, 61, 5 }, { 65, 56, 62, 9 }, { 65, 60, 63, 5 }, + { 60, 73, 64, 13 }, { 65, 65, 65, 0 }, { 65, 69, 66, 4 }, { 69, 65, 67, 4 }, { 73, 60, 68, 13 }, { 69, 69, 69, 0 }, { 69, 73, 70, 4 }, { 73, 69, 71, 4 }, + { 81, 56, 72, 25 }, { 73, 73, 73, 0 }, { 73, 77, 74, 4 }, { 77, 73, 75, 4 }, { 85, 60, 76, 25 }, { 77, 77, 77, 0 }, { 77, 81, 78, 4 }, { 81, 77, 79, 4 }, + { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 82, 4 }, { 85, 81, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 86, 4 }, { 89, 85, 87, 4 }, + { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 90, 4 }, { 93, 89, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 94, 4 }, { 97, 93, 95, 4 }, + { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 98, 4 }, { 101, 97, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 102, 4 }, { 105, 101, 103, 4 }, + { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 106, 4 }, { 109, 105, 107, 4 }, { 97, 130, 108, 33 }, { 109, 109, 109, 0 }, { 109, 113, 110, 4 }, { 113, 109, 111, 4 }, + { 101, 134, 112, 33 }, { 113, 113, 113, 0 }, { 113, 117, 114, 4 }, { 117, 113, 115, 4 }, { 109, 130, 116, 21 }, { 117, 117, 117, 0 }, { 117, 121, 118, 4 }, { 121, 117, 119, 4 }, + { 113, 134, 120, 21 }, { 121, 121, 121, 0 }, { 121, 125, 122, 4 }, { 125, 121, 123, 4 }, { 121, 130, 124, 9 }, { 
125, 125, 125, 0 }, { 125, 130, 126, 5 }, { 130, 121, 127, 9 }, + { 130, 125, 128, 5 }, { 125, 138, 129, 13 }, { 130, 130, 130, 0 }, { 130, 134, 131, 4 }, { 134, 130, 132, 4 }, { 138, 125, 133, 13 }, { 134, 134, 134, 0 }, { 134, 138, 135, 4 }, + { 138, 134, 136, 4 }, { 146, 121, 137, 25 }, { 138, 138, 138, 0 }, { 138, 142, 139, 4 }, { 142, 138, 140, 4 }, { 150, 125, 141, 25 }, { 142, 142, 142, 0 }, { 142, 146, 143, 4 }, + { 146, 142, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 147, 4 }, { 150, 146, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 154, 151, 4 }, + { 154, 150, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 155, 4 }, { 158, 154, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 162, 159, 4 }, + { 162, 158, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 163, 4 }, { 166, 162, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 170, 167, 4 }, + { 170, 166, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 171, 4 }, { 174, 170, 172, 4 }, { 162, 195, 173, 33 }, { 174, 174, 174, 0 }, { 174, 178, 175, 4 }, + { 178, 174, 176, 4 }, { 166, 199, 177, 33 }, { 178, 178, 178, 0 }, { 178, 182, 179, 4 }, { 182, 178, 180, 4 }, { 174, 195, 181, 21 }, { 182, 182, 182, 0 }, { 182, 186, 183, 4 }, + { 186, 182, 184, 4 }, { 178, 199, 185, 21 }, { 186, 186, 186, 0 }, { 186, 190, 187, 4 }, { 190, 186, 188, 4 }, { 186, 195, 189, 9 }, { 190, 190, 190, 0 }, { 190, 195, 191, 5 }, + { 195, 186, 192, 9 }, { 195, 190, 193, 5 }, { 190, 203, 194, 13 }, { 195, 195, 195, 0 }, { 195, 199, 196, 4 }, { 199, 195, 197, 4 }, { 203, 190, 198, 13 }, { 199, 199, 199, 0 }, + { 199, 203, 200, 4 }, { 203, 199, 201, 4 }, { 211, 186, 202, 25 }, { 203, 203, 203, 0 }, { 203, 207, 204, 4 }, { 207, 203, 205, 4 }, { 215, 190, 206, 25 }, { 207, 207, 207, 0 }, + { 207, 211, 208, 4 }, { 211, 207, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 212, 4 }, { 215, 211, 213, 4 }, { 215, 
215, 215, 0 }, { 215, 215, 215, 0 }, + { 215, 219, 216, 4 }, { 219, 215, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 220, 4 }, { 223, 219, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 }, + { 223, 227, 224, 4 }, { 227, 223, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 228, 4 }, { 231, 227, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 }, + { 231, 235, 232, 4 }, { 235, 231, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 236, 4 }, { 239, 235, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, + { 239, 243, 240, 4 }, { 243, 239, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 244, 4 }, { 247, 243, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 251, 248, 4 }, { 251, 247, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 252, 4 }, { 255, 251, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor5_2_p[256] = +{ + { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 }, + { 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 }, + { 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 }, + { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 
}, { 57, 57, 57, 0 }, + { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, + { 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 }, + { 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, + { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, + { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, + { 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 }, + { 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 }, + { 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 }, + { 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, + { 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 }, + { 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 }, + { 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 
148, 148, 0 }, { 148, 156, 152, 8 }, + { 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 }, + { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 }, + { 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, + { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, + { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, + { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 }, + { 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 }, + { 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, + { 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, + { 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 }, + { 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 
239, 239, 0 }, + { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +TableEntry g_singleColor6_2_p[256] = +{ + { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 }, + { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 }, + { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 }, + { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 }, + { 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 }, + { 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 }, + { 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 }, + { 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 }, + { 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 }, + { 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 77, 77, 77, 0 }, { 77, 81, 79, 4 }, + { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 
85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 87, 4 }, + { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 }, + { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 }, + { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 }, + { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 }, + { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 }, + { 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 }, + { 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 142, 142, 142, 0 }, + { 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, + { 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, + { 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, + { 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 
174, 174, 174, 0 }, + { 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, + { 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, + { 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 }, + { 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 }, + { 207, 207, 207, 0 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 }, + { 215, 215, 215, 0 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 }, + { 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 }, + { 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 }, + { 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, + { 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, +}; + +}}} diff --git a/thirdparty/cvtt/ConvectionKernels_SingleFile.cpp b/thirdparty/cvtt/ConvectionKernels_SingleFile.cpp new file mode 100644 index 0000000000..ad59988655 --- /dev/null +++ 
b/thirdparty/cvtt/ConvectionKernels_SingleFile.cpp @@ -0,0 +1,48 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if defined(CVTT_SINGLE_FILE) +#define CVTT_SINGLE_FILE_IMPL + +#include "ConvectionKernels_API.cpp" +#include "ConvectionKernels_BC67.cpp" +#include "ConvectionKernels_BC6H_IO.cpp" +#include "ConvectionKernels_BC7_PrioData.cpp" +#include "ConvectionKernels_BCCommon.cpp" +#include "ConvectionKernels_ETC.cpp" +#include "ConvectionKernels_IndexSelector.cpp" +#include "ConvectionKernels_S3TC.cpp" +#include "ConvectionKernels_Util.cpp" + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_UnfinishedEndpoints.h b/thirdparty/cvtt/ConvectionKernels_UnfinishedEndpoints.h new file mode 100644 index 0000000000..371cbe54bf --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_UnfinishedEndpoints.h @@ -0,0 +1,121 @@ +#pragma once + +#include "ConvectionKernels_Util.h" + +namespace cvtt +{ + namespace Internal + { + template<int TVectorSize> + class UnfinishedEndpoints + { + public: + typedef ParallelMath::Float MFloat; + typedef ParallelMath::UInt16 MUInt16; + typedef ParallelMath::UInt15 MUInt15; + typedef ParallelMath::SInt16 MSInt16; + typedef ParallelMath::SInt32 MSInt32; + + UnfinishedEndpoints() + { + } + + UnfinishedEndpoints(const MFloat *base, const MFloat *offset) + { + for (int ch = 0; ch < TVectorSize; ch++) + m_base[ch] = base[ch]; + for (int ch = 0; ch < TVectorSize; ch++) + m_offset[ch] = offset[ch]; + } + + UnfinishedEndpoints(const UnfinishedEndpoints& other) + { + for (int ch = 0; ch < TVectorSize; ch++) + m_base[ch] = other.m_base[ch]; + for (int ch = 0; ch < TVectorSize; ch++) + m_offset[ch] = other.m_offset[ch]; + } + + void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode) + { + float tweakFactors[2]; + Util::ComputeTweakFactors(tweak, range, tweakFactors); + + for (int ch = 0; ch < TVectorSize; ch++) + { + MUInt15 channelEPs[2]; + for (int epi = 0; epi < 2; epi++) + { + MFloat f 
= ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f); + channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode); + } + + outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[0]); + outEP1[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[1]); + } + } + + void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode) + { + float tweakFactors[2]; + Util::ComputeTweakFactors(tweak, range, tweakFactors); + + for (int ch = 0; ch < TVectorSize; ch++) + { + MSInt16 channelEPs[2]; + for (int epi = 0; epi < 2; epi++) + { + MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f); + channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode); + } + + outEP0[ch] = channelEPs[0]; + outEP1[ch] = channelEPs[1]; + } + } + + void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1) + { + ParallelMath::RoundTowardNearestForScope roundingMode; + + float tweakFactors[2]; + Util::ComputeTweakFactors(tweak, range, tweakFactors); + + for (int ch = 0; ch < TVectorSize; ch++) + { + MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f); + MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f); + outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode); + outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode); + } + } + + template<int TNewVectorSize> + UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler) + { + MFloat newBase[TNewVectorSize]; + MFloat newOffset[TNewVectorSize]; + + for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++) + { + newBase[ch] = m_base[ch]; + newOffset[ch] = m_offset[ch]; + } + + MFloat fillerV = ParallelMath::MakeFloat(filler); + + for (int ch = TVectorSize; ch < TNewVectorSize; ch++) + { + newBase[ch] = fillerV; + newOffset[ch] = 
ParallelMath::MakeFloatZero(); + } + + return UnfinishedEndpoints<TNewVectorSize>(newBase, newOffset); + } + + private: + MFloat m_base[TVectorSize]; + MFloat m_offset[TVectorSize]; + }; + } +} diff --git a/thirdparty/cvtt/ConvectionKernels_Util.cpp b/thirdparty/cvtt/ConvectionKernels_Util.cpp new file mode 100644 index 0000000000..d9c25c7845 --- /dev/null +++ b/thirdparty/cvtt/ConvectionKernels_Util.cpp @@ -0,0 +1,88 @@ +/* +Convection Texture Tools +Copyright (c) 2018-2019 Eric Lasota + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject +to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +------------------------------------------------------------------------------------- + +Portions based on DirectX Texture Library (DirectXTex) + +Copyright (c) Microsoft Corporation. All rights reserved. +Licensed under the MIT License. 
+ +http://go.microsoft.com/fwlink/?LinkId=248926 +*/ +#include "ConvectionKernels_Config.h" + +#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL) + +#include "ConvectionKernels.h" +#include "ConvectionKernels_ParallelMath.h" + +#include <algorithm> + +namespace cvtt +{ + namespace Util + { + // Signed input blocks are converted into unsigned space, with the maximum value being 254 + void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]) + { + for (size_t block = 0; block < ParallelMath::ParallelSize; block++) + { + const PixelBlockS8& inputSignedBlock = inputSigned[block]; + PixelBlockU8& inputNormalizedBlock = inputNormalized[block]; + + for (size_t px = 0; px < 16; px++) + { + for (size_t ch = 0; ch < 4; ch++) + inputNormalizedBlock.m_pixels[px][ch] = static_cast<uint8_t>(std::max<int>(inputSignedBlock.m_pixels[px][ch], -127) + 127); + } + } + } + + void FillWeights(const Options &options, float channelWeights[4]) + { + if (options.flags & Flags::Uniform) + channelWeights[0] = channelWeights[1] = channelWeights[2] = channelWeights[3] = 1.0f; + else + { + channelWeights[0] = options.redWeight; + channelWeights[1] = options.greenWeight; + channelWeights[2] = options.blueWeight; + channelWeights[3] = options.alphaWeight; + } + } + + void ComputeTweakFactors(int tweak, int range, float *outFactors) + { + int totalUnits = range - 1; + int minOutsideUnits = ((tweak >> 1) & 1); + int maxOutsideUnits = (tweak & 1); + int insideUnits = totalUnits - minOutsideUnits - maxOutsideUnits; + + outFactors[0] = -static_cast<float>(minOutsideUnits) / static_cast<float>(insideUnits); + outFactors[1] = static_cast<float>(maxOutsideUnits) / static_cast<float>(insideUnits) + 1.0f; + } + } +} + +#endif diff --git a/thirdparty/cvtt/ConvectionKernels_Util.h b/thirdparty/cvtt/ConvectionKernels_Util.h new file mode 100644 index 0000000000..c07b9bf2aa --- /dev/null +++ 
b/thirdparty/cvtt/ConvectionKernels_Util.h @@ -0,0 +1,21 @@ +#pragma once + +#include "ConvectionKernels_ParallelMath.h" + +namespace cvtt +{ + struct PixelBlockU8; + struct PixelBlockS8; + struct Options; +} + +namespace cvtt +{ + namespace Util + { + // Signed input blocks are converted into unsigned space, with the maximum value being 254 + void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]); + void FillWeights(const Options &options, float channelWeights[4]); + void ComputeTweakFactors(int tweak, int range, float *outFactors); + } +} diff --git a/thirdparty/cvtt/etc_notes.txt b/thirdparty/cvtt/etc_notes.txt new file mode 100644 index 0000000000..bb041a8435 --- /dev/null +++ b/thirdparty/cvtt/etc_notes.txt @@ -0,0 +1,27 @@ +The ETC1 compressor uses modified cluster fit: + +Assume that there exists an ideal base color and set of selectors for a given table. +For a given table and set of selectors, the ideal base color can be determined by subtracting the offsets from each pixel and averaging them. +Doing that is equivalent to subtracting the average offset from the average color. +Because positive and negative selectors of the same magnitude cancel out, the search space of possible average offsets is reduced: 57 unique offsets for the first table and 81 for the others. +Most of the offsets result in the same color as another average offset due to quantization of the base color, so those can be de-duplicated. +So: +- Start with a high-precision average color. +- Apply precomputed luma offsets to it. +- Quantize and de-duplicate the base colors. +- Find the ideal selectors for each base color. + +Differential mode is solved by just finding the best legal combination from those attempts. + +There are several scenarios where this is not ideal: +- Clamping behavior can sometimes be leveraged for a more accurate block. 
+- Differentials can sometimes be moved slightly closer to become legal. +- This only works when MSE is the error metric (i.e. not normal maps) +- This only works when pixel weights are of equal importance (i.e. not using weight by alpha or edge deblocking) + +T and H mode just work by generating clustering assignments by computing a chrominance line and splitting the block in half by the chrominance midpoint and using those to determine the averages. + +Planar mode is just solved algebraically. + +If you want to emulate etc2comp's default settings, add the flag ETC_UseFakeBT709 to use its modified Rec. 709 error coefficients. +Doing that will significantly slow down encoding because it requires much more complicated quantization math.
\ No newline at end of file diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-common.hh b/thirdparty/harfbuzz/src/hb-aat-layout-common.hh index 1dcbe92904..1db0f1df92 100644 --- a/thirdparty/harfbuzz/src/hb-aat-layout-common.hh +++ b/thirdparty/harfbuzz/src/hb-aat-layout-common.hh @@ -839,7 +839,7 @@ struct StateTableDriver } if (!c->in_place) - buffer->swap_buffers (); + buffer->sync (); } public: diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh index d745c11431..0bf9bd2912 100644 --- a/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh +++ b/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh @@ -146,7 +146,7 @@ struct DuctileGlyphAction HBUINT32 variationAxis; /* The 4-byte tag identifying the ductile axis. * This would normally be 0x64756374 ('duct'), * but you may use any axis the font contains. */ - HBFixed minimumLimit; /* The lowest value for the ductility axis tha + HBFixed minimumLimit; /* The lowest value for the ductility axis that * still yields an acceptable appearance. Normally * this will be 1.0. 
*/ HBFixed noStretchValue; /* This is the default value that corresponds to diff --git a/thirdparty/harfbuzz/src/hb-algs.hh b/thirdparty/harfbuzz/src/hb-algs.hh index 446d87e28b..3a3ab08046 100644 --- a/thirdparty/harfbuzz/src/hb-algs.hh +++ b/thirdparty/harfbuzz/src/hb-algs.hh @@ -36,6 +36,7 @@ #include <algorithm> #include <initializer_list> +#include <functional> #include <new> /* @@ -210,12 +211,23 @@ struct } HB_FUNCOBJ (hb_bool); +template <typename T> +static inline +T hb_coerce (const T v) { return v; } +template <typename T, typename V, + hb_enable_if (!hb_is_same (hb_decay<T>, hb_decay<V>) && std::is_pointer<V>::value)> +static inline +T hb_coerce (const V v) { return *v; } + struct { private: template <typename T> constexpr auto - impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) + impl (const T& v, hb_priority<2>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) + + template <typename T> constexpr auto + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, std::hash<hb_decay<decltype (hb_deref (v))>>{} (hb_deref (v))) template <typename T, hb_enable_if (std::is_integral<T>::value)> constexpr auto @@ -435,23 +447,29 @@ struct private: template <typename T1, typename T2> auto - impl (T1&& v1, T2 &&v2, hb_priority<2>) const HB_AUTO_RETURN + impl (T1&& v1, T2 &&v2, hb_priority<3>) const HB_AUTO_RETURN ( std::forward<T2> (v2).cmp (std::forward<T1> (v1)) == 0 ) template <typename T1, typename T2> auto - impl (T1&& v1, T2 &&v2, hb_priority<1>) const HB_AUTO_RETURN + impl (T1&& v1, T2 &&v2, hb_priority<2>) const HB_AUTO_RETURN ( std::forward<T1> (v1).cmp (std::forward<T2> (v2)) == 0 ) template <typename T1, typename T2> auto - impl (T1&& v1, T2 &&v2, hb_priority<0>) const HB_AUTO_RETURN + impl (T1&& v1, T2 &&v2, hb_priority<1>) const HB_AUTO_RETURN ( std::forward<T1> (v1) == std::forward<T2> (v2) ) + template <typename T1, typename T2> auto + impl (T1&& v1, T2 &&v2, hb_priority<0>) const HB_AUTO_RETURN + ( + 
std::forward<T2> (v2) == std::forward<T1> (v1) + ) + public: template <typename T1, typename T2> auto @@ -472,6 +490,10 @@ struct hb_pair_t typedef T2 second_t; typedef hb_pair_t<T1, T2> pair_t; + template <typename U1 = T1, typename U2 = T2, + hb_enable_if (std::is_default_constructible<U1>::value && + std::is_default_constructible<U2>::value)> + hb_pair_t () : first (), second () {} hb_pair_t (T1 a, T2 b) : first (a), second (b) {} template <typename Q1, typename Q2, @@ -870,7 +892,7 @@ hb_bsearch_impl (unsigned *pos, /* Out */ #pragma GCC diagnostic ignored "-Wcast-align" V* p = (V*) (((const char *) base) + (mid * stride)); #pragma GCC diagnostic pop - int c = compar ((const void *) hb_addressof (key), (const void *) p, ds...); + int c = compar ((const void *) std::addressof (key), (const void *) p, ds...); if (c < 0) max = mid - 1; else if (c > 0) diff --git a/thirdparty/harfbuzz/src/hb-array.hh b/thirdparty/harfbuzz/src/hb-array.hh index 0beffb078f..1d1476d7cd 100644 --- a/thirdparty/harfbuzz/src/hb-array.hh +++ b/thirdparty/harfbuzz/src/hb-array.hh @@ -412,7 +412,7 @@ bool hb_array_t<T>::operator == (const hb_array_t<T> &o) const return true; } -/* TODO Specialize opeator== for hb_bytes_t and hb_ubytes_t. */ +/* TODO Specialize operator== for hb_bytes_t and hb_ubytes_t. 
*/ template <> inline uint32_t hb_array_t<const char>::hash () const { diff --git a/thirdparty/harfbuzz/src/hb-bimap.hh b/thirdparty/harfbuzz/src/hb-bimap.hh index d466af8b60..a9e1278de7 100644 --- a/thirdparty/harfbuzz/src/hb-bimap.hh +++ b/thirdparty/harfbuzz/src/hb-bimap.hh @@ -33,20 +33,6 @@ /* Bi-directional map */ struct hb_bimap_t { - /* XXX(remove) */ - void init () - { - forw_map.init (); - back_map.init (); - } - - /* XXX(remove) */ - void fini () - { - forw_map.fini (); - back_map.fini (); - } - void reset () { forw_map.reset (); diff --git a/thirdparty/harfbuzz/src/hb-buffer.cc b/thirdparty/harfbuzz/src/hb-buffer.cc index be3161a54d..e50afcb203 100644 --- a/thirdparty/harfbuzz/src/hb-buffer.cc +++ b/thirdparty/harfbuzz/src/hb-buffer.cc @@ -86,7 +86,46 @@ hb_segment_properties_hash (const hb_segment_properties_t *p) (intptr_t) (p->language); } +/** + * hb_segment_properties_overlay: + * @p: #hb_segment_properties_t to fill in. + * @src: #hb_segment_properties_t to fill in from. + * + * Fills in missing fields of @p from @src in a considered manner. + * + * First, if @p does not have direction set, direction is copied from @src. + * + * Next, if @p and @src have the same direction (which can be unset), if @p + * does not have script set, script is copied from @src. + * + * Finally, if @p and @src have the same direction and script (which either + * can be unset), if @p does not have language set, language is copied from + * @src. 
+ * + * Since: 3.3.0 + **/ +void +hb_segment_properties_overlay (hb_segment_properties_t *p, + const hb_segment_properties_t *src) +{ + if (unlikely (!p || !src)) + return; + if (!p->direction) + p->direction = src->direction; + + if (p->direction != src->direction) + return; + + if (!p->script) + p->script = src->script; + + if (p->script != src->script) + return; + + if (!p->language) + p->language = src->language; +} /* Here is how the buffer works internally: * @@ -96,14 +135,14 @@ hb_segment_properties_hash (const hb_segment_properties_t *p) * As an optimization, both info and out_info may point to the * same piece of memory, which is owned by info. This remains the * case as long as out_len doesn't exceed i at any time. - * In that case, swap_buffers() is mostly no-op and the glyph operations + * In that case, sync() is mostly no-op and the glyph operations * operate mostly in-place. * * As soon as out_info gets longer than info, out_info is moved over * to an alternate buffer (which we reuse the pos buffer for), and its * current contents (out_len entries) are copied to the new place. * - * This should all remain transparent to the user. swap_buffers() then + * This should all remain transparent to the user. sync() then * switches info over to out_info and does housekeeping. 
*/ @@ -217,11 +256,24 @@ hb_buffer_t::get_scratch_buffer (unsigned int *size) /* HarfBuzz-Internal API */ void +hb_buffer_t::similar (const hb_buffer_t &src) +{ + hb_unicode_funcs_destroy (unicode); + unicode = hb_unicode_funcs_reference (src.unicode); + flags = src.flags; + cluster_level = src.cluster_level; + replacement = src.replacement; + invisible = src.invisible; + not_found = src.not_found; +} + +void hb_buffer_t::reset () { hb_unicode_funcs_destroy (unicode); unicode = hb_unicode_funcs_reference (hb_unicode_funcs_get_default ()); flags = HB_BUFFER_FLAG_DEFAULT; + cluster_level = HB_BUFFER_CLUSTER_LEVEL_DEFAULT; replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT; invisible = 0; not_found = 0; @@ -232,11 +284,10 @@ hb_buffer_t::reset () void hb_buffer_t::clear () { + content_type = HB_BUFFER_CONTENT_TYPE_INVALID; hb_segment_properties_t default_props = HB_SEGMENT_PROPERTIES_DEFAULT; props = default_props; - scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; - content_type = HB_BUFFER_CONTENT_TYPE_INVALID; successful = true; have_output = false; have_positions = false; @@ -244,16 +295,44 @@ hb_buffer_t::clear () idx = 0; len = 0; out_len = 0; - out_info = info; - serial = 0; + out_info = info; memset (context, 0, sizeof context); memset (context_len, 0, sizeof context_len); deallocate_var_all (); + serial = 0; + scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; +} + +void +hb_buffer_t::enter () +{ + deallocate_var_all (); + serial = 0; + scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; + if (likely (!hb_unsigned_mul_overflows (len, HB_BUFFER_MAX_LEN_FACTOR))) + { + max_len = hb_max (len * HB_BUFFER_MAX_LEN_FACTOR, + (unsigned) HB_BUFFER_MAX_LEN_MIN); + } + if (likely (!hb_unsigned_mul_overflows (len, HB_BUFFER_MAX_OPS_FACTOR))) + { + max_ops = hb_max (len * HB_BUFFER_MAX_OPS_FACTOR, + (unsigned) HB_BUFFER_MAX_OPS_MIN); + } +} +void +hb_buffer_t::leave () +{ + max_len = HB_BUFFER_MAX_LEN_DEFAULT; + max_ops = HB_BUFFER_MAX_OPS_DEFAULT; + deallocate_var_all (); + 
serial = 0; } + void hb_buffer_t::add (hb_codepoint_t codepoint, unsigned int cluster) @@ -307,7 +386,7 @@ hb_buffer_t::clear_positions () } void -hb_buffer_t::swap_buffers () +hb_buffer_t::sync () { assert (have_output); @@ -494,33 +573,6 @@ done: } void -hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) -{ - unsigned int cluster = UINT_MAX; - cluster = _infos_find_min_cluster (info, start, end, cluster); - _unsafe_to_break_set_mask (info, start, end, cluster); -} -void -hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) -{ - if (!have_output) - { - unsafe_to_break_impl (start, end); - return; - } - - assert (start <= out_len); - assert (idx <= end); - - unsigned int cluster = UINT_MAX; - cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); - cluster = _infos_find_min_cluster (info, idx, end, cluster); - - _unsafe_to_break_set_mask (out_info, start, out_len, cluster); - _unsafe_to_break_set_mask (info, idx, end, cluster); -} - -void hb_buffer_t::guess_segment_properties () { assert_unicode (); @@ -565,12 +617,11 @@ DEFINE_NULL_INSTANCE (hb_buffer_t) = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT, 0, /* invisible */ 0, /* not_found */ - HB_BUFFER_SCRATCH_FLAG_DEFAULT, - HB_BUFFER_MAX_LEN_DEFAULT, - HB_BUFFER_MAX_OPS_DEFAULT, + HB_BUFFER_CONTENT_TYPE_INVALID, HB_SEGMENT_PROPERTIES_DEFAULT, + false, /* successful */ false, /* have_output */ true /* have_positions */ @@ -610,6 +661,46 @@ hb_buffer_create () } /** + * hb_buffer_create_similar: + * @src: An #hb_buffer_t + * + * Creates a new #hb_buffer_t, similar to hb_buffer_create(). The only + * difference is that the buffer is configured similarly to @src. + * + * Return value: (transfer full): + * A newly allocated #hb_buffer_t, similar to hb_buffer_create(). 
+ * + * Since: 3.3.0 + **/ +hb_buffer_t * +hb_buffer_create_similar (const hb_buffer_t *src) +{ + hb_buffer_t *buffer = hb_buffer_create (); + + buffer->similar (*src); + + return buffer; +} + +/** + * hb_buffer_reset: + * @buffer: An #hb_buffer_t + * + * Resets the buffer to its initial status, as if it was just newly created + * with hb_buffer_create(). + * + * Since: 0.9.2 + **/ +void +hb_buffer_reset (hb_buffer_t *buffer) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->reset (); +} + +/** * hb_buffer_get_empty: * * Fetches an empty #hb_buffer_t. @@ -1157,24 +1248,6 @@ hb_buffer_get_not_found_glyph (hb_buffer_t *buffer) /** - * hb_buffer_reset: - * @buffer: An #hb_buffer_t - * - * Resets the buffer to its initial status, as if it was just newly created - * with hb_buffer_create(). - * - * Since: 0.9.2 - **/ -void -hb_buffer_reset (hb_buffer_t *buffer) -{ - if (unlikely (hb_object_is_immutable (buffer))) - return; - - buffer->reset (); -} - -/** * hb_buffer_clear_contents: * @buffer: An #hb_buffer_t * @@ -1749,6 +1822,8 @@ hb_buffer_append (hb_buffer_t *buffer, if (!buffer->have_positions && source->have_positions) buffer->clear_positions (); + hb_segment_properties_overlay (&buffer->props, &source->props); + memcpy (buffer->info + orig_len, source->info + start, (end - start) * sizeof (buffer->info[0])); if (buffer->have_positions) memcpy (buffer->pos + orig_len, source->pos + start, (end - start) * sizeof (buffer->pos[0])); diff --git a/thirdparty/harfbuzz/src/hb-buffer.h b/thirdparty/harfbuzz/src/hb-buffer.h index a183cb9d4a..9fbd7b1ec3 100644 --- a/thirdparty/harfbuzz/src/hb-buffer.h +++ b/thirdparty/harfbuzz/src/hb-buffer.h @@ -76,18 +76,68 @@ typedef struct hb_glyph_info_t { * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the * beginning of the cluster this glyph is part of, * then both sides need to be re-shaped, as the - * result might be different. 
On the flip side, - * it means that when this flag is not present, - * then it's safe to break the glyph-run at the - * beginning of this cluster, and the two sides - * represent the exact same result one would get - * if breaking input text at the beginning of - * this cluster and shaping the two sides - * separately. This can be used to optimize - * paragraph layout, by avoiding re-shaping - * of each line after line-breaking, or limiting - * the reshaping to a small piece around the - * breaking point only. + * result might be different. + * On the flip side, it means that when this + * flag is not present, then it is safe to break + * the glyph-run at the beginning of this + * cluster, and the two sides will represent the + * exact same result one would get if breaking + * input text at the beginning of this cluster + * and shaping the two sides separately. + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking. + * @HB_GLYPH_FLAG_UNSAFE_TO_CONCAT: Indicates that if input text is changed on one + * side of the beginning of the cluster this glyph + * is part of, then the shaping results for the + * other side might change. + * Note that the absence of this flag will NOT by + * itself mean that it IS safe to concat text. + * Only two pieces of text both of which are clear of + * this flag can be concatenated safely. + * This can be used to optimize paragraph + * layout, by avoiding re-shaping of each line + * after line-breaking, by limiting the + * reshaping to a small piece around the + * breaking position only, even if the breaking + * position carries the + * #HB_GLYPH_FLAG_UNSAFE_TO_BREAK or when + * hyphenation or other text transformation + * happens at line-break position, in the following + * way: + * 1. Iterate back from the line-break position + * until the first cluster start position that is + * NOT unsafe-to-concat, 2. shape the segment from + * there till the end of line, 3. 
check whether the + * resulting glyph-run also is clear of the + * unsafe-to-concat at its start-of-text position; + * if it is, just splice it into place and the line + * is shaped; If not, move on to a position further + * back that is clear of unsafe-to-concat and retry + * from there, and repeat. + * At the start of next line a similar algorithm can + * be implemented. That is: 1. Iterate forward from + * the line-break position until the first cluster + * start position that is NOT unsafe-to-concat, 2. + * shape the segment from beginning of the line to + * that position, 3. check whether the resulting + * glyph-run also is clear of the unsafe-to-concat + * at its end-of-text position; if it is, just splice + * it into place and the beginning is shaped; If not, + * move on to a position further forward that is clear + * of unsafe-to-concat and retry up to there, and repeat. + * A slight complication will arise in the + * implementation of the algorithm above, + * because while our buffer API has a way to + * return flags for position corresponding to + * start-of-text, there is currently no position + * corresponding to end-of-text. This limitation + * can be alleviated by shaping more text than needed + * and looking for unsafe-to-concat flag within text + * clusters. + * The #HB_GLYPH_FLAG_UNSAFE_TO_BREAK flag will + * always imply this flag. + * Since: 3.3.0 * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags. * * Flags for #hb_glyph_info_t. 
@@ -96,8 +146,9 @@ typedef struct hb_glyph_info_t { */ typedef enum { /*< flags >*/ HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001, + HB_GLYPH_FLAG_UNSAFE_TO_CONCAT = 0x00000002, - HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */ + HB_GLYPH_FLAG_DEFINED = 0x00000003 /* OR of all defined flags */ } hb_glyph_flags_t; HB_EXTERN hb_glyph_flags_t @@ -170,6 +221,9 @@ hb_segment_properties_equal (const hb_segment_properties_t *a, HB_EXTERN unsigned int hb_segment_properties_hash (const hb_segment_properties_t *p); +HB_EXTERN void +hb_segment_properties_overlay (hb_segment_properties_t *p, + const hb_segment_properties_t *src); /** @@ -185,6 +239,13 @@ HB_EXTERN hb_buffer_t * hb_buffer_create (void); HB_EXTERN hb_buffer_t * +hb_buffer_create_similar (const hb_buffer_t *src); + +HB_EXTERN void +hb_buffer_reset (hb_buffer_t *buffer); + + +HB_EXTERN hb_buffer_t * hb_buffer_get_empty (void); HB_EXTERN hb_buffer_t * @@ -391,8 +452,9 @@ HB_EXTERN hb_codepoint_t hb_buffer_get_not_found_glyph (hb_buffer_t *buffer); -HB_EXTERN void -hb_buffer_reset (hb_buffer_t *buffer); +/* + * Content API. + */ HB_EXTERN void hb_buffer_clear_contents (hb_buffer_t *buffer); diff --git a/thirdparty/harfbuzz/src/hb-buffer.hh b/thirdparty/harfbuzz/src/hb-buffer.hh index 0f8140f1b3..ac45f090a5 100644 --- a/thirdparty/harfbuzz/src/hb-buffer.hh +++ b/thirdparty/harfbuzz/src/hb-buffer.hh @@ -67,8 +67,8 @@ enum hb_buffer_scratch_flags_t { HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u, HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, - HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, - HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, + HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000010u, + HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS = 0x00000020u, /* Reserved for complex shapers' internal use. 
*/ HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, @@ -87,18 +87,21 @@ struct hb_buffer_t { hb_object_header_t header; - /* Information about how the text in the buffer should be treated */ + /* + * Information about how the text in the buffer should be treated. + */ + hb_unicode_funcs_t *unicode; /* Unicode functions */ hb_buffer_flags_t flags; /* BOT / EOT / etc. */ hb_buffer_cluster_level_t cluster_level; hb_codepoint_t replacement; /* U+FFFD or something else. */ hb_codepoint_t invisible; /* 0 or something else. */ hb_codepoint_t not_found; /* 0 or something else. */ - hb_buffer_scratch_flags_t scratch_flags; /* Have space-fallback, etc. */ - unsigned int max_len; /* Maximum allowed len. */ - int max_ops; /* Maximum allowed operations. */ - /* Buffer contents */ + /* + * Buffer contents + */ + hb_buffer_content_type_t content_type; hb_segment_properties_t props; /* Script, language, direction */ @@ -115,8 +118,6 @@ struct hb_buffer_t hb_glyph_info_t *out_info; hb_glyph_position_t *pos; - unsigned int serial; - /* Text before / after the main buffer contents. * Always in Unicode, and ordered outward. * Index 0 is for "pre-context", 1 for "post-context". */ @@ -124,7 +125,25 @@ struct hb_buffer_t hb_codepoint_t context[2][CONTEXT_LENGTH]; unsigned int context_len[2]; - /* Debugging API */ + + /* + * Managed by enter / leave + */ + +#ifndef HB_NDEBUG + uint8_t allocated_var_bits; +#endif + uint8_t serial; + hb_buffer_scratch_flags_t scratch_flags; /* Have space-fallback, etc. */ + unsigned int max_len; /* Maximum allowed len. */ + int max_ops; /* Maximum allowed operations. */ + /* The bits here reflect current allocations of the bytes in glyph_info_t's var1 and var2. */ + + + /* + * Messaging callback + */ + #ifndef HB_NO_BUFFER_MESSAGE hb_buffer_message_func_t message_func; void *message_data; @@ -134,11 +153,6 @@ struct hb_buffer_t static constexpr unsigned message_depth = 0u; #endif - /* Internal debugging. 
*/ - /* The bits here reflect current allocations of the bytes in glyph_info_t's var1 and var2. */ -#ifndef HB_NDEBUG - uint8_t allocated_var_bits; -#endif /* Methods */ @@ -190,12 +204,17 @@ struct hb_buffer_t hb_glyph_info_t &prev () { return out_info[out_len ? out_len - 1 : 0]; } hb_glyph_info_t prev () const { return out_info[out_len ? out_len - 1 : 0]; } + HB_INTERNAL void similar (const hb_buffer_t &src); HB_INTERNAL void reset (); HB_INTERNAL void clear (); + /* Called around shape() */ + HB_INTERNAL void enter (); + HB_INTERNAL void leave (); + unsigned int backtrack_len () const { return have_output ? out_len : idx; } unsigned int lookahead_len () const { return len - idx; } - unsigned int next_serial () { return serial++; } + uint8_t next_serial () { return ++serial ? serial : ++serial; } HB_INTERNAL void add (hb_codepoint_t codepoint, unsigned int cluster); @@ -252,7 +271,7 @@ struct hb_buffer_t HB_INTERNAL void guess_segment_properties (); - HB_INTERNAL void swap_buffers (); + HB_INTERNAL void sync (); HB_INTERNAL void clear_output (); HB_INTERNAL void clear_positions (); @@ -366,15 +385,83 @@ struct hb_buffer_t /* Merge clusters for deleting current glyph, and skip it. */ HB_INTERNAL void delete_glyph (); - void unsafe_to_break (unsigned int start, - unsigned int end) + + /* Adds glyph flags in mask to infos with clusters between start and end. + * The start index will be from out-buffer if from_out_buffer is true. + * If interior is true, then the cluster having the minimum value is skipped. 
*/ + void _set_glyph_flags (hb_mask_t mask, + unsigned start = 0, + unsigned end = (unsigned) -1, + bool interior = false, + bool from_out_buffer = false) { - if (end - start < 2) + end = hb_min (end, len); + + if (interior && !from_out_buffer && end - start < 2) return; - unsafe_to_break_impl (start, end); + + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + + if (!from_out_buffer || !have_output) + { + if (!interior) + { + for (unsigned i = start; i < end; i++) + info[i].mask |= mask; + } + else + { + unsigned cluster = _infos_find_min_cluster (info, start, end); + _infos_set_glyph_flags (info, start, end, cluster, mask); + } + } + else + { + assert (start <= out_len); + assert (idx <= end); + + if (!interior) + { + for (unsigned i = start; i < out_len; i++) + out_info[i].mask |= mask; + for (unsigned i = idx; i < end; i++) + info[i].mask |= mask; + } + else + { + unsigned cluster = _infos_find_min_cluster (info, idx, end); + cluster = _infos_find_min_cluster (out_info, start, out_len, cluster); + + _infos_set_glyph_flags (out_info, start, out_len, cluster, mask); + _infos_set_glyph_flags (info, idx, end, cluster, mask); + } + } + } + + void unsafe_to_break (unsigned int start = 0, unsigned int end = -1) + { + _set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + true); + } + void unsafe_to_concat (unsigned int start = 0, unsigned int end = -1) + { + _set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + true); + } + void unsafe_to_break_from_outbuffer (unsigned int start = 0, unsigned int end = -1) + { + _set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK | HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + true, true); + } + void unsafe_to_concat_from_outbuffer (unsigned int start = 0, unsigned int end = -1) + { + _set_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_CONCAT, + start, end, + false, true); } - HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); - HB_INTERNAL void 
unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); /* Internal methods */ @@ -465,36 +552,31 @@ struct hb_buffer_t set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0) { if (inf.cluster != cluster) - { - if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - else - inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - } + inf.mask = (inf.mask & ~HB_GLYPH_FLAG_DEFINED) | (mask & HB_GLYPH_FLAG_DEFINED); inf.cluster = cluster; } - + void + _infos_set_glyph_flags (hb_glyph_info_t *infos, + unsigned int start, unsigned int end, + unsigned int cluster, + hb_mask_t mask) + { + for (unsigned int i = start; i < end; i++) + if (cluster != infos[i].cluster) + { + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS; + infos[i].mask |= mask; + } + } static unsigned _infos_find_min_cluster (const hb_glyph_info_t *infos, unsigned start, unsigned end, - unsigned cluster) + unsigned cluster = UINT_MAX) { for (unsigned int i = start; i < end; i++) cluster = hb_min (cluster, infos[i].cluster); return cluster; } - void - _unsafe_to_break_set_mask (hb_glyph_info_t *infos, - unsigned int start, unsigned int end, - unsigned int cluster) - { - for (unsigned int i = start; i < end; i++) - if (cluster != infos[i].cluster) - { - scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK; - infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - } - } void clear_glyph_flags (hb_mask_t mask = 0) { diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-common.hh index c251e2d0ed..641de0eff2 100644 --- a/thirdparty/harfbuzz/src/hb-cff-interp-common.hh +++ b/thirdparty/harfbuzz/src/hb-cff-interp-common.hh @@ -217,9 +217,6 @@ inline unsigned int OpCode_Size (op_code_t op) { return Is_OpCode_ESC (op) ? 
2: struct number_t { - void init () { set_real (0.0); } - void fini () {} - void set_int (int v) { value = v; } int to_int () const { return value; } @@ -245,7 +242,7 @@ struct number_t } protected: - double value; + double value = 0.; }; /* byte string */ @@ -380,10 +377,8 @@ struct cff_stack_t count = 0; elements.init (); elements.resize (kSizeLimit); - for (unsigned int i = 0; i < elements.length; i++) - elements[i].init (); } - void fini () { elements.fini_deep (); } + void fini () { elements.fini (); } ELEM& operator [] (unsigned int i) { @@ -523,9 +518,6 @@ struct arg_stack_t : cff_stack_t<ARG, 513> /* an operator prefixed by its operands in a byte string */ struct op_str_t { - void init () {} - void fini () {} - op_code_t op; byte_str_t str; }; @@ -553,7 +545,7 @@ struct parsed_values_t opStart = 0; values.init (); } - void fini () { values.fini_deep (); } + void fini () { values.fini (); } void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ()) { diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh index 52d778ffe2..ef299369b5 100644 --- a/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh +++ b/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh @@ -94,12 +94,6 @@ struct biased_subrs_t struct point_t { - void init () - { - x.init (); - y.init (); - } - void set_int (int _x, int _y) { x.set_int (_x); @@ -128,7 +122,7 @@ struct cs_interp_env_t : interp_env_t<ARG> hstem_count = 0; vstem_count = 0; hintmask_size = 0; - pt.init (); + pt.set_int (0, 0); callStack.init (); globalSubrs.init (globalSubrs_); localSubrs.init (localSubrs_); @@ -841,7 +835,6 @@ struct path_procs_t if (likely (env.argStack.get_count () == 11)) { point_t d; - d.init (); for (unsigned int i = 0; i < 10; i += 2) d.move (env.eval_arg (i), env.eval_arg (i+1)); diff --git a/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh index d961566447..766183760e 100644 --- 
a/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh +++ b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh @@ -35,18 +35,6 @@ using namespace OT; struct blend_arg_t : number_t { - void init () - { - number_t::init (); - deltas.init (); - } - - void fini () - { - number_t::fini (); - deltas.fini_deep (); - } - void set_int (int v) { reset_blends (); number_t::set_int (v); } void set_fixed (int32_t v) { reset_blends (); number_t::set_fixed (v); } void set_real (double v) { reset_blends (); number_t::set_real (v); } @@ -202,7 +190,7 @@ struct cff2_cs_opset_t : cs_opset_t<blend_arg_t, OPSET, cff2_cs_interp_env_t, PA switch (op) { case OpCode_callsubr: case OpCode_callgsubr: - /* a subroutine number shoudln't be a blended value */ + /* a subroutine number shouldn't be a blended value */ if (unlikely (env.argStack.peek ().blending ())) { env.set_error (); diff --git a/thirdparty/harfbuzz/src/hb-common.cc b/thirdparty/harfbuzz/src/hb-common.cc index 26c8ad0f49..249a8a8010 100644 --- a/thirdparty/harfbuzz/src/hb-common.cc +++ b/thirdparty/harfbuzz/src/hb-common.cc @@ -29,10 +29,31 @@ #include "hb.hh" #include "hb-machinery.hh" +#if !defined(HB_NO_SETLOCALE) && (!defined(HAVE_NEWLOCALE) || !defined(HAVE_USELOCALE)) +#define HB_NO_SETLOCALE 1 +#endif + +#ifndef HB_NO_SETLOCALE + #include <locale.h> +#ifdef HAVE_XLOCALE_H +#include <xlocale.h> // Needed on BSD/OS X for uselocale +#endif + +#ifdef WIN32 +#define hb_locale_t _locale_t +#else +#define hb_locale_t locale_t +#endif +#define hb_setlocale setlocale +#define hb_uselocale uselocale + +#else + +#define hb_locale_t void * +#define hb_setlocale(Category, Locale) "C" +#define hb_uselocale(Locale) ((hb_locale_t) 0) -#ifdef HB_NO_SETLOCALE -#define setlocale(Category, Locale) "C" #endif /** @@ -122,7 +143,7 @@ hb_tag_from_string (const char *str, int len) * @tag: #hb_tag_t to convert * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t): Converted string * - * Converts an #hb_tag_t to a string and returns it in 
@buf. + * Converts an #hb_tag_t to a string and returns it in @buf. * Strings will be four characters long. * * Since: 0.9.5 @@ -151,13 +172,13 @@ const char direction_strings[][4] = { * @str: (array length=len) (element-type uint8_t): String to convert * @len: Length of @str, or -1 if it is %NULL-terminated * - * Converts a string to an #hb_direction_t. + * Converts a string to an #hb_direction_t. * * Matching is loose and applies only to the first letter. For * examples, "LTR" and "left-to-right" will both return #HB_DIRECTION_LTR. * * Unmatched strings will return #HB_DIRECTION_INVALID. - * + * * Return value: The #hb_direction_t matching @str * * Since: 0.9.2 @@ -413,7 +434,7 @@ hb_language_get_default () hb_language_t language = default_language; if (unlikely (language == HB_LANGUAGE_INVALID)) { - language = hb_language_from_string (setlocale (LC_CTYPE, nullptr), -1); + language = hb_language_from_string (hb_setlocale (LC_CTYPE, nullptr), -1); (void) default_language.cmpexch (HB_LANGUAGE_INVALID, language); } @@ -1039,6 +1060,47 @@ hb_variation_from_string (const char *str, int len, return false; } +#ifndef HB_NO_SETLOCALE + +static inline void free_static_C_locale (); + +static struct hb_C_locale_lazy_loader_t : hb_lazy_loader_t<hb_remove_pointer<hb_locale_t>, + hb_C_locale_lazy_loader_t> +{ + static hb_locale_t create () + { + hb_locale_t l = newlocale (LC_ALL_MASK, "C", NULL); + if (!l) + return l; + + hb_atexit (free_static_C_locale); + + return l; + } + static void destroy (hb_locale_t l) + { + freelocale (l); + } + static hb_locale_t get_null () + { + return (hb_locale_t) 0; + } +} static_C_locale; + +static inline +void free_static_C_locale () +{ + static_C_locale.free_instance (); +} + +static hb_locale_t +get_C_locale () +{ + return static_C_locale.get_unconst (); +} + +#endif + /** * hb_variation_to_string: * @variation: an #hb_variation_t to convert @@ -1064,7 +1126,11 @@ hb_variation_to_string (hb_variation_t *variation, while (len && s[len - 1] == 
' ') len--; s[len++] = '='; + + hb_locale_t oldlocale HB_UNUSED; + oldlocale = hb_uselocale (get_C_locale ()); len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", (double) variation->value)); + (void) hb_uselocale (oldlocale); assert (len < ARRAY_LENGTH (s)); len = hb_min (len, size - 1); diff --git a/thirdparty/harfbuzz/src/hb-coretext.cc b/thirdparty/harfbuzz/src/hb-coretext.cc index a512f3b8b7..5f383064c4 100644 --- a/thirdparty/harfbuzz/src/hb-coretext.cc +++ b/thirdparty/harfbuzz/src/hb-coretext.cc @@ -481,8 +481,8 @@ struct active_feature_t { a->rec.setting < b->rec.setting ? -1 : a->rec.setting > b->rec.setting ? 1 : 0; } - bool operator== (const active_feature_t *f) { - return cmp (this, f) == 0; + bool operator== (const active_feature_t& f) const { + return cmp (this, &f) == 0; } }; @@ -677,7 +677,7 @@ _hb_coretext_shape (hb_shape_plan_t *shape_plan, { active_features.push (event->feature); } else { - active_feature_t *feature = active_features.find (&event->feature); + active_feature_t *feature = active_features.lsearch (event->feature); if (feature) active_features.remove (feature - active_features.arrayZ); } @@ -1213,7 +1213,8 @@ resize_and_retry: } } - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); #undef FAIL diff --git a/thirdparty/harfbuzz/src/hb-directwrite.cc b/thirdparty/harfbuzz/src/hb-directwrite.cc index dea87b8cd0..f177ff31c0 100644 --- a/thirdparty/harfbuzz/src/hb-directwrite.cc +++ b/thirdparty/harfbuzz/src/hb-directwrite.cc @@ -762,7 +762,8 @@ retry_getglyphs: if (isRightToLeft) hb_buffer_reverse (buffer); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); delete [] clusterMap; delete [] glyphIndices; diff --git a/thirdparty/harfbuzz/src/hb-draw.h b/thirdparty/harfbuzz/src/hb-draw.h index bddc876399..f82cc34842 100644 --- a/thirdparty/harfbuzz/src/hb-draw.h +++ 
b/thirdparty/harfbuzz/src/hb-draw.h @@ -50,7 +50,7 @@ typedef void (*hb_draw_close_path_func_t) (void *user_data); * * Glyph draw callbacks. * - * _move_to, _line_to and _cubic_to calls are nessecary to be defined but we + * _move_to, _line_to and _cubic_to calls are necessary to be defined but we * translate _quadratic_to calls to _cubic_to if the callback isn't defined. * * Since: EXPERIMENTAL diff --git a/thirdparty/harfbuzz/src/hb-face.cc b/thirdparty/harfbuzz/src/hb-face.cc index 2c0087370c..5365598636 100644 --- a/thirdparty/harfbuzz/src/hb-face.cc +++ b/thirdparty/harfbuzz/src/hb-face.cc @@ -143,7 +143,7 @@ hb_face_create_for_tables (hb_reference_table_func_t reference_table_func, typedef struct hb_face_for_data_closure_t { hb_blob_t *blob; - unsigned int index; + uint16_t index; } hb_face_for_data_closure_t; static hb_face_for_data_closure_t * @@ -156,7 +156,7 @@ _hb_face_for_data_closure_create (hb_blob_t *blob, unsigned int index) return nullptr; closure->blob = blob; - closure->index = index; + closure->index = (uint16_t) (index & 0xFFFFu); return closure; } @@ -195,9 +195,19 @@ _hb_face_for_data_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void * @index: The index of the face within @blob * * Constructs a new face object from the specified blob and - * a face index into that blob. This is used for blobs of - * file formats such as Dfont and TTC that can contain more - * than one face. + * a face index into that blob. + * + * The face index is used for blobs of file formats such as TTC + * and DFont that can contain more than one face. Face indices within + * such collections are zero-based. + * + * <note>Note: If the blob font format is not a collection, @index + * is ignored. Otherwise, only the lower 16-bits of @index are used. + * The unmodified @index can be accessed via hb_face_get_index().</note> + * + * <note>Note: The high 16-bits of @index, if non-zero, are used by + * hb_font_create() to load named-instances in variable fonts. 
See + * hb_font_create() for details.</note> * * Return value: (transfer full): The new face object * @@ -420,7 +430,8 @@ hb_face_reference_blob (hb_face_t *face) * Assigns the specified face-index to @face. Fails if the * face is immutable. * - * <note>Note: face indices within a collection are zero-based.</note> + * <note>Note: changing the index has no effect on the face itself. + * This only changes the value returned by hb_face_get_index().</note> * * Since: 0.9.2 **/ diff --git a/thirdparty/harfbuzz/src/hb-font.cc b/thirdparty/harfbuzz/src/hb-font.cc index fa8da96395..350fcac139 100644 --- a/thirdparty/harfbuzz/src/hb-font.cc +++ b/thirdparty/harfbuzz/src/hb-font.cc @@ -631,7 +631,7 @@ hb_font_funcs_destroy (hb_font_funcs_t *ffuncs) * @destroy: (nullable): A callback to call when @data is not needed anymore * @replace: Whether to replace an existing data with the same key * - * Attaches a user-data key/data pair to the specified font-functions structure. + * Attaches a user-data key/data pair to the specified font-functions structure. * * Return value: %true if success, %false otherwise * @@ -821,7 +821,7 @@ hb_font_get_glyph (hb_font_t *font, * @glyph: (out): The glyph ID retrieved * * Fetches the nominal glyph ID for a Unicode code point in the - * specified font. + * specified font. * * This version of the function should not be used to fetch glyph IDs * for code points modified by variation selectors. For variation-selector @@ -940,7 +940,7 @@ hb_font_get_glyph_v_advance (hb_font_t *font, * @advance_stride: The stride between successive advances * * Fetches the advances for a sequence of glyph IDs in the specified - * font, for horizontal text segments. + * font, for horizontal text segments. * * Since: 1.8.6 **/ @@ -964,7 +964,7 @@ hb_font_get_glyph_h_advances (hb_font_t* font, * @advance_stride: (out): The stride between successive advances * * Fetches the advances for a sequence of glyph IDs in the specified - * font, for vertical text segments. 
+ * font, for vertical text segments. * * Since: 1.8.6 **/ @@ -1278,7 +1278,7 @@ hb_font_get_glyph_origin_for_direction (hb_font_t *font, * @font: #hb_font_t to work upon * @glyph: The glyph ID to query * @direction: The direction of the text segment - * @x: (inout): Input = The original X coordinate + * @x: (inout): Input = The original X coordinate * Output = The X coordinate plus the X-coordinate of the origin * @y: (inout): Input = The original Y coordinate * Output = The Y coordinate plus the Y-coordinate of the origin @@ -1306,7 +1306,7 @@ hb_font_add_glyph_origin_for_direction (hb_font_t *font, * @font: #hb_font_t to work upon * @glyph: The glyph ID to query * @direction: The direction of the text segment - * @x: (inout): Input = The original X coordinate + * @x: (inout): Input = The original X coordinate * Output = The X coordinate minus the X-coordinate of the origin * @y: (inout): Input = The original Y coordinate * Output = The Y coordinate minus the Y-coordinate of the origin @@ -1477,6 +1477,8 @@ DEFINE_NULL_INSTANCE (hb_font_t) = 1000, /* x_scale */ 1000, /* y_scale */ + 0., /* slant */ + 0., /* slant_xy */ 1<<16, /* x_mult */ 1<<16, /* y_mult */ @@ -1521,6 +1523,13 @@ _hb_font_create (hb_face_t *face) * * Constructs a new font object from the specified face. * + * <note>Note: If @face's index value (as passed to hb_face_create()) + * has non-zero top 16-bits, those bits minus one are passed to + * hb_font_set_var_named_instance(), effectively loading a named-instance + * of a variable font, instead of the default-instance. 
This allows + * specifying which named-instance to load by default when creating the + * face.</note> + * * Return value: (transfer full): The new font object * * Since: 0.9.2 @@ -1535,6 +1544,11 @@ hb_font_create (hb_face_t *face) hb_ot_font_set_funcs (font); #endif +#ifndef HB_NO_VAR + if (face && face->index >> 16) + hb_font_set_var_named_instance (font, (face->index >> 16) - 1); +#endif + return font; } @@ -1578,6 +1592,7 @@ hb_font_create_sub_font (hb_font_t *parent) font->x_scale = parent->x_scale; font->y_scale = parent->y_scale; + font->slant = parent->slant; font->mults_changed (); font->x_ppem = parent->x_ppem; font->y_ppem = parent->y_ppem; @@ -1668,12 +1683,12 @@ hb_font_destroy (hb_font_t *font) /** * hb_font_set_user_data: (skip) * @font: #hb_font_t to work upon - * @key: The user-data key + * @key: The user-data key * @data: A pointer to the user data * @destroy: (nullable): A callback to call when @data is not needed anymore * @replace: Whether to replace an existing data with the same key * - * Attaches a user-data key/data pair to the specified font object. + * Attaches a user-data key/data pair to the specified font object. * * Return value: %true if success, %false otherwise * @@ -1875,7 +1890,7 @@ hb_font_set_funcs (hb_font_t *font, * @font_data: (destroy destroy) (scope notified): Data to attach to @font * @destroy: (nullable): The function to call when @font_data is not needed anymore * - * Replaces the user data attached to a font, updating the font's + * Replaces the user data attached to a font, updating the font's * @destroy callback. * * Since: 0.9.2 @@ -1949,7 +1964,7 @@ hb_font_get_scale (hb_font_t *font, * @x_ppem: Horizontal ppem value to assign * @y_ppem: Vertical ppem value to assign * - * Sets the horizontal and vertical pixels-per-em (ppem) of a font. + * Sets the horizontal and vertical pixels-per-em (ppem) of a font. 
* * Since: 0.9.2 **/ @@ -1971,7 +1986,7 @@ hb_font_set_ppem (hb_font_t *font, * @x_ppem: (out): Horizontal ppem value * @y_ppem: (out): Vertical ppem value * - * Fetches the horizontal and vertical points-per-em (ppem) of a font. + * Fetches the horizontal and vertical pixels-per-em (ppem) of a font. * * Since: 0.9.2 **/ @@ -2015,7 +2030,7 @@ hb_font_set_ptem (hb_font_t *font, * * Return value: Point size. A value of zero means "not set." * - * Since: 0.9.2 + * Since: 1.6.0 **/ float hb_font_get_ptem (hb_font_t *font) @@ -2023,6 +2038,49 @@ hb_font_get_ptem (hb_font_t *font) return font->ptem; } +/** + * hb_font_set_synthetic_slant: + * @font: #hb_font_t to work upon + * @slant: synthetic slant value. + * + * Sets the "synthetic slant" of a font. By default is zero. + * Synthetic slant is the graphical skew that the renderer + * applies to the font at rendering time. + * + * HarfBuzz needs to know this value to adjust shaping results, + * metrics, and style values to match the slanted rendering. + * + * <note>Note: The slant value is a ratio. For example, a + * 20% slant would be represented as a 0.2 value.</note> + * + * Since: 3.3.0 + **/ +HB_EXTERN void +hb_font_set_synthetic_slant (hb_font_t *font, float slant) +{ + if (hb_object_is_immutable (font)) + return; + + font->slant = slant; + font->mults_changed (); +} + +/** + * hb_font_get_synthetic_slant: + * @font: #hb_font_t to work upon + * + * Fetches the "synthetic slant" of a font. + * + * Return value: Synthetic slant. By default is zero. + * + * Since: 3.3.0 + **/ +HB_EXTERN float +hb_font_get_synthetic_slant (hb_font_t *font) +{ + return font->slant; +} + #ifndef HB_NO_VAR /* * Variations @@ -2036,6 +2094,10 @@ hb_font_get_ptem (hb_font_t *font) * * Applies a list of font-variation settings to a font. * + * Note that this overrides all existing variations set on @font. + * Axes not included in @variations will be effectively set to their + * default values. 
+ * * Since: 1.4.2 */ void @@ -2091,6 +2153,10 @@ hb_font_set_variations (hb_font_t *font, * Applies a list of variation coordinates (in design-space units) * to a font. * + * Note that this overrides all existing variations set on @font. + * Axes not included in @coords will be effectively set to their + * default values. + * * Since: 1.4.2 */ void @@ -2154,6 +2220,10 @@ hb_font_set_var_named_instance (hb_font_t *font, * Applies a list of variation coordinates (in normalized units) * to a font. * + * Note that this overrides all existing variations set on @font. + * Axes not included in @coords will be effectively set to their + * default values. + * * <note>Note: Coordinates should be normalized to 2.14.</note> * * Since: 1.4.2 @@ -2196,14 +2266,19 @@ hb_font_set_var_coords_normalized (hb_font_t *font, /** * hb_font_get_var_coords_normalized: * @font: #hb_font_t to work upon - * @length: Number of coordinates retrieved + * @length: (out): Number of coordinates retrieved * * Fetches the list of normalized variation coordinates currently * set on a font. * + * Note that this returned array may only contain values for some + * (or none) of the axes; omitted axes effectively have zero values. + * * Return value is valid as long as variation coordinates of the font * are not modified. * + * Return value: coordinates array + * * Since: 1.4.2 */ const int * @@ -2216,18 +2291,24 @@ hb_font_get_var_coords_normalized (hb_font_t *font, return font->coords; } -#ifdef HB_EXPERIMENTAL_API /** * hb_font_get_var_coords_design: * @font: #hb_font_t to work upon - * @length: (out): number of coordinates + * @length: (out): Number of coordinates retrieved + * + * Fetches the list of variation coordinates (in design-space units) currently + * set on a font. + * + * Note that this returned array may only contain values for some + * (or none) of the axes; omitted axes effectively have their default + * values. 
* * Return value is valid as long as variation coordinates of the font * are not modified. * * Return value: coordinates array * - * Since: EXPERIMENTAL + * Since: 3.3.0 */ const float * hb_font_get_var_coords_design (hb_font_t *font, @@ -2239,7 +2320,6 @@ hb_font_get_var_coords_design (hb_font_t *font, return font->design_coords; } #endif -#endif #ifndef HB_DISABLE_DEPRECATED /* diff --git a/thirdparty/harfbuzz/src/hb-font.h b/thirdparty/harfbuzz/src/hb-font.h index 15dc126523..a3bbb2e37b 100644 --- a/thirdparty/harfbuzz/src/hb-font.h +++ b/thirdparty/harfbuzz/src/hb-font.h @@ -1024,6 +1024,12 @@ HB_EXTERN float hb_font_get_ptem (hb_font_t *font); HB_EXTERN void +hb_font_set_synthetic_slant (hb_font_t *font, float slant); + +HB_EXTERN float +hb_font_get_synthetic_slant (hb_font_t *font); + +HB_EXTERN void hb_font_set_variations (hb_font_t *font, const hb_variation_t *variations, unsigned int variations_length); @@ -1033,11 +1039,9 @@ hb_font_set_var_coords_design (hb_font_t *font, const float *coords, unsigned int coords_length); -#ifdef HB_EXPERIMENTAL_API HB_EXTERN const float * hb_font_get_var_coords_design (hb_font_t *font, unsigned int *length); -#endif HB_EXTERN void hb_font_set_var_coords_normalized (hb_font_t *font, diff --git a/thirdparty/harfbuzz/src/hb-font.hh b/thirdparty/harfbuzz/src/hb-font.hh index 1b7f445e8b..0d73589e8c 100644 --- a/thirdparty/harfbuzz/src/hb-font.hh +++ b/thirdparty/harfbuzz/src/hb-font.hh @@ -109,6 +109,8 @@ struct hb_font_t int32_t x_scale; int32_t y_scale; + float slant; + float slant_xy; int64_t x_mult; int64_t y_mult; @@ -617,6 +619,7 @@ struct hb_font_t signed upem = face->get_upem (); x_mult = ((int64_t) x_scale << 16) / upem; y_mult = ((int64_t) y_scale << 16) / upem; + slant_xy = y_scale ? 
slant * x_scale / y_scale : 0.f; } hb_position_t em_mult (int16_t v, int64_t mult) diff --git a/thirdparty/harfbuzz/src/hb-graphite2.cc b/thirdparty/harfbuzz/src/hb-graphite2.cc index 42420ac0b0..63dc18b466 100644 --- a/thirdparty/harfbuzz/src/hb-graphite2.cc +++ b/thirdparty/harfbuzz/src/hb-graphite2.cc @@ -439,7 +439,8 @@ _hb_graphite2_shape (hb_shape_plan_t *shape_plan HB_UNUSED, if (feats) gr_featureval_destroy (feats); gr_seg_destroy (seg); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); return true; } diff --git a/thirdparty/harfbuzz/src/hb-iter.hh b/thirdparty/harfbuzz/src/hb-iter.hh index ad2e45e3c5..43a3098f65 100644 --- a/thirdparty/harfbuzz/src/hb-iter.hh +++ b/thirdparty/harfbuzz/src/hb-iter.hh @@ -90,8 +90,8 @@ struct hb_iter_t * it will be returning pointer to temporary rvalue. * TODO Use a wrapper return type to fix for non-reference type. */ template <typename T = item_t, - hb_enable_if (hb_is_reference (T))> - hb_remove_reference<item_t>* operator -> () const { return hb_addressof (**thiz()); } + hb_enable_if (std::is_reference<T>::value)> + hb_remove_reference<item_t>* operator -> () const { return std::addressof (**thiz()); } item_t operator * () const { return thiz()->__item__ (); } item_t operator * () { return thiz()->__item__ (); } item_t operator [] (unsigned i) const { return thiz()->__item_at__ (i); } @@ -289,7 +289,7 @@ struct hb_is_source_of { private: template <typename Iter2 = Iter, - hb_enable_if (hb_is_convertible (typename Iter2::item_t, hb_add_lvalue_reference<hb_add_const<Item>>))> + hb_enable_if (hb_is_convertible (typename Iter2::item_t, hb_add_lvalue_reference<const Item>))> static hb_true_type impl (hb_priority<2>); template <typename Iter2 = Iter> static auto impl (hb_priority<1>) -> decltype (hb_declval (Iter2) >> hb_declval (Item &), hb_true_type ()); diff --git a/thirdparty/harfbuzz/src/hb-kern.hh b/thirdparty/harfbuzz/src/hb-kern.hh index 
3f952fe7fc..9ea945caed 100644 --- a/thirdparty/harfbuzz/src/hb-kern.hh +++ b/thirdparty/harfbuzz/src/hb-kern.hh @@ -49,6 +49,10 @@ struct hb_kern_machine_t hb_mask_t kern_mask, bool scale = true) const { + if (!buffer->message (font, "start kern")) + return; + + buffer->unsafe_to_concat (); OT::hb_ot_apply_context_t c (1, font, buffer); c.set_lookup_mask (kern_mask); c.set_lookup_props (OT::LookupFlag::IgnoreMarks); @@ -67,7 +71,8 @@ struct hb_kern_machine_t } skippy_iter.reset (idx, 1); - if (!skippy_iter.next ()) + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) { idx++; continue; @@ -125,6 +130,8 @@ struct hb_kern_machine_t skip: idx = skippy_iter.idx; } + + (void) buffer->message (font, "end kern"); } const Driver &driver; diff --git a/thirdparty/harfbuzz/src/hb-machinery.hh b/thirdparty/harfbuzz/src/hb-machinery.hh index 010c2570d7..5046ac1933 100644 --- a/thirdparty/harfbuzz/src/hb-machinery.hh +++ b/thirdparty/harfbuzz/src/hb-machinery.hh @@ -244,19 +244,19 @@ struct hb_lazy_loader_t : hb_data_wrapper_t<Data, WheresData> { Stored *p = (Stored *) hb_calloc (1, sizeof (Stored)); if (likely (p)) - p->init (data); + p = new (p) Stored (data); return p; } static Stored *create () { Stored *p = (Stored *) hb_calloc (1, sizeof (Stored)); if (likely (p)) - p->init (); + p = new (p) Stored (); return p; } static void destroy (Stored *p) { - p->fini (); + p->~Stored (); hb_free (p); } diff --git a/thirdparty/harfbuzz/src/hb-map.hh b/thirdparty/harfbuzz/src/hb-map.hh index 793dcf22ca..9341637eac 100644 --- a/thirdparty/harfbuzz/src/hb-map.hh +++ b/thirdparty/harfbuzz/src/hb-map.hh @@ -37,13 +37,10 @@ template <typename K, typename V, typename k_invalid_t = K, typename v_invalid_t = V, - k_invalid_t kINVALID = hb_is_pointer (K) ? 0 : std::is_signed<K>::value ? hb_int_min (K) : (K) -1, - v_invalid_t vINVALID = hb_is_pointer (V) ? 0 : std::is_signed<V>::value ? hb_int_min (V) : (V) -1> + k_invalid_t kINVALID = std::is_pointer<K>::value ? 
0 : std::is_signed<K>::value ? hb_int_min (K) : (K) -1, + v_invalid_t vINVALID = std::is_pointer<V>::value ? 0 : std::is_signed<V>::value ? hb_int_min (V) : (V) -1> struct hb_hashmap_t { - static constexpr K INVALID_KEY = kINVALID; - static constexpr V INVALID_VALUE = vINVALID; - hb_hashmap_t () { init (); } ~hb_hashmap_t () { fini (); } @@ -64,24 +61,40 @@ struct hb_hashmap_t hb_copy (o, *this); } - static_assert (std::is_trivially_copyable<K>::value, ""); - static_assert (std::is_trivially_copyable<V>::value, ""); - static_assert (std::is_trivially_destructible<K>::value, ""); - static_assert (std::is_trivially_destructible<V>::value, ""); - struct item_t { K key; V value; uint32_t hash; - void clear () { key = kINVALID; value = vINVALID; hash = 0; } + void clear () + { + new (std::addressof (key)) K (); + key = hb_coerce<K> (kINVALID); + new (std::addressof (value)) V (); + value = hb_coerce<V> (vINVALID); + hash = 0; + } bool operator == (const K &o) { return hb_deref (key) == hb_deref (o); } bool operator == (const item_t &o) { return *this == o.key; } - bool is_unused () const { return key == kINVALID; } - bool is_tombstone () const { return key != kINVALID && value == vINVALID; } - bool is_real () const { return key != kINVALID && value != vINVALID; } + bool is_unused () const + { + const K inv = hb_coerce<K> (kINVALID); + return key == inv; + } + bool is_tombstone () const + { + const K kinv = hb_coerce<K> (kINVALID); + const V vinv = hb_coerce<V> (vINVALID); + return key != kinv && value == vinv; + } + bool is_real () const + { + const K kinv = hb_coerce<K> (kINVALID); + const V vinv = hb_coerce<V> (vINVALID); + return key != kinv && value != vinv; + } hb_pair_t<K, V> get_pair() const { return hb_pair_t<K, V> (key, value); } }; @@ -118,8 +131,13 @@ struct hb_hashmap_t } void fini_shallow () { - hb_free (items); - items = nullptr; + if (likely (items)) { + unsigned size = mask + 1; + for (unsigned i = 0; i < size; i++) + items[i].~item_t (); + hb_free 
(items); + items = nullptr; + } population = occupancy = 0; } void fini () @@ -163,10 +181,15 @@ struct hb_hashmap_t /* Insert back old items. */ if (old_items) for (unsigned int i = 0; i < old_size; i++) + { if (old_items[i].is_real ()) + { set_with_hash (old_items[i].key, old_items[i].hash, std::move (old_items[i].value)); + } + old_items[i].~item_t (); + } hb_free (old_items); @@ -178,22 +201,22 @@ struct hb_hashmap_t V get (K key) const { - if (unlikely (!items)) return vINVALID; + if (unlikely (!items)) return hb_coerce<V> (vINVALID); unsigned int i = bucket_for (key); - return items[i].is_real () && items[i] == key ? items[i].value : vINVALID; + return items[i].is_real () && items[i] == key ? items[i].value : hb_coerce<V> (vINVALID); } - void del (K key) { set (key, vINVALID); } + void del (K key) { set (key, hb_coerce<V> (vINVALID)); } /* Has interface. */ - static constexpr V SENTINEL = vINVALID; typedef V value_t; value_t operator [] (K k) const { return get (k); } bool has (K k, V *vp = nullptr) const { V v = (*this)[k]; if (vp) *vp = v; - return v != SENTINEL; + const V vinv = hb_coerce<V> (vINVALID); + return v != vinv; } /* Projection. */ V operator () (K k) const { return get (k); } @@ -248,11 +271,13 @@ struct hb_hashmap_t bool set_with_hash (K key, uint32_t hash, VV&& value) { if (unlikely (!successful)) return false; - if (unlikely (key == kINVALID)) return true; + const K kinv = hb_coerce<K> (kINVALID); + if (unlikely (key == kinv)) return true; if (unlikely ((occupancy + occupancy / 2) >= mask && !resize ())) return false; unsigned int i = bucket_for_hash (key, hash); - if (value == vINVALID && items[i].key != key) + const V vinv = hb_coerce<V> (vINVALID); + if (value == vinv && items[i].key != key) return true; /* Trying to delete non-existent key. 
*/ if (!items[i].is_unused ()) diff --git a/thirdparty/harfbuzz/src/hb-meta.hh b/thirdparty/harfbuzz/src/hb-meta.hh index 0ea5774a9f..3fea5d995e 100644 --- a/thirdparty/harfbuzz/src/hb-meta.hh +++ b/thirdparty/harfbuzz/src/hb-meta.hh @@ -29,6 +29,7 @@ #include "hb.hh" +#include <memory> #include <type_traits> #include <utility> @@ -85,30 +86,13 @@ template <> struct hb_priority<0> {}; template <typename T> struct hb_type_identity_t { typedef T type; }; template <typename T> using hb_type_identity = typename hb_type_identity_t<T>::type; -struct -{ - template <typename T> constexpr T* - operator () (T& arg) const - { -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wcast-align" - /* https://en.cppreference.com/w/cpp/memory/addressof */ - return reinterpret_cast<T*> ( - &const_cast<char&> ( - reinterpret_cast<const volatile char&> (arg))); -#pragma GCC diagnostic pop - } -} -HB_FUNCOBJ (hb_addressof); - template <typename T> static inline T hb_declval (); #define hb_declval(T) (hb_declval<T> ()) template <typename T> struct hb_match_const : hb_type_identity_t<T>, hb_false_type {}; template <typename T> struct hb_match_const<const T> : hb_type_identity_t<T>, hb_true_type {}; template <typename T> using hb_remove_const = typename hb_match_const<T>::type; -template <typename T> using hb_add_const = const T; -#define hb_is_const(T) hb_match_const<T>::value + template <typename T> struct hb_match_reference : hb_type_identity_t<T>, hb_false_type {}; template <typename T> struct hb_match_reference<T &> : hb_type_identity_t<T>, hb_true_type {}; template <typename T> struct hb_match_reference<T &&> : hb_type_identity_t<T>, hb_true_type {}; @@ -119,14 +103,13 @@ template <typename T> using hb_add_lvalue_reference = decltype (_hb_try_add_lval template <typename T> auto _hb_try_add_rvalue_reference (hb_priority<1>) -> hb_type_identity<T&&>; template <typename T> auto _hb_try_add_rvalue_reference (hb_priority<0>) -> hb_type_identity<T>; template <typename T> using 
hb_add_rvalue_reference = decltype (_hb_try_add_rvalue_reference<T> (hb_prioritize)); -#define hb_is_reference(T) hb_match_reference<T>::value + template <typename T> struct hb_match_pointer : hb_type_identity_t<T>, hb_false_type {}; template <typename T> struct hb_match_pointer<T *> : hb_type_identity_t<T>, hb_true_type {}; template <typename T> using hb_remove_pointer = typename hb_match_pointer<T>::type; template <typename T> auto _hb_try_add_pointer (hb_priority<1>) -> hb_type_identity<hb_remove_reference<T>*>; template <typename T> auto _hb_try_add_pointer (hb_priority<1>) -> hb_type_identity<T>; template <typename T> using hb_add_pointer = decltype (_hb_try_add_pointer<T> (hb_prioritize)); -#define hb_is_pointer(T) hb_match_pointer<T>::value /* TODO Add feature-parity to std::decay. */ @@ -137,8 +120,8 @@ template <typename T> using hb_decay = hb_remove_const<hb_remove_reference<T>>; template <typename From, typename To> using hb_is_cr_convertible = hb_bool_constant< hb_is_same (hb_decay<From>, hb_decay<To>) && - (!hb_is_const (From) || hb_is_const (To)) && - (!hb_is_reference (To) || hb_is_const (To) || hb_is_reference (To)) + (!std::is_const<From>::value || std::is_const<To>::value) && + (!std::is_reference<To>::value || std::is_const<To>::value || std::is_reference<To>::value) >; #define hb_is_cr_convertible(From,To) hb_is_cr_convertible<From, To>::value @@ -153,16 +136,6 @@ struct } HB_FUNCOBJ (hb_deref); -struct -{ - template <typename T> constexpr auto - operator () (T&& v) const HB_AUTO_RETURN (std::forward<T> (v)) - - template <typename T> constexpr auto - operator () (T& v) const HB_AUTO_RETURN (hb_addressof (v)) -} -HB_FUNCOBJ (hb_ref); - template <typename T> struct hb_reference_wrapper { @@ -176,7 +149,7 @@ struct hb_reference_wrapper template <typename T> struct hb_reference_wrapper<T&> { - hb_reference_wrapper (T& v) : v (hb_addressof (v)) {} + hb_reference_wrapper (T& v) : v (std::addressof (v)) {} bool operator == (const hb_reference_wrapper& 
o) const { return v == o.v; } bool operator != (const hb_reference_wrapper& o) const { return v != o.v; } operator T& () const { return *v; } diff --git a/thirdparty/harfbuzz/src/hb-ms-feature-ranges.cc b/thirdparty/harfbuzz/src/hb-ms-feature-ranges.cc deleted file mode 100644 index 6d09b252d8..0000000000 --- a/thirdparty/harfbuzz/src/hb-ms-feature-ranges.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright © 2011,2012,2013 Google, Inc. - * Copyright © 2021 Khaled Hosny - * - * This is part of HarfBuzz, a text shaping library. - * - * Permission is hereby granted, without written agreement and without - * license or royalty fees, to use, copy, modify, and distribute this - * software and its documentation for any purpose, provided that the - * above copyright notice and the following two paragraphs appear in - * all copies of this software. - * - * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR - * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES - * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN - * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, - * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS - * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO - * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - * - * Google Author(s): Behdad Esfahbod - */ - -#include "hb-ms-feature-ranges.hh" - -bool -hb_ms_setup_features (const hb_feature_t *features, - unsigned int num_features, - hb_vector_t<hb_ms_feature_t> &feature_records, /* OUT */ - hb_vector_t<hb_ms_range_record_t> &range_records /* OUT */) -{ - feature_records.shrink(0); - range_records.shrink(0); - - /* Sort features by start/end events. 
*/ - hb_vector_t<hb_ms_feature_event_t> feature_events; - for (unsigned int i = 0; i < num_features; i++) - { - hb_ms_active_feature_t feature; - feature.fea.tag_le = hb_uint32_swap (features[i].tag); - feature.fea.value = features[i].value; - feature.order = i; - - hb_ms_feature_event_t *event; - - event = feature_events.push (); - event->index = features[i].start; - event->start = true; - event->feature = feature; - - event = feature_events.push (); - event->index = features[i].end; - event->start = false; - event->feature = feature; - } - feature_events.qsort (); - /* Add a strategic final event. */ - { - hb_ms_active_feature_t feature; - feature.fea.tag_le = 0; - feature.fea.value = 0; - feature.order = num_features + 1; - - auto *event = feature_events.push (); - event->index = 0; /* This value does magic. */ - event->start = false; - event->feature = feature; - } - - /* Scan events and save features for each range. */ - hb_vector_t<hb_ms_active_feature_t> active_features; - unsigned int last_index = 0; - for (unsigned int i = 0; i < feature_events.length; i++) - { - auto *event = &feature_events[i]; - - if (event->index != last_index) - { - /* Save a snapshot of active features and the range. */ - auto *range = range_records.push (); - auto offset = feature_records.length; - - active_features.qsort (); - for (unsigned int j = 0; j < active_features.length; j++) - { - if (!j || active_features[j].fea.tag_le != feature_records[feature_records.length - 1].tag_le) - { - feature_records.push (active_features[j].fea); - } - else - { - /* Overrides value for existing feature. */ - feature_records[feature_records.length - 1].value = active_features[j].fea.value; - } - } - - /* Will convert to pointer after all is ready, since feature_records.array - * may move as we grow it. 
*/ - range->features.features = reinterpret_cast<hb_ms_feature_t *> (offset); - range->features.num_features = feature_records.length - offset; - range->index_first = last_index; - range->index_last = event->index - 1; - - last_index = event->index; - } - - if (event->start) - { - active_features.push (event->feature); - } - else - { - auto *feature = active_features.find (&event->feature); - if (feature) - active_features.remove (feature - active_features.arrayZ); - } - } - - if (!range_records.length) /* No active feature found. */ - num_features = 0; - - /* Fixup the pointers. */ - for (unsigned int i = 0; i < range_records.length; i++) - { - auto *range = &range_records[i]; - range->features.features = (hb_ms_feature_t *) feature_records + reinterpret_cast<uintptr_t> (range->features.features); - } - - return !!num_features; -} - -void -hb_ms_make_feature_ranges (hb_vector_t<hb_ms_feature_t> &feature_records, - hb_vector_t<hb_ms_range_record_t> &range_records, - unsigned int chars_offset, - unsigned int chars_len, - uint16_t *log_clusters, - hb_vector_t<hb_ms_features_t*> &range_features, /* OUT */ - hb_vector_t<uint32_t> &range_counts /* OUT */) -{ - range_features.shrink (0); - range_counts.shrink (0); - - auto *last_range = &range_records[0]; - for (unsigned int i = chars_offset; i < chars_len; i++) - { - auto *range = last_range; - while (log_clusters[i] < range->index_first) - range--; - while (log_clusters[i] > range->index_last) - range++; - if (!range_features.length || - &range->features != range_features[range_features.length - 1]) - { - auto **features = range_features.push (); - auto *c = range_counts.push (); - if (unlikely (!features || !c)) - { - range_features.shrink (0); - range_counts.shrink (0); - break; - } - *features = &range->features; - *c = 1; - } - else - { - range_counts[range_counts.length - 1]++; - } - - last_range = range; - } -} diff --git a/thirdparty/harfbuzz/src/hb-ms-feature-ranges.hh 
b/thirdparty/harfbuzz/src/hb-ms-feature-ranges.hh index 401d1e1d97..d40fdeaa82 100644 --- a/thirdparty/harfbuzz/src/hb-ms-feature-ranges.hh +++ b/thirdparty/harfbuzz/src/hb-ms-feature-ranges.hh @@ -52,8 +52,8 @@ struct hb_ms_active_feature_t { a->fea.value < b->fea.value ? -1 : a->fea.value > b->fea.value ? 1 : 0; } - bool operator== (const hb_ms_active_feature_t *f) - { return cmp (this, f) == 0; } + bool operator== (const hb_ms_active_feature_t& f) const + { return cmp (this, &f) == 0; } }; struct hb_ms_feature_event_t { @@ -77,20 +77,153 @@ struct hb_ms_range_record_t { unsigned int index_last; /* == end - 1 */ }; -HB_INTERNAL bool +static inline bool hb_ms_setup_features (const hb_feature_t *features, unsigned int num_features, hb_vector_t<hb_ms_feature_t> &feature_records, /* OUT */ - hb_vector_t<hb_ms_range_record_t> &range_records /* OUT */); + hb_vector_t<hb_ms_range_record_t> &range_records /* OUT */) +{ + feature_records.shrink(0); + range_records.shrink(0); + /* Sort features by start/end events. */ + hb_vector_t<hb_ms_feature_event_t> feature_events; + for (unsigned int i = 0; i < num_features; i++) + { + hb_ms_active_feature_t feature; + feature.fea.tag_le = hb_uint32_swap (features[i].tag); + feature.fea.value = features[i].value; + feature.order = i; + + hb_ms_feature_event_t *event; + + event = feature_events.push (); + event->index = features[i].start; + event->start = true; + event->feature = feature; + + event = feature_events.push (); + event->index = features[i].end; + event->start = false; + event->feature = feature; + } + feature_events.qsort (); + /* Add a strategic final event. */ + { + hb_ms_active_feature_t feature; + feature.fea.tag_le = 0; + feature.fea.value = 0; + feature.order = num_features + 1; + + auto *event = feature_events.push (); + event->index = 0; /* This value does magic. */ + event->start = false; + event->feature = feature; + } + + /* Scan events and save features for each range. 
*/ + hb_vector_t<hb_ms_active_feature_t> active_features; + unsigned int last_index = 0; + for (unsigned int i = 0; i < feature_events.length; i++) + { + auto *event = &feature_events[i]; + + if (event->index != last_index) + { + /* Save a snapshot of active features and the range. */ + auto *range = range_records.push (); + auto offset = feature_records.length; + + active_features.qsort (); + for (unsigned int j = 0; j < active_features.length; j++) + { + if (!j || active_features[j].fea.tag_le != feature_records[feature_records.length - 1].tag_le) + { + feature_records.push (active_features[j].fea); + } + else + { + /* Overrides value for existing feature. */ + feature_records[feature_records.length - 1].value = active_features[j].fea.value; + } + } + + /* Will convert to pointer after all is ready, since feature_records.array + * may move as we grow it. */ + range->features.features = reinterpret_cast<hb_ms_feature_t *> (offset); + range->features.num_features = feature_records.length - offset; + range->index_first = last_index; + range->index_last = event->index - 1; -HB_INTERNAL void + last_index = event->index; + } + + if (event->start) + { + active_features.push (event->feature); + } + else + { + auto *feature = active_features.lsearch (event->feature); + if (feature) + active_features.remove (feature - active_features.arrayZ); + } + } + + if (!range_records.length) /* No active feature found. */ + num_features = 0; + + /* Fixup the pointers. 
*/ + for (unsigned int i = 0; i < range_records.length; i++) + { + auto *range = &range_records[i]; + range->features.features = (hb_ms_feature_t *) feature_records + reinterpret_cast<uintptr_t> (range->features.features); + } + + return !!num_features; +} + +static inline void hb_ms_make_feature_ranges (hb_vector_t<hb_ms_feature_t> &feature_records, hb_vector_t<hb_ms_range_record_t> &range_records, unsigned int chars_offset, unsigned int chars_len, uint16_t *log_clusters, hb_vector_t<hb_ms_features_t*> &range_features, /* OUT */ - hb_vector_t<uint32_t> &range_counts /* OUT */); + hb_vector_t<uint32_t> &range_counts /* OUT */) +{ + range_features.shrink (0); + range_counts.shrink (0); + + auto *last_range = &range_records[0]; + for (unsigned int i = chars_offset; i < chars_len; i++) + { + auto *range = last_range; + while (log_clusters[i] < range->index_first) + range--; + while (log_clusters[i] > range->index_last) + range++; + if (!range_features.length || + &range->features != range_features[range_features.length - 1]) + { + auto **features = range_features.push (); + auto *c = range_counts.push (); + if (unlikely (!features || !c)) + { + range_features.shrink (0); + range_counts.shrink (0); + break; + } + *features = &range->features; + *c = 1; + } + else + { + range_counts[range_counts.length - 1]++; + } + + last_range = range; + } +} #endif /* HB_MS_FEATURE_RANGES_HH */ diff --git a/thirdparty/harfbuzz/src/hb-object.hh b/thirdparty/harfbuzz/src/hb-object.hh index 0e15cb12c4..4b5bc32ade 100644 --- a/thirdparty/harfbuzz/src/hb-object.hh +++ b/thirdparty/harfbuzz/src/hb-object.hh @@ -53,7 +53,7 @@ struct hb_lockable_set_t item_t *replace_or_insert (T v, lock_t &l, bool replace) { l.lock (); - item_t *item = items.find (v); + item_t *item = items.lsearch (v); if (item) { if (replace) { item_t old = *item; @@ -76,7 +76,7 @@ struct hb_lockable_set_t void remove (T v, lock_t &l) { l.lock (); - item_t *item = items.find (v); + item_t *item = items.lsearch (v); if 
(item) { item_t old = *item; @@ -93,7 +93,7 @@ struct hb_lockable_set_t bool find (T v, item_t *i, lock_t &l) { l.lock (); - item_t *item = items.find (v); + item_t *item = items.lsearch (v); if (item) *i = *item; l.unlock (); diff --git a/thirdparty/harfbuzz/src/hb-ot-cff-common.hh b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh index 180c87cb89..c102c15173 100644 --- a/thirdparty/harfbuzz/src/hb-ot-cff-common.hh +++ b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh @@ -68,8 +68,6 @@ struct code_pair_t typedef hb_vector_t<unsigned char> str_buff_t; struct str_buff_vec_t : hb_vector_t<str_buff_t> { - void fini () { SUPER::fini_deep (); } - unsigned int total_size () const { unsigned int size = 0; diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh index 5dd183e3a0..6fb59315c9 100644 --- a/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh @@ -1144,8 +1144,8 @@ struct cff1 { sc.end_processing (); topDict.fini (); - fontDicts.fini_deep (); - privateDicts.fini_deep (); + fontDicts.fini (); + privateDicts.fini (); hb_blob_destroy (blob); blob = nullptr; } @@ -1245,32 +1245,32 @@ struct cff1 } protected: - hb_blob_t *blob; + hb_blob_t *blob = nullptr; hb_sanitize_context_t sc; public: - const Encoding *encoding; - const Charset *charset; - const CFF1NameIndex *nameIndex; - const CFF1TopDictIndex *topDictIndex; - const CFF1StringIndex *stringIndex; - const CFF1Subrs *globalSubrs; - const CFF1CharStrings *charStrings; - const CFF1FDArray *fdArray; - const CFF1FDSelect *fdSelect; - unsigned int fdCount; + const Encoding *encoding = nullptr; + const Charset *charset = nullptr; + const CFF1NameIndex *nameIndex = nullptr; + const CFF1TopDictIndex *topDictIndex = nullptr; + const CFF1StringIndex *stringIndex = nullptr; + const CFF1Subrs *globalSubrs = nullptr; + const CFF1CharStrings *charStrings = nullptr; + const CFF1FDArray *fdArray = nullptr; + const CFF1FDSelect *fdSelect = nullptr; + 
unsigned int fdCount = 0; cff1_top_dict_values_t topDict; hb_vector_t<cff1_font_dict_values_t> fontDicts; hb_vector_t<PRIVDICTVAL> privateDicts; - unsigned int num_glyphs; + unsigned int num_glyphs = 0; }; struct accelerator_t : accelerator_templ_t<cff1_private_dict_opset_t, cff1_private_dict_values_t> { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { SUPER::init (face); @@ -1295,8 +1295,7 @@ struct cff1 } glyph_names.qsort (); } - - void fini () + ~accelerator_t () { glyph_names.fini (); @@ -1398,7 +1397,10 @@ struct cff1 DEFINE_SIZE_STATIC (4); }; -struct cff1_accelerator_t : cff1::accelerator_t {}; +struct cff1_accelerator_t : cff1::accelerator_t { + cff1_accelerator_t (hb_face_t *face) : cff1::accelerator_t (face) {} +}; + } /* namespace OT */ #endif /* HB_OT_CFF1_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh index 829217feaa..6e1b01c8fe 100644 --- a/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh @@ -397,7 +397,7 @@ struct cff2 template <typename PRIVOPSET, typename PRIVDICTVAL> struct accelerator_templ_t { - void init (hb_face_t *face) + accelerator_templ_t (hb_face_t *face) { topDict.init (); fontDicts.init (); @@ -412,15 +412,15 @@ struct cff2 const OT::cff2 *cff2 = this->blob->template as<OT::cff2> (); if (cff2 == &Null (OT::cff2)) - { fini (); return; } + goto fail; { /* parse top dict */ byte_str_t topDictStr (cff2 + cff2->topDict, cff2->topDictSize); - if (unlikely (!topDictStr.sanitize (&sc))) { fini (); return; } + if (unlikely (!topDictStr.sanitize (&sc))) goto fail; cff2_top_dict_interpreter_t top_interp; top_interp.env.init (topDictStr); topDict.init (); - if (unlikely (!top_interp.interpret (topDict))) { fini (); return; } + if (unlikely (!top_interp.interpret (topDict))) goto fail; } globalSubrs = &StructAtOffset<CFF2Subrs> (cff2, cff2->topDict + cff2->topDictSize); @@ -434,49 +434,55 @@ struct cff2 (globalSubrs == 
&Null (CFF2Subrs)) || unlikely (!globalSubrs->sanitize (&sc)) || (fdArray == &Null (CFF2FDArray)) || unlikely (!fdArray->sanitize (&sc)) || (((fdSelect != &Null (CFF2FDSelect)) && unlikely (!fdSelect->sanitize (&sc, fdArray->count))))) - { fini (); return; } + goto fail; num_glyphs = charStrings->count; if (num_glyphs != sc.get_num_glyphs ()) - { fini (); return; } + goto fail; fdCount = fdArray->count; if (!privateDicts.resize (fdCount)) - { fini (); return; } + goto fail; /* parse font dicts and gather private dicts */ for (unsigned int i = 0; i < fdCount; i++) { const byte_str_t fontDictStr = (*fdArray)[i]; - if (unlikely (!fontDictStr.sanitize (&sc))) { fini (); return; } + if (unlikely (!fontDictStr.sanitize (&sc))) goto fail; cff2_font_dict_values_t *font; cff2_font_dict_interpreter_t font_interp; font_interp.env.init (fontDictStr); font = fontDicts.push (); - if (unlikely (font == &Crap (cff2_font_dict_values_t))) { fini (); return; } + if (unlikely (font == &Crap (cff2_font_dict_values_t))) goto fail; font->init (); - if (unlikely (!font_interp.interpret (*font))) { fini (); return; } + if (unlikely (!font_interp.interpret (*font))) goto fail; const byte_str_t privDictStr (StructAtOffsetOrNull<UnsizedByteStr> (cff2, font->privateDictInfo.offset), font->privateDictInfo.size); - if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; } + if (unlikely (!privDictStr.sanitize (&sc))) goto fail; dict_interpreter_t<PRIVOPSET, PRIVDICTVAL, cff2_priv_dict_interp_env_t> priv_interp; priv_interp.env.init(privDictStr); privateDicts[i].init (); - if (unlikely (!priv_interp.interpret (privateDicts[i]))) { fini (); return; } + if (unlikely (!priv_interp.interpret (privateDicts[i]))) goto fail; privateDicts[i].localSubrs = &StructAtOffsetOrNull<CFF2Subrs> (&privDictStr[0], privateDicts[i].subrsOffset); if (privateDicts[i].localSubrs != &Null (CFF2Subrs) && unlikely (!privateDicts[i].localSubrs->sanitize (&sc))) - { fini (); return; } + goto fail; } - } - void fini 
() + + return; + + fail: + _fini (); + } + ~accelerator_templ_t () { _fini (); } + void _fini () { sc.end_processing (); topDict.fini (); - fontDicts.fini_deep (); - privateDicts.fini_deep (); + fontDicts.fini (); + privateDicts.fini (); hb_blob_destroy (blob); blob = nullptr; } @@ -484,26 +490,28 @@ struct cff2 bool is_valid () const { return blob; } protected: - hb_blob_t *blob; + hb_blob_t *blob = nullptr; hb_sanitize_context_t sc; public: cff2_top_dict_values_t topDict; - const CFF2Subrs *globalSubrs; - const CFF2VariationStore *varStore; - const CFF2CharStrings *charStrings; - const CFF2FDArray *fdArray; - const CFF2FDSelect *fdSelect; - unsigned int fdCount; + const CFF2Subrs *globalSubrs = nullptr; + const CFF2VariationStore *varStore = nullptr; + const CFF2CharStrings *charStrings = nullptr; + const CFF2FDArray *fdArray = nullptr; + const CFF2FDSelect *fdSelect = nullptr; + unsigned int fdCount = 0; hb_vector_t<cff2_font_dict_values_t> fontDicts; hb_vector_t<PRIVDICTVAL> privateDicts; - unsigned int num_glyphs; + unsigned int num_glyphs = 0; }; struct accelerator_t : accelerator_templ_t<cff2_private_dict_opset_t, cff2_private_dict_values_t> { + accelerator_t (hb_face_t *face) : accelerator_templ_t (face) {} + HB_INTERNAL bool get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const; @@ -525,7 +533,10 @@ struct cff2 DEFINE_SIZE_STATIC (5); }; -struct cff2_accelerator_t : cff2::accelerator_t {}; +struct cff2_accelerator_t : cff2::accelerator_t { + cff2_accelerator_t (hb_face_t *face) : cff2::accelerator_t (face) {} +}; + } /* namespace OT */ #endif /* HB_OT_CFF2_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh index d837adc788..fde57cdc5b 100644 --- a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh @@ -369,7 +369,6 @@ struct CmapSubtableFormat4 { accelerator_t () {} accelerator_t (const CmapSubtableFormat4 *subtable) { 
init (subtable); } - ~accelerator_t () { fini (); } void init (const CmapSubtableFormat4 *subtable) { @@ -381,7 +380,6 @@ struct CmapSubtableFormat4 glyphIdArray = idRangeOffset + segCount; glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; } - void fini () {} bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const { @@ -1607,7 +1605,7 @@ struct cmap unsigned format = (this + _.subtable).u.format; if (format == 12) has_format12 = true; - const EncodingRecord *table = hb_addressof (_); + const EncodingRecord *table = std::addressof (_); if (_.platformID == 0 && _.encodingID == 3) unicode_bmp = table; else if (_.platformID == 0 && _.encodingID == 4) unicode_ucs4 = table; else if (_.platformID == 3 && _.encodingID == 1) ms_bmp = table; @@ -1665,7 +1663,7 @@ struct cmap struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { this->table = hb_sanitize_context_t ().reference_table<cmap> (face); bool symbol; @@ -1700,8 +1698,7 @@ struct cmap } } } - - void fini () { this->table.destroy (); } + ~accelerator_t () { this->table.destroy (); } bool get_nominal_glyph (hb_codepoint_t unicode, hb_codepoint_t *glyph) const @@ -1863,7 +1860,9 @@ struct cmap DEFINE_SIZE_ARRAY (4, encodingRecord); }; -struct cmap_accelerator_t : cmap::accelerator_t {}; +struct cmap_accelerator_t : cmap::accelerator_t { + cmap_accelerator_t (hb_face_t *face) : cmap::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh index 14459914ee..23fa56c4f6 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh @@ -360,6 +360,16 @@ struct IndexSubtable struct IndexSubtableRecord { + /* XXX Remove this and fix by not inserting it into vector. 
*/ + IndexSubtableRecord& operator = (const IndexSubtableRecord &o) + { + firstGlyphIndex = o.firstGlyphIndex; + lastGlyphIndex = o.lastGlyphIndex; + offsetToSubtable = (unsigned) o.offsetToSubtable; + assert (offsetToSubtable.is_null ()); + return *this; + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const { TRACE_SANITIZE (this); @@ -809,15 +819,14 @@ struct CBDT struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { - cblc = hb_sanitize_context_t ().reference_table<CBLC> (face); - cbdt = hb_sanitize_context_t ().reference_table<CBDT> (face); + this->cblc = hb_sanitize_context_t ().reference_table<CBLC> (face); + this->cbdt = hb_sanitize_context_t ().reference_table<CBDT> (face); upem = hb_face_get_upem (face); } - - void fini () + ~accelerator_t () { this->cblc.destroy (); this->cbdt.destroy (); @@ -978,7 +987,10 @@ CBLC::subset (hb_subset_context_t *c) const return_trace (CBLC::sink_cbdt (c, &cbdt_prime)); } -struct CBDT_accelerator_t : CBDT::accelerator_t {}; +struct CBDT_accelerator_t : CBDT::accelerator_t { + CBDT_accelerator_t (hb_face_t *face) : CBDT::accelerator_t (face) {} +}; + } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh index 008422d089..dac755c02c 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh @@ -71,7 +71,7 @@ struct hb_colrv1_closure_context_t : bool paint_visited (const void *paint) { hb_codepoint_t delta = (hb_codepoint_t) ((uintptr_t) paint - (uintptr_t) base); - if (visited_paint.has (delta)) + if (visited_paint.in_error() || visited_paint.has (delta)) return true; visited_paint.add (delta); @@ -1270,13 +1270,9 @@ struct COLR struct accelerator_t { - accelerator_t () {} - ~accelerator_t () { fini (); } - - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { colr = hb_sanitize_context_t ().reference_table<COLR> (face); } - 
- void fini () { this->colr.destroy (); } + ~accelerator_t () { this->colr.destroy (); } bool is_valid () { return colr.get_blob ()->length; } @@ -1535,6 +1531,10 @@ struct COLR DEFINE_SIZE_MIN (14); }; +struct COLR_accelerator_t : COLR::accelerator_t { + COLR_accelerator_t (hb_face_t *face) : COLR::accelerator_t (face) {} +}; + } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-colrv1-closure.hh b/thirdparty/harfbuzz/src/hb-ot-color-colrv1-closure.hh index ca85ba6ad6..fbaf2ec26b 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color-colrv1-closure.hh +++ b/thirdparty/harfbuzz/src/hb-ot-color-colrv1-closure.hh @@ -43,7 +43,7 @@ HB_INTERNAL void PaintColrLayers::closurev1 (hb_colrv1_closure_context_t* c) con const LayerList &paint_offset_lists = c->get_colr_table ()->get_layerList (); for (unsigned i = firstLayerIndex; i < firstLayerIndex + numLayers; i++) { - const Paint &paint = hb_addressof (paint_offset_lists) + paint_offset_lists[i]; + const Paint &paint = std::addressof (paint_offset_lists) + paint_offset_lists[i]; paint.dispatch (c); } } diff --git a/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh index d2911f19e6..9741ebd450 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh @@ -202,12 +202,12 @@ struct sbix struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { table = hb_sanitize_context_t ().reference_table<sbix> (face); num_glyphs = face->get_num_glyphs (); } - void fini () { table.destroy (); } + ~accelerator_t () { table.destroy (); } bool has_data () const { return table->has_data (); } @@ -407,7 +407,10 @@ struct sbix DEFINE_SIZE_ARRAY (8, strikes); }; -struct sbix_accelerator_t : sbix::accelerator_t {}; +struct sbix_accelerator_t : sbix::accelerator_t { + sbix_accelerator_t (hb_face_t *face) : sbix::accelerator_t (face) {} +}; + } /* namespace OT */ diff --git 
a/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh index e022ef43b7..fc649f1006 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh @@ -79,9 +79,9 @@ struct SVG struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { table = hb_sanitize_context_t ().reference_table<SVG> (face); } - void fini () { table.destroy (); } + ~accelerator_t () { table.destroy (); } hb_blob_t *reference_blob_for_glyph (hb_codepoint_t glyph_id) const { @@ -116,7 +116,9 @@ struct SVG DEFINE_SIZE_STATIC (10); }; -struct SVG_accelerator_t : SVG::accelerator_t {}; +struct SVG_accelerator_t : SVG::accelerator_t { + SVG_accelerator_t (hb_face_t *face) : SVG::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color.cc b/thirdparty/harfbuzz/src/hb-ot-color.cc index 4170b71317..16077765bd 100644 --- a/thirdparty/harfbuzz/src/hb-ot-color.cc +++ b/thirdparty/harfbuzz/src/hb-ot-color.cc @@ -90,15 +90,15 @@ hb_ot_color_palette_get_count (hb_face_t *face) /** * hb_ot_color_palette_get_name_id: * @face: #hb_face_t to work upon - * @palette_index: The index of the color palette + * @palette_index: The index of the color palette * * Fetches the `name` table Name ID that provides display names for - * a `CPAL` color palette. + * a `CPAL` color palette. * * Palette display names can be generic (e.g., "Default") or provide * specific, themed names (e.g., "Spring", "Summer", "Fall", and "Winter"). * - * Return value: the Named ID found for the palette. + * Return value: the Named ID found for the palette. * If the requested palette has no name the result is #HB_OT_NAME_ID_INVALID. 
* * Since: 2.1.0 @@ -116,7 +116,7 @@ hb_ot_color_palette_get_name_id (hb_face_t *face, * @color_index: The index of the color * * Fetches the `name` table Name ID that provides display names for - * the specificed color in a face's `CPAL` color palette. + * the specified color in a face's `CPAL` color palette. * * Display names can be generic (e.g., "Background") or specific * (e.g., "Eye color"). @@ -256,6 +256,8 @@ hb_ot_color_has_svg (hb_face_t *face) * * Fetches the SVG document for a glyph. The blob may be either plain text or gzip-encoded. * + * If the glyph has no SVG document, the singleton empty blob is returned. + * * Return value: (transfer full): An #hb_blob_t containing the SVG document of the glyph, if available * * Since: 2.1.0 @@ -296,6 +298,8 @@ hb_ot_color_has_png (hb_face_t *face) * as input. To get an optimally sized PNG blob, the UPEM value must be set on the @font * object. If UPEM is unset, the blob returned will be the largest PNG available. * + * If the glyph has no PNG image, the singleton empty blob is returned. 
+ * * Return value: (transfer full): An #hb_blob_t containing the PNG image for the glyph, if available * * Since: 2.1.0 diff --git a/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh b/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh index 6aa34295c7..87a7d800c1 100644 --- a/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh @@ -207,8 +207,7 @@ struct glyf _populate_subset_glyphs (const hb_subset_plan_t *plan, hb_vector_t<SubsetGlyph> *glyphs /* OUT */) const { - OT::glyf::accelerator_t glyf; - glyf.init (plan->source); + OT::glyf::accelerator_t glyf (plan->source); + hb_range (plan->num_output_glyphs ()) | hb_map ([&] (hb_codepoint_t new_gid) @@ -233,8 +232,6 @@ struct glyf }) | hb_sink (glyphs) ; - - glyf.fini (); } static bool @@ -595,7 +592,7 @@ struct glyf if (unlikely (!header.numberOfContours)) return; unsigned flags_offset = length (instructions_length ()); - if (unlikely (length (flags_offset + 1) > bytes.length)) return; + if (unlikely (flags_offset + 1 > bytes.length)) return; HBUINT8 &first_flag = (HBUINT8 &) StructAtOffset<HBUINT16> (&bytes, flags_offset); first_flag = (uint8_t) first_flag | FLAG_OVERLAP_SIMPLE; @@ -920,7 +917,7 @@ struct glyf struct accelerator_t { - void init (hb_face_t *face_) + accelerator_t (hb_face_t *face) { short_offset = false; num_glyphs = 0; @@ -933,7 +930,6 @@ struct glyf #ifndef HB_NO_VERTICAL vmtx = nullptr; #endif - face = face_; const OT::head &head = *face->table.head; if (head.indexToLocFormat > 1 || head.glyphDataFormat > 0) /* Unknown format. Leave num_glyphs=0, that takes care of disabling us. */ @@ -953,8 +949,7 @@ struct glyf num_glyphs = hb_max (1u, loca_table.get_length () / (short_offset ? 
2 : 4)) - 1; num_glyphs = hb_min (num_glyphs, face->get_num_glyphs ()); } - - void fini () + ~accelerator_t () { loca_table.destroy (); glyf_table.destroy (); @@ -1291,7 +1286,6 @@ struct glyf unsigned int num_glyphs; hb_blob_ptr_t<loca> loca_table; hb_blob_ptr_t<glyf> glyf_table; - hb_face_t *face; }; struct SubsetGlyph @@ -1358,7 +1352,10 @@ struct glyf * defining it _MIN instead. */ }; -struct glyf_accelerator_t : glyf::accelerator_t {}; +struct glyf_accelerator_t : glyf::accelerator_t { + glyf_accelerator_t (hb_face_t *face) : glyf::accelerator_t (face) {} +}; + } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh index 7d2d2d3eb8..36bffa70a5 100644 --- a/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh @@ -127,8 +127,7 @@ struct hmtxvmtx T *table_prime = c->serializer->start_embed <T> (); if (unlikely (!table_prime)) return_trace (false); - accelerator_t _mtx; - _mtx.init (c->plan->source); + accelerator_t _mtx (c->plan->source); unsigned num_advances = _mtx.num_advances_for_subset (c->plan); auto it = @@ -144,8 +143,6 @@ struct hmtxvmtx table_prime->serialize (c->serializer, it, num_advances); - _mtx.fini (); - if (unlikely (c->serializer->in_error ())) return_trace (false); @@ -160,8 +157,8 @@ struct hmtxvmtx { friend struct hmtxvmtx; - void init (hb_face_t *face, - unsigned int default_advance_ = 0) + accelerator_t (hb_face_t *face, + unsigned int default_advance_ = 0) { default_advance = default_advance_ ? 
default_advance_ : hb_face_get_upem (face); @@ -193,8 +190,7 @@ struct hmtxvmtx var_table = hb_sanitize_context_t ().reference_table<HVARVVAR> (face, T::variationsTag); } - - void fini () + ~accelerator_t () { table.destroy (); var_table.destroy (); @@ -338,8 +334,12 @@ struct vmtx : hmtxvmtx<vmtx, vhea> { static constexpr bool is_horizontal = false; }; -struct hmtx_accelerator_t : hmtx::accelerator_t {}; -struct vmtx_accelerator_t : vmtx::accelerator_t {}; +struct hmtx_accelerator_t : hmtx::accelerator_t { + hmtx_accelerator_t (hb_face_t *face) : hmtx::accelerator_t (face) {} +}; +struct vmtx_accelerator_t : vmtx::accelerator_t { + vmtx_accelerator_t (hb_face_t *face) : vmtx::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh index 4fb1893435..60a1906155 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh @@ -128,7 +128,7 @@ struct hb_prune_langsys_context_t bool visited (const T *p, hb_set_t &visited_set) { hb_codepoint_t delta = (hb_codepoint_t) ((uintptr_t) p - (uintptr_t) table); - if (visited_set.has (delta)) + if (visited_set.in_error () || visited_set.has (delta)) return true; visited_set.add (delta); @@ -655,7 +655,6 @@ struct LangSys void collect_features (hb_prune_langsys_context_t *c) const { if (!has_required_feature () && !get_feature_count ()) return; - if (c->visitedLangsys (this)) return; if (has_required_feature () && c->duplicate_feature_map->has (reqFeatureIndex)) c->new_feature_indexes->add (get_required_feature_index ()); @@ -750,11 +749,15 @@ struct Script { //only collect features from non-redundant langsys const LangSys& d = get_default_lang_sys (); - d.collect_features (c); + if (!c->visitedLangsys (&d)) { + d.collect_features (c); + } for (auto _ : + hb_zip (langSys, hb_range (langsys_count))) { + const LangSys& l = this+_.first.offset; + if (c->visitedLangsys (&l)) 
continue; if (l.compare (d, c->duplicate_feature_map)) continue; l.collect_features (c); @@ -766,6 +769,7 @@ struct Script for (auto _ : + hb_zip (langSys, hb_range (langsys_count))) { const LangSys& l = this+_.first.offset; + if (c->visitedLangsys (&l)) continue; l.collect_features (c); c->script_langsys_map->get (script_index)->add (_.second); } @@ -845,7 +849,7 @@ struct FeatureParamsSize if (unlikely (!c->check_struct (this))) return_trace (false); /* This subtable has some "history", if you will. Some earlier versions of - * Adobe tools calculated the offset of the FeatureParams sutable from the + * Adobe tools calculated the offset of the FeatureParams subtable from the * beginning of the FeatureList table! Now, that is dealt with in the * Feature implementation. But we still need to be able to tell junk from * real data. Note: We don't check that the nameID actually exists. @@ -2926,8 +2930,6 @@ struct VariationStore hb_vector_t<hb_inc_bimap_t> inner_maps; inner_maps.resize ((unsigned) dataSets.len); - for (unsigned i = 0; i < inner_maps.length; i++) - inner_maps[i].init (); for (unsigned idx : c->plan->layout_variation_indices->iter ()) { @@ -2935,18 +2937,11 @@ struct VariationStore uint16_t minor = idx & 0xFFFF; if (major >= inner_maps.length) - { - for (unsigned i = 0; i < inner_maps.length; i++) - inner_maps[i].fini (); return_trace (false); - } inner_maps[major].add (minor); } varstore_prime->serialize (c->serializer, this, inner_maps.as_array ()); - for (unsigned i = 0; i < inner_maps.length; i++) - inner_maps[i].fini (); - return_trace ( !c->serializer->in_error() && varstore_prime->dataSets); diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh index aea644f3e1..a76d644c4b 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh @@ -585,17 +585,16 @@ struct GDEF struct accelerator_t { - void init (hb_face_t *face) + 
accelerator_t (hb_face_t *face) { - this->table = hb_sanitize_context_t ().reference_table<GDEF> (face); - if (unlikely (this->table->is_blocklisted (this->table.get_blob (), face))) + table = hb_sanitize_context_t ().reference_table<GDEF> (face); + if (unlikely (table->is_blocklisted (table.get_blob (), face))) { - hb_blob_destroy (this->table.get_blob ()); - this->table = hb_blob_get_empty (); + hb_blob_destroy (table.get_blob ()); + table = hb_blob_get_empty (); } } - - void fini () { this->table.destroy (); } + ~accelerator_t () { table.destroy (); } hb_blob_ptr_t<GDEF> table; }; @@ -715,7 +714,9 @@ struct GDEF DEFINE_SIZE_MIN (12); }; -struct GDEF_accelerator_t : GDEF::accelerator_t {}; +struct GDEF_accelerator_t : GDEF::accelerator_t { + GDEF_accelerator_t (hb_face_t *face) : GDEF::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh index 6db3e08940..2f9186a2a7 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh @@ -706,7 +706,7 @@ struct MarkArray : Array16Of<MarkRecord> /* Array of MarkRecords--in Coverage or float mark_x, mark_y, base_x, base_y; - buffer->unsafe_to_break (glyph_pos, buffer->idx); + buffer->unsafe_to_break (glyph_pos, buffer->idx + 1); mark_anchor.get_anchor (c, buffer->cur().codepoint, &mark_x, &mark_y); glyph_anchor.get_anchor (c, buffer->info[glyph_pos].codepoint, &base_x, &base_y); @@ -1235,6 +1235,7 @@ struct PairSet buffer->idx = pos; return_trace (true); } + buffer->unsafe_to_concat (buffer->idx, pos + 1); return_trace (false); } @@ -1362,7 +1363,12 @@ struct PairPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + buffer->unsafe_to_concat (buffer->idx, unsafe_to); + 
return_trace (false); + } return_trace ((this+pairSet[index]).apply (c, valueFormat, skippy_iter.idx)); } @@ -1555,7 +1561,12 @@ struct PairPosFormat2 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + buffer->unsafe_to_concat (buffer->idx, unsafe_to); + return_trace (false); + } unsigned int len1 = valueFormat1.get_len (); unsigned int len2 = valueFormat2.get_len (); @@ -1563,13 +1574,89 @@ struct PairPosFormat2 unsigned int klass1 = (this+classDef1).get_class (buffer->cur().codepoint); unsigned int klass2 = (this+classDef2).get_class (buffer->info[skippy_iter.idx].codepoint); - if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) return_trace (false); + if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) + { + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + return_trace (false); + } const Value *v = &values[record_len * (klass1 * class2Count + klass2)]; - bool applied_first = valueFormat1.apply_value (c, this, v, buffer->cur_pos()); - bool applied_second = valueFormat2.apply_value (c, this, v + len1, buffer->pos[skippy_iter.idx]); + + bool applied_first = false, applied_second = false; + + + /* Isolate simple kerning and apply it half to each side. + * Results in better cursor positinoing / underline drawing. + * + * Disabled, because causes issues... :-( + * https://github.com/harfbuzz/harfbuzz/issues/3408 + * https://github.com/harfbuzz/harfbuzz/pull/3235#issuecomment-1029814978 + */ +#ifndef HB_SPLIT_KERN + if (0) +#endif + { + if (!len2) + { + const hb_direction_t dir = buffer->props.direction; + const bool horizontal = HB_DIRECTION_IS_HORIZONTAL (dir); + const bool backward = HB_DIRECTION_IS_BACKWARD (dir); + unsigned mask = horizontal ? ValueFormat::xAdvance : ValueFormat::yAdvance; + if (backward) + mask |= mask >> 2; /* Add eg. xPlacement in RTL. 
*/ + /* Add Devices. */ + mask |= mask << 4; + + if (valueFormat1 & ~mask) + goto bail; + + /* Is simple kern. Apply value on an empty position slot, + * then split it between sides. */ + + hb_glyph_position_t pos{}; + if (valueFormat1.apply_value (c, this, v, pos)) + { + hb_position_t *src = &pos.x_advance; + hb_position_t *dst1 = &buffer->cur_pos().x_advance; + hb_position_t *dst2 = &buffer->pos[skippy_iter.idx].x_advance; + unsigned i = horizontal ? 0 : 1; + + hb_position_t kern = src[i]; + hb_position_t kern1 = kern >> 1; + hb_position_t kern2 = kern - kern1; + + if (!backward) + { + dst1[i] += kern1; + dst2[i] += kern2; + dst2[i + 2] += kern2; + } + else + { + dst1[i] += kern1; + dst1[i + 2] += src[i + 2] - kern2; + dst2[i] += kern2; + } + + applied_first = applied_second = kern != 0; + goto success; + } + goto boring; + } + } + bail: + + + applied_first = valueFormat1.apply_value (c, this, v, buffer->cur_pos()); + applied_second = valueFormat2.apply_value (c, this, v + len1, buffer->pos[skippy_iter.idx]); + + success: if (applied_first || applied_second) buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1); + else + boring: + buffer->unsafe_to_concat (buffer->idx, skippy_iter.idx + 1); + buffer->idx = skippy_iter.idx; if (len2) @@ -1799,10 +1886,19 @@ struct CursivePosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)]; - if (!prev_record.exitAnchor) return_trace (false); + if (!prev_record.exitAnchor) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } unsigned int i = skippy_iter.idx; 
unsigned int j = buffer->idx; @@ -2066,7 +2162,13 @@ struct MarkBasePosFormat1 skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); do { - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } + /* We only want to attach to the first of a MultipleSubst sequence. * https://github.com/harfbuzz/harfbuzz/issues/740 * Reject others... @@ -2089,7 +2191,11 @@ struct MarkBasePosFormat1 //if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); } unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint); - if (base_index == NOT_COVERED) return_trace (false); + if (base_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx)); } @@ -2320,21 +2426,34 @@ struct MarkLigPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } /* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */ //if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); } unsigned int j = skippy_iter.idx; unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint); - if (lig_index == NOT_COVERED) return_trace (false); + if (lig_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 
1); + return_trace (false); + } const LigatureArray& lig_array = this+ligatureArray; const LigatureAttach& lig_attach = lig_array[lig_index]; /* Find component to attach to */ unsigned int comp_count = lig_attach.rows; - if (unlikely (!comp_count)) return_trace (false); + if (unlikely (!comp_count)) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } /* We must now check whether the ligature ID of the current mark glyph * is identical to the ligature ID of the found ligature. If yes, we @@ -2517,9 +2636,18 @@ struct MarkMarkPosFormat1 hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; skippy_iter.reset (buffer->idx, 1); skippy_iter.set_lookup_props (c->lookup_props & ~LookupFlag::IgnoreFlags); - if (!skippy_iter.prev ()) return_trace (false); + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1); + return_trace (false); + } - if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); } + if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } unsigned int j = skippy_iter.idx; @@ -2544,11 +2672,16 @@ struct MarkMarkPosFormat1 } /* Didn't match. 
*/ + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); return_trace (false); good: unsigned int mark2_index = (this+mark2Coverage).get_coverage (buffer->info[j].codepoint); - if (mark2_index == NOT_COVERED) return_trace (false); + if (mark2_index == NOT_COVERED) + { + buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1); + return_trace (false); + } return_trace ((this+mark1Array).apply (c, mark1_index, mark2_index, this+mark2Array, classCount, j)); } @@ -2951,7 +3084,7 @@ GPOS::position_finish_advances (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer H } void -GPOS::position_finish_offsets (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) +GPOS::position_finish_offsets (hb_font_t *font, hb_buffer_t *buffer) { _hb_buffer_assert_gsubgpos_vars (buffer); @@ -2961,12 +3094,21 @@ GPOS::position_finish_offsets (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) /* Handle attachments */ if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT) - for (unsigned int i = 0; i < len; i++) + for (unsigned i = 0; i < len; i++) propagate_attachment_offsets (pos, len, i, direction); + + if (unlikely (font->slant)) + { + for (unsigned i = 0; i < len; i++) + if (unlikely (pos[i].y_offset)) + pos[i].x_offset += _hb_roundf (font->slant_xy * pos[i].y_offset); + } } -struct GPOS_accelerator_t : GPOS::accelerator_t {}; +struct GPOS_accelerator_t : GPOS::accelerator_t { + GPOS_accelerator_t (hb_face_t *face) : GPOS::accelerator_t (face) {} +}; /* Out-of-class implementation for methods recursing */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh index b7ce30135e..0b0bc547bd 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh @@ -826,22 +826,25 @@ struct Ligature unsigned int total_component_count = 0; - unsigned int match_length = 0; + unsigned int match_end = 0; unsigned int 
match_positions[HB_MAX_CONTEXT_LENGTH]; if (likely (!match_input (c, count, &component[1], match_glyph, nullptr, - &match_length, + &match_end, match_positions, &total_component_count))) + { + c->buffer->unsafe_to_concat (c->buffer->idx, match_end); return_trace (false); + } ligate_input (c, count, match_positions, - match_length, + match_end, ligGlyph, total_component_count); @@ -1296,7 +1299,7 @@ struct ReverseChainSingleSubstFormat1 match_lookahead (c, lookahead.len, (HBUINT16 *) lookahead.arrayZ, match_coverage, this, - 1, &end_index)) + c->buffer->idx + 1, &end_index)) { c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); c->replace_glyph_inplace (substitute[index]); @@ -1305,8 +1308,11 @@ struct ReverseChainSingleSubstFormat1 * calls us through a Context lookup. */ return_trace (true); } - - return_trace (false); + else + { + c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); + return_trace (false); + } } template<typename Iterator, @@ -1739,7 +1745,9 @@ struct GSUB : GSUBGPOS }; -struct GSUB_accelerator_t : GSUB::accelerator_t {}; +struct GSUB_accelerator_t : GSUB::accelerator_t { + GSUB_accelerator_t (hb_face_t *face) : GSUB::accelerator_t (face) {} +}; /* Out-of-class implementation for methods recursing */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh index 191d3bebc5..65de131f85 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh @@ -125,24 +125,31 @@ struct hb_closure_context_t : hb_set_t *covered_glyph_set = done_lookups_glyph_set->get (lookup_index); if (unlikely (covered_glyph_set->in_error ())) return true; - if (parent_active_glyphs ()->is_subset (*covered_glyph_set)) + if (parent_active_glyphs ().is_subset (*covered_glyph_set)) return true; - hb_set_union (covered_glyph_set, parent_active_glyphs ()); + covered_glyph_set->union_ (parent_active_glyphs ()); return false; } - hb_set_t* 
parent_active_glyphs () + const hb_set_t& previous_parent_active_glyphs () { + if (active_glyphs_stack.length <= 1) + return *glyphs; + + return active_glyphs_stack[active_glyphs_stack.length - 2]; + } + + const hb_set_t& parent_active_glyphs () { - if (active_glyphs_stack.length < 1) - return glyphs; + if (!active_glyphs_stack) + return *glyphs; return active_glyphs_stack.tail (); } - void push_cur_active_glyphs (hb_set_t* cur_active_glyph_set) + hb_set_t& push_cur_active_glyphs () { - active_glyphs_stack.push (cur_active_glyph_set); + return *active_glyphs_stack.push (); } bool pop_cur_done_glyphs () @@ -156,29 +163,24 @@ struct hb_closure_context_t : hb_face_t *face; hb_set_t *glyphs; - hb_set_t *cur_intersected_glyphs; hb_set_t output[1]; - hb_vector_t<hb_set_t *> active_glyphs_stack; + hb_vector_t<hb_set_t> active_glyphs_stack; recurse_func_t recurse_func; unsigned int nesting_level_left; hb_closure_context_t (hb_face_t *face_, hb_set_t *glyphs_, - hb_set_t *cur_intersected_glyphs_, hb_map_t *done_lookups_glyph_count_, hb_hashmap_t<unsigned, hb_set_t *> *done_lookups_glyph_set_, unsigned int nesting_level_left_ = HB_MAX_NESTING_LEVEL) : face (face_), glyphs (glyphs_), - cur_intersected_glyphs (cur_intersected_glyphs_), recurse_func (nullptr), nesting_level_left (nesting_level_left_), done_lookups_glyph_count (done_lookups_glyph_count_), done_lookups_glyph_set (done_lookups_glyph_set_), lookup_count (0) - { - push_cur_active_glyphs (glyphs_); - } + {} ~hb_closure_context_t () { flush (); } @@ -186,11 +188,11 @@ struct hb_closure_context_t : void flush () { - hb_set_del_range (output, face->get_num_glyphs (), HB_SET_VALUE_INVALID); /* Remove invalid glyphs. */ - hb_set_union (glyphs, output); - hb_set_clear (output); + output->del_range (face->get_num_glyphs (), HB_SET_VALUE_INVALID); /* Remove invalid glyphs. 
*/ + glyphs->union_ (*output); + output->clear (); active_glyphs_stack.pop (); - active_glyphs_stack.fini (); + active_glyphs_stack.reset (); } private: @@ -520,7 +522,7 @@ struct hb_ot_apply_context_t : may_skip (const hb_glyph_info_t &info) const { return matcher.may_skip (c, info); } - bool next () + bool next (unsigned *unsafe_to = nullptr) { assert (num_items > 0); while (idx + num_items < end) @@ -543,11 +545,17 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_to) + *unsafe_to = idx + 1; return false; + } } + if (unsafe_to) + *unsafe_to = end; return false; } - bool prev () + bool prev (unsigned *unsafe_from = nullptr) { assert (num_items > 0); while (idx > num_items - 1) @@ -570,8 +578,14 @@ struct hb_ot_apply_context_t : } if (skip == matcher_t::SKIP_NO) + { + if (unsafe_from) + *unsafe_from = hb_max (1u, idx) - 1u; return false; + } } + if (unsafe_from) + *unsafe_from = 0; return false; } @@ -712,53 +726,60 @@ struct hb_ot_apply_context_t : return true; } - void _set_glyph_props (hb_codepoint_t glyph_index, + void _set_glyph_class (hb_codepoint_t glyph_index, unsigned int class_guess = 0, bool ligature = false, bool component = false) const { - unsigned int add_in = _hb_glyph_info_get_glyph_props (&buffer->cur()) & - HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE; - add_in |= HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED; + unsigned int props = _hb_glyph_info_get_glyph_props (&buffer->cur()); + props |= HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED; if (ligature) { - add_in |= HB_OT_LAYOUT_GLYPH_PROPS_LIGATED; + props |= HB_OT_LAYOUT_GLYPH_PROPS_LIGATED; /* In the only place that the MULTIPLIED bit is used, Uniscribe * seems to only care about the "last" transformation between * Ligature and Multiple substitutions. Ie. if you ligate, expand, * and ligate again, it forgives the multiplication and acts as * if only ligation happened. As such, clear MULTIPLIED bit. 
*/ - add_in &= ~HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED; + props &= ~HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED; } if (component) - add_in |= HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED; + props |= HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED; if (likely (has_glyph_classes)) - _hb_glyph_info_set_glyph_props (&buffer->cur(), add_in | gdef.get_glyph_props (glyph_index)); + { + props &= HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE; + _hb_glyph_info_set_glyph_props (&buffer->cur(), props | gdef.get_glyph_props (glyph_index)); + } else if (class_guess) - _hb_glyph_info_set_glyph_props (&buffer->cur(), add_in | class_guess); + { + props &= HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE; + _hb_glyph_info_set_glyph_props (&buffer->cur(), props | class_guess); + } + else + _hb_glyph_info_set_glyph_props (&buffer->cur(), props); } void replace_glyph (hb_codepoint_t glyph_index) const { - _set_glyph_props (glyph_index); + _set_glyph_class (glyph_index); (void) buffer->replace_glyph (glyph_index); } void replace_glyph_inplace (hb_codepoint_t glyph_index) const { - _set_glyph_props (glyph_index); + _set_glyph_class (glyph_index); buffer->cur().codepoint = glyph_index; } void replace_glyph_with_ligature (hb_codepoint_t glyph_index, unsigned int class_guess) const { - _set_glyph_props (glyph_index, class_guess, true); + _set_glyph_class (glyph_index, class_guess, true); (void) buffer->replace_glyph (glyph_index); } void output_glyph_for_component (hb_codepoint_t glyph_index, unsigned int class_guess) const { - _set_glyph_props (glyph_index, class_guess, false, true); + _set_glyph_class (glyph_index, class_guess, false, true); (void) buffer->output_glyph (glyph_index); } }; @@ -948,7 +969,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, const HBUINT16 input[], /* Array of input values--start with second glyph */ match_func_t match_func, const void *match_data, - unsigned int *end_offset, + unsigned int *end_position, unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], unsigned int *p_total_component_count = nullptr) 
{ @@ -1001,7 +1022,12 @@ static inline bool match_input (hb_ot_apply_context_t *c, match_positions[0] = buffer->idx; for (unsigned int i = 1; i < count; i++) { - if (!skippy_iter.next ()) return_trace (false); + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + *end_position = unsafe_to; + return_trace (false); + } match_positions[i] = skippy_iter.idx; @@ -1055,7 +1081,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->info[skippy_iter.idx]); } - *end_offset = skippy_iter.idx - buffer->idx + 1; + *end_position = skippy_iter.idx + 1; if (p_total_component_count) *p_total_component_count = total_component_count; @@ -1065,7 +1091,7 @@ static inline bool match_input (hb_ot_apply_context_t *c, static inline bool ligate_input (hb_ot_apply_context_t *c, unsigned int count, /* Including the first glyph */ const unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ - unsigned int match_length, + unsigned int match_end, hb_codepoint_t lig_glyph, unsigned int total_component_count) { @@ -1073,7 +1099,7 @@ static inline bool ligate_input (hb_ot_apply_context_t *c, hb_buffer_t *buffer = c->buffer; - buffer->merge_clusters (buffer->idx, buffer->idx + match_length); + buffer->merge_clusters (buffer->idx, match_end); /* - If a base and one or more marks ligate, consider that as a base, NOT * ligature, such that all following marks can still attach to it. 
@@ -1190,11 +1216,16 @@ static inline bool match_backtrack (hb_ot_apply_context_t *c, skippy_iter.set_match_func (match_func, match_data, backtrack); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.prev ()) + { + unsigned unsafe_from; + if (!skippy_iter.prev (&unsafe_from)) + { + *match_start = unsafe_from; return_trace (false); + } + } *match_start = skippy_iter.idx; - return_trace (true); } @@ -1203,21 +1234,26 @@ static inline bool match_lookahead (hb_ot_apply_context_t *c, const HBUINT16 lookahead[], match_func_t match_func, const void *match_data, - unsigned int offset, + unsigned int start_index, unsigned int *end_index) { TRACE_APPLY (nullptr); hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_context; - skippy_iter.reset (c->buffer->idx + offset - 1, count); + skippy_iter.reset (start_index - 1, count); skippy_iter.set_match_func (match_func, match_data, lookahead); for (unsigned int i = 0; i < count; i++) - if (!skippy_iter.next ()) + { + unsigned unsafe_to; + if (!skippy_iter.next (&unsafe_to)) + { + *end_index = unsafe_to; return_trace (false); + } + } *end_index = skippy_iter.idx + 1; - return_trace (true); } @@ -1284,22 +1320,23 @@ static void context_closure_recurse_lookups (hb_closure_context_t *c, unsigned seqIndex = lookupRecord[i].sequenceIndex; if (seqIndex >= inputCount) continue; - hb_set_t *pos_glyphs = nullptr; + bool has_pos_glyphs = false; + hb_set_t pos_glyphs; if (hb_set_is_empty (covered_seq_indicies) || !hb_set_has (covered_seq_indicies, seqIndex)) { - pos_glyphs = hb_set_create (); + has_pos_glyphs = true; if (seqIndex == 0) { switch (context_format) { case ContextFormat::SimpleContext: - pos_glyphs->add (value); + pos_glyphs.add (value); break; case ContextFormat::ClassBasedContext: - intersected_glyphs_func (c->cur_intersected_glyphs, data, value, pos_glyphs); + intersected_glyphs_func (&c->parent_active_glyphs (), data, value, &pos_glyphs); break; case ContextFormat::CoverageBasedContext: - hb_set_set 
(pos_glyphs, c->cur_intersected_glyphs); + pos_glyphs.set (c->parent_active_glyphs ()); break; } } @@ -1313,12 +1350,16 @@ static void context_closure_recurse_lookups (hb_closure_context_t *c, input_value = input[seqIndex - 1]; } - intersected_glyphs_func (c->glyphs, input_data, input_value, pos_glyphs); + intersected_glyphs_func (c->glyphs, input_data, input_value, &pos_glyphs); } } - hb_set_add (covered_seq_indicies, seqIndex); - c->push_cur_active_glyphs (pos_glyphs ? pos_glyphs : c->glyphs); + covered_seq_indicies->add (seqIndex); + if (has_pos_glyphs) { + c->push_cur_active_glyphs () = pos_glyphs; + } else { + c->push_cur_active_glyphs ().set (*c->glyphs); + } unsigned endIndex = inputCount; if (context_format == ContextFormat::CoverageBasedContext) @@ -1327,8 +1368,6 @@ static void context_closure_recurse_lookups (hb_closure_context_t *c, c->recurse (lookupRecord[i].lookupListIndex, covered_seq_indicies, seqIndex, endIndex); c->pop_cur_done_glyphs (); - if (pos_glyphs) - hb_set_destroy (pos_glyphs); } hb_set_destroy (covered_seq_indicies); @@ -1343,15 +1382,13 @@ static inline void recurse_lookups (context_t *c, c->recurse (lookupRecord[i].lookupListIndex); } -static inline bool apply_lookup (hb_ot_apply_context_t *c, +static inline void apply_lookup (hb_ot_apply_context_t *c, unsigned int count, /* Including the first glyph */ unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ unsigned int lookupCount, const LookupRecord lookupRecord[], /* Array of LookupRecords--in design order */ - unsigned int match_length) + unsigned int match_end) { - TRACE_APPLY (nullptr); - hb_buffer_t *buffer = c->buffer; int end; @@ -1359,7 +1396,7 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c, * Adjust. */ { unsigned int bl = buffer->backtrack_len (); - end = bl + match_length; + end = bl + match_end - buffer->idx; int delta = bl - buffer->idx; /* Convert positions to new indexing. 
*/ @@ -1461,8 +1498,6 @@ static inline bool apply_lookup (hb_ot_apply_context_t *c, } (void) buffer->move_to (end); - - return_trace (true); } @@ -1550,17 +1585,25 @@ static inline bool context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ContextApplyLookupContext &lookup_context) { - unsigned int match_length = 0; - unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; - return match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data, - &match_length, match_positions) - && (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length), - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_length)); + unsigned match_end = 0; + unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; + if (match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data, + &match_end, match_positions)) + { + c->buffer->unsafe_to_break (c->buffer->idx, match_end); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; + } + else + { + c->buffer->unsafe_to_concat (c->buffer->idx, match_end); + return false; + } } struct Rule @@ -1828,8 +1871,9 @@ struct ContextFormat1 void closure (hb_closure_context_t *c) const { - c->cur_intersected_glyphs->clear (); - get_coverage ().intersected_coverage_glyphs (c->parent_active_glyphs (), c->cur_intersected_glyphs); + hb_set_t* cur_active_glyphs = &c->push_cur_active_glyphs (); + get_coverage ().intersected_coverage_glyphs (&c->previous_parent_active_glyphs (), + cur_active_glyphs); struct ContextClosureLookupContext lookup_context = { {intersects_glyph, intersected_glyph}, @@ -1838,10 +1882,14 @@ struct ContextFormat1 }; + hb_zip (this+coverage, hb_range ((unsigned) ruleSet.len)) - | hb_filter (c->parent_active_glyphs (), hb_first) + | hb_filter ([&] (hb_codepoint_t _) { + return c->previous_parent_active_glyphs ().has (_); + }, hb_first) | hb_map ([&](const 
hb_pair_t<hb_codepoint_t, unsigned> _) { return hb_pair_t<unsigned, const RuleSet&> (_.first, this+ruleSet[_.second]); }) | hb_apply ([&] (const hb_pair_t<unsigned, const RuleSet&>& _) { _.second.closure (c, _.first, lookup_context); }) ; + + c->pop_cur_done_glyphs (); } void closure_lookups (hb_closure_lookups_context_t *c) const @@ -1989,8 +2037,9 @@ struct ContextFormat2 if (!(this+coverage).intersects (c->glyphs)) return; - c->cur_intersected_glyphs->clear (); - get_coverage ().intersected_coverage_glyphs (c->parent_active_glyphs (), c->cur_intersected_glyphs); + hb_set_t* cur_active_glyphs = &c->push_cur_active_glyphs (); + get_coverage ().intersected_coverage_glyphs (&c->previous_parent_active_glyphs (), + cur_active_glyphs); const ClassDef &class_def = this+classDef; @@ -2000,10 +2049,9 @@ struct ContextFormat2 &class_def }; - return + hb_enumerate (ruleSet) | hb_filter ([&] (unsigned _) - { return class_def.intersects_class (c->cur_intersected_glyphs, _); }, + { return class_def.intersects_class (&c->parent_active_glyphs (), _); }, hb_first) | hb_apply ([&] (const hb_pair_t<unsigned, const Offset16To<RuleSet>&> _) { @@ -2011,6 +2059,8 @@ struct ContextFormat2 rule_set.closure (c, _.first, lookup_context); }) ; + + c->pop_cur_done_glyphs (); } void closure_lookups (hb_closure_lookups_context_t *c) const @@ -2183,8 +2233,10 @@ struct ContextFormat3 if (!(this+coverageZ[0]).intersects (c->glyphs)) return; - c->cur_intersected_glyphs->clear (); - get_coverage ().intersected_coverage_glyphs (c->parent_active_glyphs (), c->cur_intersected_glyphs); + hb_set_t* cur_active_glyphs = &c->push_cur_active_glyphs (); + get_coverage ().intersected_coverage_glyphs (&c->previous_parent_active_glyphs (), + cur_active_glyphs); + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); struct ContextClosureLookupContext lookup_context = { @@ -2196,6 +2248,8 @@ struct ContextFormat3 glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), 
lookupCount, lookupRecord, 0, lookup_context); + + c->pop_cur_done_glyphs (); } void closure_lookups (hb_closure_lookups_context_t *c) const @@ -2452,25 +2506,38 @@ static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, const LookupRecord lookupRecord[], ChainContextApplyLookupContext &lookup_context) { - unsigned int start_index = 0, match_length = 0, end_index = 0; - unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; - return match_input (c, - inputCount, input, - lookup_context.funcs.match, lookup_context.match_data[1], - &match_length, match_positions) - && match_backtrack (c, - backtrackCount, backtrack, - lookup_context.funcs.match, lookup_context.match_data[0], - &start_index) - && match_lookahead (c, - lookaheadCount, lookahead, - lookup_context.funcs.match, lookup_context.match_data[2], - match_length, &end_index) - && (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index), - apply_lookup (c, - inputCount, match_positions, - lookupCount, lookupRecord, - match_length)); + unsigned end_index = c->buffer->idx; + unsigned match_end = 0; + unsigned match_positions[HB_MAX_CONTEXT_LENGTH]; + if (!(match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data[1], + &match_end, match_positions) && (end_index = match_end) + && match_lookahead (c, + lookaheadCount, lookahead, + lookup_context.funcs.match, lookup_context.match_data[2], + match_end, &end_index))) + { + c->buffer->unsafe_to_concat (c->buffer->idx, end_index); + return false; + } + + unsigned start_index = c->buffer->out_len; + if (!match_backtrack (c, + backtrackCount, backtrack, + lookup_context.funcs.match, lookup_context.match_data[0], + &start_index)) + { + c->buffer->unsafe_to_concat_from_outbuffer (start_index, end_index); + return false; + } + + c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_end); + return true; } struct 
ChainRule @@ -2802,8 +2869,9 @@ struct ChainContextFormat1 void closure (hb_closure_context_t *c) const { - c->cur_intersected_glyphs->clear (); - get_coverage ().intersected_coverage_glyphs (c->parent_active_glyphs (), c->cur_intersected_glyphs); + hb_set_t* cur_active_glyphs = &c->push_cur_active_glyphs (); + get_coverage ().intersected_coverage_glyphs (&c->previous_parent_active_glyphs (), + cur_active_glyphs); struct ChainContextClosureLookupContext lookup_context = { {intersects_glyph, intersected_glyph}, @@ -2812,10 +2880,14 @@ struct ChainContextFormat1 }; + hb_zip (this+coverage, hb_range ((unsigned) ruleSet.len)) - | hb_filter (c->parent_active_glyphs (), hb_first) + | hb_filter ([&] (hb_codepoint_t _) { + return c->previous_parent_active_glyphs ().has (_); + }, hb_first) | hb_map ([&](const hb_pair_t<hb_codepoint_t, unsigned> _) { return hb_pair_t<unsigned, const ChainRuleSet&> (_.first, this+ruleSet[_.second]); }) | hb_apply ([&] (const hb_pair_t<unsigned, const ChainRuleSet&>& _) { _.second.closure (c, _.first, lookup_context); }) ; + + c->pop_cur_done_glyphs (); } void closure_lookups (hb_closure_lookups_context_t *c) const @@ -2964,8 +3036,10 @@ struct ChainContextFormat2 if (!(this+coverage).intersects (c->glyphs)) return; - c->cur_intersected_glyphs->clear (); - get_coverage ().intersected_coverage_glyphs (c->parent_active_glyphs (), c->cur_intersected_glyphs); + hb_set_t* cur_active_glyphs = &c->push_cur_active_glyphs (); + get_coverage ().intersected_coverage_glyphs (&c->previous_parent_active_glyphs (), + cur_active_glyphs); + const ClassDef &backtrack_class_def = this+backtrackClassDef; const ClassDef &input_class_def = this+inputClassDef; @@ -2979,10 +3053,9 @@ struct ChainContextFormat2 &lookahead_class_def} }; - return + hb_enumerate (ruleSet) | hb_filter ([&] (unsigned _) - { return input_class_def.intersects_class (c->cur_intersected_glyphs, _); }, + { return input_class_def.intersects_class (&c->parent_active_glyphs (), _); }, hb_first) | 
hb_apply ([&] (const hb_pair_t<unsigned, const Offset16To<ChainRuleSet>&> _) { @@ -2990,6 +3063,8 @@ struct ChainContextFormat2 chainrule_set.closure (c, _.first, lookup_context); }) ; + + c->pop_cur_done_glyphs (); } void closure_lookups (hb_closure_lookups_context_t *c) const @@ -3216,8 +3291,10 @@ struct ChainContextFormat3 if (!(this+input[0]).intersects (c->glyphs)) return; - c->cur_intersected_glyphs->clear (); - get_coverage ().intersected_coverage_glyphs (c->parent_active_glyphs (), c->cur_intersected_glyphs); + hb_set_t* cur_active_glyphs = &c->push_cur_active_glyphs (); + get_coverage ().intersected_coverage_glyphs (&c->previous_parent_active_glyphs (), + cur_active_glyphs); + const Array16OfOffset16To<Coverage> &lookahead = StructAfter<Array16OfOffset16To<Coverage>> (input); const Array16Of<LookupRecord> &lookup = StructAfter<Array16Of<LookupRecord>> (lookahead); @@ -3232,6 +3309,8 @@ struct ChainContextFormat3 lookahead.len, (const HBUINT16 *) lookahead.arrayZ, lookup.len, lookup.arrayZ, 0, lookup_context); + + c->pop_cur_done_glyphs (); } void closure_lookups (hb_closure_lookups_context_t *c) const @@ -3706,7 +3785,7 @@ struct GSUBGPOS for (unsigned i : feature_indices->iter ()) { hb_tag_t t = get_feature_tag (i); - if (t == unique_features.INVALID_KEY) continue; + if (t == HB_MAP_VALUE_INVALID) continue; if (!unique_features.has (t)) { hb_set_t* indices = hb_set_create (); @@ -3839,7 +3918,7 @@ struct GSUBGPOS template <typename T> struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { this->table = hb_sanitize_context_t ().reference_table<T> (face); if (unlikely (this->table->is_blocklisted (this->table.get_blob (), face))) @@ -3861,8 +3940,7 @@ struct GSUBGPOS for (unsigned int i = 0; i < this->lookup_count; i++) this->accels[i].init (table->get_lookup (i)); } - - void fini () + ~accelerator_t () { for (unsigned int i = 0; i < this->lookup_count; i++) this->accels[i].fini (); diff --git 
a/thirdparty/harfbuzz/src/hb-ot-layout.cc b/thirdparty/harfbuzz/src/hb-ot-layout.cc index 60733648c1..a599eea6e9 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout.cc +++ b/thirdparty/harfbuzz/src/hb-ot-layout.cc @@ -1491,10 +1491,9 @@ hb_ot_layout_lookup_substitute_closure (hb_face_t *face, unsigned int lookup_index, hb_set_t *glyphs /* OUT */) { - hb_set_t cur_intersected_glyphs; hb_map_t done_lookups_glyph_count; hb_hashmap_t<unsigned, hb_set_t *> done_lookups_glyph_set; - OT::hb_closure_context_t c (face, glyphs, &cur_intersected_glyphs, &done_lookups_glyph_count, &done_lookups_glyph_set); + OT::hb_closure_context_t c (face, glyphs, &done_lookups_glyph_count, &done_lookups_glyph_set); const OT::SubstLookup& l = face->table.GSUB->table->get_lookup (lookup_index); @@ -1520,10 +1519,9 @@ hb_ot_layout_lookups_substitute_closure (hb_face_t *face, const hb_set_t *lookups, hb_set_t *glyphs /* OUT */) { - hb_set_t cur_intersected_glyphs; hb_map_t done_lookups_glyph_count; hb_hashmap_t<unsigned, hb_set_t *> done_lookups_glyph_set; - OT::hb_closure_context_t c (face, glyphs, &cur_intersected_glyphs, &done_lookups_glyph_count, &done_lookups_glyph_set); + OT::hb_closure_context_t c (face, glyphs, &done_lookups_glyph_count, &done_lookups_glyph_set); const OT::GSUB& gsub = *face->table.GSUB->table; unsigned int iteration_count = 0; @@ -1890,7 +1888,7 @@ apply_string (OT::hb_ot_apply_context_t *c, apply_forward (c, accel); if (!Proxy::inplace) - buffer->swap_buffers (); + buffer->sync (); } else { diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.hh b/thirdparty/harfbuzz/src/hb-ot-layout.hh index 2c825e0c81..ede8f007db 100644 --- a/thirdparty/harfbuzz/src/hb-ot-layout.hh +++ b/thirdparty/harfbuzz/src/hb-ot-layout.hh @@ -482,10 +482,9 @@ _hb_glyph_info_get_lig_num_comps (const hb_glyph_info_t *info) } static inline uint8_t -_hb_allocate_lig_id (hb_buffer_t *buffer) { +_hb_allocate_lig_id (hb_buffer_t *buffer) +{ uint8_t lig_id = buffer->next_serial () & 0x07; - if (unlikely 
(!lig_id)) - lig_id = _hb_allocate_lig_id (buffer); /* in case of overflow */ return lig_id; } diff --git a/thirdparty/harfbuzz/src/hb-ot-meta-table.hh b/thirdparty/harfbuzz/src/hb-ot-meta-table.hh index e31447f8fc..93e64c5327 100644 --- a/thirdparty/harfbuzz/src/hb-ot-meta-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-meta-table.hh @@ -71,9 +71,9 @@ struct meta struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { table = hb_sanitize_context_t ().reference_table<meta> (face); } - void fini () { table.destroy (); } + ~accelerator_t () { table.destroy (); } hb_blob_t *reference_entry (hb_tag_t tag) const { return table->dataMaps.lsearch (tag).reference_entry (table.get_blob ()); } @@ -119,7 +119,9 @@ struct meta DEFINE_SIZE_ARRAY (16, dataMaps); }; -struct meta_accelerator_t : meta::accelerator_t {}; +struct meta_accelerator_t : meta::accelerator_t { + meta_accelerator_t (hb_face_t *face) : meta::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.cc b/thirdparty/harfbuzz/src/hb-ot-metrics.cc index dbd4a1ffbe..103808cf91 100644 --- a/thirdparty/harfbuzz/src/hb-ot-metrics.cc +++ b/thirdparty/harfbuzz/src/hb-ot-metrics.cc @@ -160,9 +160,50 @@ hb_ot_metrics_get_position (hb_font_t *font, (position && (*position = font->em_scalef_y (face->table.TABLE->ATTR + GET_VAR)), true)) case HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT: return GET_METRIC_Y (OS2, usWinAscent); case HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT: return GET_METRIC_Y (OS2, usWinDescent); - case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE: return GET_METRIC_Y (hhea, caretSlopeRise); - case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN: return GET_METRIC_X (hhea, caretSlopeRun); + + case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE: + case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN: + { + unsigned mult = 1u; + + if (font->slant) + { + unsigned rise = face->table.hhea->caretSlopeRise; + unsigned upem = face->get_upem (); + mult = (rise && rise 
< upem) ? hb_min (upem / rise, 256u) : 1u; + } + + if (metrics_tag == HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE) + { + bool ret = GET_METRIC_Y (hhea, caretSlopeRise); + + if (position) + *position *= mult; + + return ret; + } + else + { + hb_position_t rise = 0; + + if (font->slant && position && GET_METRIC_Y (hhea, caretSlopeRise)) + rise = *position; + + bool ret = GET_METRIC_X (hhea, caretSlopeRun); + + if (position) + { + *position *= mult; + + if (font->slant) + *position += _hb_roundf (mult * font->slant_xy * rise); + } + + return ret; + } + } case HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET: return GET_METRIC_X (hhea, caretOffset); + #ifndef HB_NO_VERTICAL case HB_OT_METRICS_TAG_VERTICAL_CARET_RISE: return GET_METRIC_X (vhea, caretSlopeRise); case HB_OT_METRICS_TAG_VERTICAL_CARET_RUN: return GET_METRIC_Y (vhea, caretSlopeRun); diff --git a/thirdparty/harfbuzz/src/hb-ot-name-table.hh b/thirdparty/harfbuzz/src/hb-ot-name-table.hh index c17bb4abb8..d52367e9b1 100644 --- a/thirdparty/harfbuzz/src/hb-ot-name-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-name-table.hh @@ -256,7 +256,7 @@ struct name }) ; - name_prime->serialize (c->serializer, it, hb_addressof (this + stringOffset)); + name_prime->serialize (c->serializer, it, std::addressof (this + stringOffset)); return_trace (name_prime->count); } @@ -279,7 +279,7 @@ struct name struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { this->table = hb_sanitize_context_t ().reference_table<name> (face); assert (this->table.get_length () >= this->table->stringOffset); @@ -288,7 +288,6 @@ struct name const hb_array_t<const NameRecord> all_names (this->table->nameRecordZ.arrayZ, this->table->count); - this->names.init (); this->names.alloc (all_names.length); for (unsigned int i = 0; i < all_names.length; i++) @@ -318,10 +317,8 @@ struct name } this->names.resize (j); } - - void fini () + ~accelerator_t () { - this->names.fini (); this->table.destroy (); } @@ -373,7 +370,9 @@ struct name 
#undef entry_index #undef entry_score -struct name_accelerator_t : name::accelerator_t {}; +struct name_accelerator_t : name::accelerator_t { + name_accelerator_t (hb_face_t *face) : name::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh b/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh index 504de2de74..0f3cd8e24f 100644 --- a/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh +++ b/thirdparty/harfbuzz/src/hb-ot-post-table-v2subset.hh @@ -76,8 +76,7 @@ HB_INTERNAL bool postV2Tail::subset (hb_subset_context_t *c) const hb_map_t old_new_index_map, old_gid_new_index_map; unsigned i = 0; - post::accelerator_t _post; - _post.init (c->plan->source); + post::accelerator_t _post (c->plan->source); hb_hashmap_t<hb_bytes_t, unsigned, std::nullptr_t, unsigned, nullptr, (unsigned)-1> glyph_name_to_new_index; for (hb_codepoint_t new_gid = 0; new_gid < num_glyphs; new_gid++) @@ -128,9 +127,7 @@ HB_INTERNAL bool postV2Tail::subset (hb_subset_context_t *c) const }) ; - bool ret = serialize (c->serializer, index_iter, &_post); - _post.fini (); - return_trace (ret); + return_trace (serialize (c->serializer, index_iter, &_post)); } } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-post-table.hh b/thirdparty/harfbuzz/src/hb-ot-post-table.hh index 39de671707..a4844e94bc 100644 --- a/thirdparty/harfbuzz/src/hb-ot-post-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-post-table.hh @@ -111,10 +111,9 @@ struct post struct accelerator_t { friend struct postV2Tail; - void init (hb_face_t *face) - { - index_to_offset.init (); + accelerator_t (hb_face_t *face) + { table = hb_sanitize_context_t ().reference_table<post> (face); unsigned int table_length = table.get_length (); @@ -132,9 +131,8 @@ struct post data += 1 + *data) index_to_offset.push (data - pool); } - void fini () + ~accelerator_t () { - index_to_offset.fini (); hb_free (gids_sorted_by_name.get ()); table.destroy (); } @@ -254,9 +252,9 @@ struct 
post private: uint32_t version; - const Array16Of<HBUINT16> *glyphNameIndex; + const Array16Of<HBUINT16> *glyphNameIndex = nullptr; hb_vector_t<uint32_t> index_to_offset; - const uint8_t *pool; + const uint8_t *pool = nullptr; hb_atomic_ptr_t<uint16_t *> gids_sorted_by_name; }; @@ -307,7 +305,10 @@ struct post DEFINE_SIZE_MIN (32); }; -struct post_accelerator_t : post::accelerator_t {}; +struct post_accelerator_t : post::accelerator_t { + post_accelerator_t (hb_face_t *face) : post::accelerator_t (face) {} +}; + } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh index 41e3dd38ab..429974d05b 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh @@ -87,6 +87,8 @@ #define OT_GLYPHID /* GlyphID */ \ OT_UINT16 +/* Shorthand. */ +#define G OT_GLYPHID #define OT_UARRAY(Name, Items) \ OT_LABEL_START(Name) \ @@ -183,8 +185,6 @@ Tag \ OT_OFFSET(manifest, Name) -/* Shorthand. */ -#define G OT_GLYPHID /* * Table Start @@ -300,14 +300,40 @@ OT_TABLE_END /* * Clean up */ + +#undef MANIFEST +#undef MANIFEST_LOOKUP + #undef OT_TABLE_START #undef OT_TABLE_END #undef OT_LABEL_START #undef OT_LABEL_END #undef OT_UINT8 #undef OT_UINT16 -#undef OT_DISTANCE #undef OT_COUNT +#undef OT_DISTANCE + +#undef OT_LABEL +#undef OT_LIST + +#undef OT_TAG +#undef OT_OFFSET +#undef OT_GLYPHID +#undef G +#undef OT_UARRAY +#undef OT_UHEADLESSARRAY + +#undef OT_LOOKUP_FLAG_IGNORE_MARKS +#undef OT_LOOKUP +#undef OT_SUBLOOKUP +#undef OT_COVERAGE1 +#undef OT_LOOKUP_TYPE_SUBST_SINGLE +#undef OT_LOOKUP_TYPE_SUBST_LIGATURE +#undef OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2 +#undef OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1 +#undef OT_LIGATURE_SET +#undef OT_LIGATURE + /* * Include a second time to get the table data... 
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc index 222c5d6b71..2298aa92f2 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc @@ -321,6 +321,20 @@ arabic_joining (hb_buffer_t *buffer) info[prev].arabic_shaping_action() = entry->prev_action; buffer->unsafe_to_break (prev, i + 1); } + else + { + if (prev == UINT_MAX) + { + if (this_type >= JOINING_TYPE_R) + buffer->unsafe_to_concat_from_outbuffer (0, i + 1); + } + else + { + if (this_type >= JOINING_TYPE_R || + (2 <= state && state <= 5) /* States that have a possible prev_action. */) + buffer->unsafe_to_concat (prev, i + 1); + } + } info[i].arabic_shaping_action() = entry->curr_action; @@ -337,7 +351,14 @@ arabic_joining (hb_buffer_t *buffer) const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; if (entry->prev_action != NONE && prev != UINT_MAX) + { info[prev].arabic_shaping_action() = entry->prev_action; + buffer->unsafe_to_break (prev, buffer->len); + } + else if (2 <= state && state <= 5) /* States that have a possible prev_action. */ + { + buffer->unsafe_to_concat (prev, buffer->len); + } break; } } diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc index 0d84a76b85..3bc9e9b961 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc @@ -140,7 +140,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED, * * - LV can be precomposed, or decomposed. Lets call those * <LV> and <L,V>, - * - LVT can be fully precomposed, partically precomposed, or + * - LVT can be fully precomposed, partially precomposed, or * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. * * The composition / decomposition is mechanical. 
However, not @@ -392,7 +392,7 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED, */ (void) buffer->next_glyph (); } - buffer->swap_buffers (); + buffer->sync (); } static void diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-syllabic.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-syllabic.cc index 5a08f878dc..76092c7f38 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-complex-syllabic.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-syllabic.cc @@ -96,7 +96,7 @@ hb_syllabic_insert_dotted_circles (hb_font_t *font, else (void) buffer->next_glyph (); } - buffer->swap_buffers (); + buffer->sync (); } diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc index 4c3068173b..a1e27a83be 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc @@ -364,7 +364,7 @@ preprocess_text_thai (const hb_ot_shape_plan_t *plan, buffer->merge_out_clusters (start - 1, end); } } - buffer->swap_buffers (); + buffer->sync (); /* If font has Thai GSUB, we are done. 
*/ if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0]) diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc index 045731dfb4..d2cca105a4 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc @@ -435,7 +435,7 @@ _hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED, default: break; } - buffer->swap_buffers (); + buffer->sync (); } diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc index eb1bc79768..671f30327f 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc @@ -446,6 +446,9 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, return; #endif + if (!buffer->message (font, "start fallback mark")) + return; + _hb_buffer_assert_gsubgpos_vars (buffer); unsigned int start = 0; @@ -457,6 +460,8 @@ _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, start = i; } position_cluster (plan, font, buffer, start, count, adjust_offsets_when_zeroing); + + (void) buffer->message (font, "end fallback mark"); } @@ -492,6 +497,9 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, #endif #ifndef HB_DISABLE_DEPRECATED + if (!buffer->message (font, "start fallback kern")) + return; + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction) ? 
!font->has_glyph_h_kerning_func () : !font->has_glyph_v_kerning_func ()) @@ -508,6 +516,8 @@ _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, if (reverse) buffer->reverse (); + + (void) buffer->message (font, "end fallback kern"); #endif } @@ -525,6 +535,15 @@ _hb_ot_shape_fallback_spaces (const hb_ot_shape_plan_t *plan HB_UNUSED, for (unsigned int i = 0; i < count; i++) if (_hb_glyph_info_is_unicode_space (&info[i]) && !_hb_glyph_info_ligated (&info[i])) { + /* If font had no ASCII space and we used the invisible glyph, give it a 1/4 EM default advance. */ + if (buffer->invisible && info[i].codepoint == buffer->invisible) + { + if (horizontal) + pos[i].x_advance = +font->x_scale / 4; + else + pos[i].y_advance = -font->y_scale / 4; + } + hb_unicode_funcs_t::space_t space_type = _hb_glyph_info_get_unicode_space_fallback_type (&info[i]); hb_codepoint_t glyph; typedef hb_unicode_funcs_t t; diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc index 839cc9122c..aa5a8eeaa3 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc @@ -193,7 +193,8 @@ decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shor { hb_codepoint_t space_glyph; hb_unicode_funcs_t::space_t space_type = buffer->unicode->space_fallback_type (u); - if (space_type != hb_unicode_funcs_t::NOT_SPACE && c->font->get_nominal_glyph (0x0020u, &space_glyph)) + if (space_type != hb_unicode_funcs_t::NOT_SPACE && + (c->font->get_nominal_glyph (0x0020, &space_glyph) || (space_glyph = buffer->invisible))) { _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type); next_char (buffer, space_glyph); @@ -374,7 +375,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, decompose_multi_char_cluster (&c, end, always_short_circuit); } while (buffer->idx < count && buffer->successful); - buffer->swap_buffers (); + buffer->sync (); } @@ -477,7 
+478,7 @@ _hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, if (info_cc (buffer->prev()) == 0) starter = buffer->out_len - 1; } - buffer->swap_buffers (); + buffer->sync (); } } diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.cc b/thirdparty/harfbuzz/src/hb-ot-shape.cc index 4dde3520d8..4bd8aaf03b 100644 --- a/thirdparty/harfbuzz/src/hb-ot-shape.cc +++ b/thirdparty/harfbuzz/src/hb-ot-shape.cc @@ -566,7 +566,7 @@ hb_insert_dotted_circle (hb_buffer_t *buffer, hb_font_t *font) info.mask = buffer->cur().mask; (void) buffer->output_info (info); - buffer->swap_buffers (); + buffer->sync (); } static void @@ -1034,7 +1034,7 @@ hb_ot_position_complex (const hb_ot_shape_context_t *c) * hanging over the next glyph after the final reordering. * * Note: If fallback positinoing happens, we don't care about - * this as it will be overriden. + * this as it will be overridden. */ bool adjust_offsets_when_zeroing = c->plan->adjust_mark_positioning_when_zeroing && HB_DIRECTION_IS_FORWARD (c->buffer->props.direction); @@ -1120,7 +1120,7 @@ hb_propagate_flags (hb_buffer_t *buffer) /* Propagate cluster-level glyph flags to be the same on all cluster glyphs. * Simplifies using them. 
*/ - if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK)) + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GLYPH_FLAGS)) return; hb_glyph_info_t *info = buffer->info; @@ -1129,11 +1129,7 @@ hb_propagate_flags (hb_buffer_t *buffer) { unsigned int mask = 0; for (unsigned int i = start; i < end; i++) - if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) - { - mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK; - break; - } + mask |= info[i].mask & HB_GLYPH_FLAG_DEFINED; if (mask) for (unsigned int i = start; i < end; i++) info[i].mask |= mask; @@ -1145,18 +1141,7 @@ hb_propagate_flags (hb_buffer_t *buffer) static void hb_ot_shape_internal (hb_ot_shape_context_t *c) { - c->buffer->deallocate_var_all (); - c->buffer->scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; - if (likely (!hb_unsigned_mul_overflows (c->buffer->len, HB_BUFFER_MAX_LEN_FACTOR))) - { - c->buffer->max_len = hb_max (c->buffer->len * HB_BUFFER_MAX_LEN_FACTOR, - (unsigned) HB_BUFFER_MAX_LEN_MIN); - } - if (likely (!hb_unsigned_mul_overflows (c->buffer->len, HB_BUFFER_MAX_OPS_FACTOR))) - { - c->buffer->max_ops = hb_max (c->buffer->len * HB_BUFFER_MAX_OPS_FACTOR, - (unsigned) HB_BUFFER_MAX_OPS_MIN); - } + c->buffer->enter (); /* Save the original direction, we use it later. 
*/ c->target_direction = c->buffer->props.direction; @@ -1188,9 +1173,7 @@ hb_ot_shape_internal (hb_ot_shape_context_t *c) c->buffer->props.direction = c->target_direction; - c->buffer->max_len = HB_BUFFER_MAX_LEN_DEFAULT; - c->buffer->max_ops = HB_BUFFER_MAX_OPS_DEFAULT; - c->buffer->deallocate_var_all (); + c->buffer->leave (); } diff --git a/thirdparty/harfbuzz/src/hb-ot-tag-table.hh b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh index 2c6316df4f..61d2814e93 100644 --- a/thirdparty/harfbuzz/src/hb-ot-tag-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh @@ -6,8 +6,8 @@ * * on files with these headers: * - * <meta name="updated_at" content="2021-12-09 12:01 AM" /> - * File-Date: 2021-08-06 + * <meta name="updated_at" content="2022-01-28 10:00 PM" /> + * File-Date: 2021-12-29 */ #ifndef HB_OT_TAG_TABLE_HH @@ -66,7 +66,7 @@ static const LangTag ot_languages[] = { {"an", HB_TAG('A','R','G',' ')}, /* Aragonese */ /*{"ang", HB_TAG('A','N','G',' ')},*/ /* Old English (ca. 450-1100) -> Anglo-Saxon */ {"aoa", HB_TAG('C','P','P',' ')}, /* Angolar -> Creoles */ - {"apa", HB_TAG('A','T','H',' ')}, /* Apache [family] -> Athapaskan */ + {"apa", HB_TAG('A','T','H',' ')}, /* Apache [collection] -> Athapaskan */ {"apc", HB_TAG('A','R','A',' ')}, /* North Levantine Arabic -> Arabic */ {"apd", HB_TAG('A','R','A',' ')}, /* Sudanese Arabic -> Arabic */ {"apj", HB_TAG('A','T','H',' ')}, /* Jicarilla Apache -> Athapaskan */ @@ -86,7 +86,7 @@ static const LangTag ot_languages[] = { {"arz", HB_TAG('A','R','A',' ')}, /* Egyptian Arabic -> Arabic */ {"as", HB_TAG('A','S','M',' ')}, /* Assamese */ /*{"ast", HB_TAG('A','S','T',' ')},*/ /* Asturian */ -/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [family] -> Athapaskan */ +/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [collection] -> Athapaskan */ {"atj", HB_TAG('R','C','R',' ')}, /* Atikamekw -> R-Cree */ {"atv", HB_TAG('A','L','T',' ')}, /* Northern Altai -> Altai */ {"auj", HB_TAG('B','B','R',' ')}, /* Awjilah -> Berber 
*/ @@ -110,10 +110,10 @@ static const LangTag ot_languages[] = { {"azn", HB_TAG('N','A','H',' ')}, /* Western Durango Nahuatl -> Nahuatl */ {"azz", HB_TAG('N','A','H',' ')}, /* Highland Puebla Nahuatl -> Nahuatl */ {"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */ - {"bad", HB_TAG('B','A','D','0')}, /* Banda [family] */ + {"bad", HB_TAG('B','A','D','0')}, /* Banda [collection] */ {"bag", HB_TAG_NONE }, /* Tuki != Baghelkhandi */ {"bah", HB_TAG('C','P','P',' ')}, /* Bahamas Creole English -> Creoles */ - {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */ + {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [collection] */ {"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolanguage] */ /*{"ban", HB_TAG('B','A','N',' ')},*/ /* Balinese */ /*{"bar", HB_TAG('B','A','R',' ')},*/ /* Bavarian */ @@ -135,7 +135,7 @@ static const LangTag ot_languages[] = { {"bea", HB_TAG('A','T','H',' ')}, /* Beaver -> Athapaskan */ {"beb", HB_TAG('B','T','I',' ')}, /* Bebele -> Beti */ /*{"bem", HB_TAG('B','E','M',' ')},*/ /* Bemba (Zambia) */ - {"ber", HB_TAG('B','B','R',' ')}, /* Berber [family] */ + {"ber", HB_TAG('B','B','R',' ')}, /* Berber [collection] */ {"bew", HB_TAG('C','P','P',' ')}, /* Betawi -> Creoles */ {"bfl", HB_TAG('B','A','D','0')}, /* Banda-Ndélé -> Banda */ {"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */ @@ -203,7 +203,7 @@ static const LangTag ot_languages[] = { {"btd", HB_TAG('B','T','K',' ')}, /* Batak Dairi -> Batak */ {"bti", HB_TAG_NONE }, /* Burate != Beti */ {"btj", HB_TAG('M','L','Y',' ')}, /* Bacanese Malay -> Malay */ -/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [family] */ +/*{"btk", HB_TAG('B','T','K',' ')},*/ /* Batak [collection] */ {"btm", HB_TAG('B','T','M',' ')}, /* Batak Mandailing */ {"btm", HB_TAG('B','T','K',' ')}, /* Batak Mandailing -> Batak */ {"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol -> Bikol */ @@ -256,6 +256,8 @@ static const LangTag ot_languages[] = { {"chh", HB_TAG_NONE }, /* Chinook != Chattisgarhi */ {"chj", 
HB_TAG('C','C','H','N')}, /* Ojitlán Chinantec -> Chinantec */ {"chk", HB_TAG('C','H','K','0')}, /* Chuukese */ + {"chm", HB_TAG('H','M','A',' ')}, /* Mari (Russia) [macrolanguage] -> High Mari */ + {"chm", HB_TAG('L','M','A',' ')}, /* Mari (Russia) [macrolanguage] -> Low Mari */ {"chn", HB_TAG('C','P','P',' ')}, /* Chinook jargon -> Creoles */ /*{"cho", HB_TAG('C','H','O',' ')},*/ /* Choctaw */ {"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */ @@ -297,10 +299,10 @@ static const LangTag ot_languages[] = { /*{"cop", HB_TAG('C','O','P',' ')},*/ /* Coptic */ {"coq", HB_TAG('A','T','H',' ')}, /* Coquille -> Athapaskan */ {"cpa", HB_TAG('C','C','H','N')}, /* Palantla Chinantec -> Chinantec */ - {"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [family] -> Creoles */ - {"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [family] -> Creoles */ + {"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [collection] -> Creoles */ + {"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [collection] -> Creoles */ {"cpi", HB_TAG('C','P','P',' ')}, /* Chinese Pidgin English -> Creoles */ -/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [family] -> Creoles */ +/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [collection] -> Creoles */ {"cpx", HB_TAG('Z','H','S',' ')}, /* Pu-Xian Chinese -> Chinese, Simplified */ {"cqd", HB_TAG('H','M','N',' ')}, /* Chuanqiandian Cluster Miao -> Hmong */ {"cqu", HB_TAG('Q','U','H',' ')}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */ @@ -320,7 +322,7 @@ static const LangTag ot_languages[] = { {"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */ {"crm", HB_TAG('L','C','R',' ')}, /* Moose Cree -> L-Cree */ {"crm", HB_TAG('C','R','E',' ')}, /* Moose Cree -> Cree */ - {"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [family] -> Creoles */ + {"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins 
[collection] -> Creoles */ {"crr", HB_TAG_NONE }, /* Carolina Algonquian != Carrier */ {"crs", HB_TAG('C','P','P',' ')}, /* Seselwa Creole French -> Creoles */ {"crt", HB_TAG_NONE }, /* Iyojwa'ja Chorote != Crimean Tatar */ @@ -431,7 +433,7 @@ static const LangTag ot_languages[] = { {"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */ {"eto", HB_TAG('B','T','I',' ')}, /* Eton (Cameroon) -> Beti */ {"eu", HB_TAG('E','U','Q',' ')}, /* Basque */ - {"euq", HB_TAG_NONE }, /* Basque [family] != Basque */ + {"euq", HB_TAG_NONE }, /* Basque [collection] != Basque */ {"eve", HB_TAG('E','V','N',' ')}, /* Even */ {"evn", HB_TAG('E','V','K',' ')}, /* Evenki */ {"ewo", HB_TAG('B','T','I',' ')}, /* Ewondo -> Beti */ @@ -620,10 +622,11 @@ static const LangTag ot_languages[] = { {"ijc", HB_TAG('I','J','O',' ')}, /* Izon -> Ijo */ {"ije", HB_TAG('I','J','O',' ')}, /* Biseni -> Ijo */ {"ijn", HB_TAG('I','J','O',' ')}, /* Kalabari -> Ijo */ -/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [family] */ +/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [collection] */ {"ijs", HB_TAG('I','J','O',' ')}, /* Southeast Ijo -> Ijo */ {"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] -> Inupiat */ {"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */ + {"ike", HB_TAG('I','N','U','K')}, /* Eastern Canadian Inuktitut -> Nunavik Inuktitut */ {"ikt", HB_TAG('I','N','U',' ')}, /* Inuinnaqtun -> Inuktitut */ /*{"ilo", HB_TAG('I','L','O',' ')},*/ /* Iloko -> Ilokano */ {"in", HB_TAG('I','N','D',' ')}, /* Indonesian (retired code) */ @@ -638,6 +641,7 @@ static const LangTag ot_languages[] = { {"it", HB_TAG('I','T','A',' ')}, /* Italian */ {"itz", HB_TAG('M','Y','N',' ')}, /* Itzá -> Mayan */ {"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut [macrolanguage] */ + {"iu", HB_TAG('I','N','U','K')}, /* Inuktitut [macrolanguage] -> Nunavik Inuktitut */ {"iw", HB_TAG('I','W','R',' ')}, /* Hebrew (retired code) */ {"ixl", HB_TAG('M','Y','N',' ')}, /* Ixil -> Mayan */ {"ja", 
HB_TAG('J','A','N',' ')}, /* Japanese */ @@ -667,7 +671,7 @@ static const LangTag ot_languages[] = { {"kab", HB_TAG('B','B','R',' ')}, /* Kabyle -> Berber */ {"kac", HB_TAG_NONE }, /* Kachin != Kachchi */ {"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */ - {"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */ + {"kar", HB_TAG('K','R','N',' ')}, /* Karen [collection] */ /*{"kaw", HB_TAG('K','A','W',' ')},*/ /* Kawi (Old Javanese) */ {"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */ {"kby", HB_TAG('K','N','R',' ')}, /* Manga Kanuri -> Kanuri */ @@ -876,7 +880,7 @@ static const LangTag ot_languages[] = { {"mam", HB_TAG('M','A','M',' ')}, /* Mam */ {"mam", HB_TAG('M','Y','N',' ')}, /* Mam -> Mayan */ {"man", HB_TAG('M','N','K',' ')}, /* Mandingo [macrolanguage] -> Maninka */ - {"map", HB_TAG_NONE }, /* Austronesian [family] != Mapudungun */ + {"map", HB_TAG_NONE }, /* Austronesian [collection] != Mapudungun */ {"maw", HB_TAG_NONE }, /* Mampruli != Marwari */ {"max", HB_TAG('M','L','Y',' ')}, /* North Moluccan Malay -> Malay */ {"max", HB_TAG('C','P','P',' ')}, /* North Moluccan Malay -> Creoles */ @@ -936,6 +940,7 @@ static const LangTag ot_languages[] = { {"mnw", HB_TAG('M','O','N','T')}, /* Mon -> Thailand Mon */ {"mnx", HB_TAG_NONE }, /* Manikion != Manx */ {"mo", HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */ + {"mo", HB_TAG('R','O','M',' ')}, /* Moldavian (retired code) -> Romanian */ {"mod", HB_TAG('C','P','P',' ')}, /* Mobilian -> Creoles */ /*{"moh", HB_TAG('M','O','H',' ')},*/ /* Mohawk */ {"mok", HB_TAG_NONE }, /* Morori != Moksha */ @@ -958,7 +963,7 @@ static const LangTag ot_languages[] = { {"mts", HB_TAG_NONE }, /* Yora != Maltese */ {"mud", HB_TAG('C','P','P',' ')}, /* Mednyj Aleut -> Creoles */ {"mui", HB_TAG('M','L','Y',' ')}, /* Musi -> Malay */ - {"mun", HB_TAG_NONE }, /* Munda [family] != Mundari */ + {"mun", HB_TAG_NONE }, /* Munda [collection] != Mundari */ {"mup", HB_TAG('R','A','J',' ')}, /* Malvi -> Rajasthani */ {"muq", 
HB_TAG('H','M','N',' ')}, /* Eastern Xiangxi Miao -> Hmong */ /*{"mus", HB_TAG('M','U','S',' ')},*/ /* Creek -> Muscogee */ @@ -973,7 +978,7 @@ static const LangTag ot_languages[] = { {"mww", HB_TAG('H','M','N',' ')}, /* Hmong Daw -> Hmong */ {"my", HB_TAG('B','R','M',' ')}, /* Burmese */ {"mym", HB_TAG('M','E','N',' ')}, /* Me’en */ -/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [family] */ +/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [collection] */ {"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) -> Maninka */ {"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */ {"mzb", HB_TAG('B','B','R',' ')}, /* Tumzabt -> Berber */ @@ -982,7 +987,7 @@ static const LangTag ot_languages[] = { {"na", HB_TAG('N','A','U',' ')}, /* Nauru -> Nauruan */ {"nag", HB_TAG('N','A','G',' ')}, /* Naga Pidgin -> Naga-Assamese */ {"nag", HB_TAG('C','P','P',' ')}, /* Naga Pidgin -> Creoles */ -/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [family] */ +/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [collection] */ {"nan", HB_TAG('Z','H','S',' ')}, /* Min Nan Chinese -> Chinese, Simplified */ /*{"nap", HB_TAG('N','A','P',' ')},*/ /* Neapolitan */ {"nas", HB_TAG_NONE }, /* Naasioi != Naskapi */ @@ -1039,7 +1044,6 @@ static const LangTag ot_languages[] = { {"nln", HB_TAG('N','A','H',' ')}, /* Durango Nahuatl (retired code) -> Nahuatl */ {"nlv", HB_TAG('N','A','H',' ')}, /* Orizaba Nahuatl -> Nahuatl */ {"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */ - {"nn", HB_TAG('N','O','R',' ')}, /* Norwegian Nynorsk -> Norwegian */ {"nnh", HB_TAG('B','M','L',' ')}, /* Ngiemboon -> Bamileke */ {"nnz", HB_TAG('B','M','L',' ')}, /* Nda'nda' -> Bamileke */ {"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */ @@ -1093,7 +1097,7 @@ static const LangTag ot_languages[] = { {"otw", HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */ {"oua", HB_TAG('B','B','R',' ')}, /* Tagargrent -> Berber */ {"pa", HB_TAG('P','A','N',' ')}, /* Punjabi */ - {"paa", 
HB_TAG_NONE }, /* Papuan [family] != Palestinian Aramaic */ + {"paa", HB_TAG_NONE }, /* Papuan [collection] != Palestinian Aramaic */ /*{"pag", HB_TAG('P','A','G',' ')},*/ /* Pangasinan */ {"pal", HB_TAG_NONE }, /* Pahlavi != Pali */ /*{"pam", HB_TAG('P','A','M',' ')},*/ /* Pampanga -> Pampangan */ @@ -1308,6 +1312,9 @@ static const LangTag ot_languages[] = { {"sgo", HB_TAG_NONE }, /* Songa (retired code) != Sango */ /*{"sgs", HB_TAG('S','G','S',' ')},*/ /* Samogitian */ {"sgw", HB_TAG('C','H','G',' ')}, /* Sebat Bet Gurage -> Chaha Gurage */ + {"sh", HB_TAG('B','O','S',' ')}, /* Serbo-Croatian [macrolanguage] -> Bosnian */ + {"sh", HB_TAG('H','R','V',' ')}, /* Serbo-Croatian [macrolanguage] -> Croatian */ + {"sh", HB_TAG('S','R','B',' ')}, /* Serbo-Croatian [macrolanguage] -> Serbian */ {"shi", HB_TAG('S','H','I',' ')}, /* Tachelhit */ {"shi", HB_TAG('B','B','R',' ')}, /* Tachelhit -> Berber */ {"shl", HB_TAG('Q','I','N',' ')}, /* Shendu -> Chin */ @@ -1329,7 +1336,7 @@ static const LangTag ot_languages[] = { {"skw", HB_TAG('C','P','P',' ')}, /* Skepi Creole Dutch -> Creoles */ {"sky", HB_TAG_NONE }, /* Sikaiana != Slovak */ {"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */ - {"sla", HB_TAG_NONE }, /* Slavic [family] != Slavey */ + {"sla", HB_TAG_NONE }, /* Slavic [collection] != Slavey */ {"sm", HB_TAG('S','M','O',' ')}, /* Samoan */ {"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */ {"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */ @@ -1451,7 +1458,7 @@ static const LangTag ot_languages[] = { {"tpi", HB_TAG('C','P','P',' ')}, /* Tok Pisin -> Creoles */ {"tr", HB_TAG('T','R','K',' ')}, /* Turkish */ {"trf", HB_TAG('C','P','P',' ')}, /* Trinidadian Creole English -> Creoles */ - {"trk", HB_TAG_NONE }, /* Turkic [family] != Turkish */ + {"trk", HB_TAG_NONE }, /* Turkic [collection] != Turkish */ {"tru", HB_TAG('T','U','A',' ')}, /* Turoyo -> Turoyo Aramaic */ {"tru", HB_TAG('S','Y','R',' ')}, /* Turoyo -> Syriac */ {"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */ @@ 
-1593,7 +1600,7 @@ static const LangTag ot_languages[] = { {"zlq", HB_TAG('Z','H','A',' ')}, /* Liuqian Zhuang -> Zhuang */ {"zmi", HB_TAG('M','L','Y',' ')}, /* Negeri Sembilan Malay -> Malay */ {"zmz", HB_TAG('B','A','D','0')}, /* Mbandja -> Banda */ - {"znd", HB_TAG_NONE }, /* Zande [family] != Zande */ + {"znd", HB_TAG_NONE }, /* Zande [collection] != Zande */ {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ {"zom", HB_TAG('Q','I','N',' ')}, /* Zou -> Chin */ {"zqe", HB_TAG('Z','H','A',' ')}, /* Qiubei Zhuang -> Zhuang */ @@ -2607,14 +2614,8 @@ hb_ot_tags_from_complex_language (const char *lang_str, if (0 == strcmp (&lang_str[1], "o-nyn")) { /* Norwegian Nynorsk (retired code) */ - unsigned int i; - hb_tag_t possible_tags[] = { - HB_TAG('N','Y','N',' '), /* Norwegian Nynorsk (Nynorsk, Norwegian) */ - HB_TAG('N','O','R',' '), /* Norwegian */ - }; - for (i = 0; i < 2 && i < *count; i++) - tags[i] = possible_tags[i]; - *count = i; + tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */ + *count = 1; return true; } break; @@ -2623,8 +2624,14 @@ hb_ot_tags_from_complex_language (const char *lang_str, && subtag_matches (lang_str, limit, "-md")) { /* Romanian; Moldova */ - tags[0] = HB_TAG('M','O','L',' '); /* Moldavian */ - *count = 1; + unsigned int i; + hb_tag_t possible_tags[] = { + HB_TAG('M','O','L',' '), /* Moldavian */ + HB_TAG('R','O','M',' '), /* Romanian */ + }; + for (i = 0; i < 2 && i < *count; i++) + tags[i] = possible_tags[i]; + *count = i; return true; } break; @@ -2813,15 +2820,15 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('A','R','K',' '): /* Rakhine */ return hb_language_from_string ("rki", -1); /* Rakhine */ case HB_TAG('A','T','H',' '): /* Athapaskan */ - return hb_language_from_string ("ath", -1); /* Athapascan [family] */ + return hb_language_from_string ("ath", -1); /* Athapascan [collection] */ case HB_TAG('B','B','R',' '): /* Berber */ - return hb_language_from_string ("ber", -1); /* Berber [family] */ 
+ return hb_language_from_string ("ber", -1); /* Berber [collection] */ case HB_TAG('B','I','K',' '): /* Bikol */ return hb_language_from_string ("bik", -1); /* Bikol [macrolanguage] */ case HB_TAG('B','T','K',' '): /* Batak */ - return hb_language_from_string ("btk", -1); /* Batak [family] */ + return hb_language_from_string ("btk", -1); /* Batak [collection] */ case HB_TAG('C','P','P',' '): /* Creoles */ - return hb_language_from_string ("crp", -1); /* Creoles and pidgins [family] */ + return hb_language_from_string ("crp", -1); /* Creoles and pidgins [collection] */ case HB_TAG('C','R','R',' '): /* Carrier */ return hb_language_from_string ("crx", -1); /* Carrier */ case HB_TAG('D','G','R',' '): /* Dogri (macrolanguage) */ @@ -2838,6 +2845,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) return hb_language_from_string ("fa", -1); /* Persian [macrolanguage] */ case HB_TAG('G','O','N',' '): /* Gondi */ return hb_language_from_string ("gon", -1); /* Gondi [macrolanguage] */ + case HB_TAG('H','M','A',' '): /* High Mari */ + return hb_language_from_string ("mrj", -1); /* Western Mari */ case HB_TAG('H','M','N',' '): /* Hmong */ return hb_language_from_string ("hmn", -1); /* Hmong [macrolanguage] */ case HB_TAG('H','N','D',' '): /* Hindko */ @@ -2847,7 +2856,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('I','B','A',' '): /* Iban */ return hb_language_from_string ("iba", -1); /* Iban */ case HB_TAG('I','J','O',' '): /* Ijo */ - return hb_language_from_string ("ijo", -1); /* Ijo [family] */ + return hb_language_from_string ("ijo", -1); /* Ijo [collection] */ case HB_TAG('I','N','U',' '): /* Inuktitut */ return hb_language_from_string ("iu", -1); /* Inuktitut [macrolanguage] */ case HB_TAG('I','P','K',' '): /* Inupiat */ @@ -2873,11 +2882,13 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('K','P','L',' '): /* Kpelle */ return hb_language_from_string ("kpe", -1); /* Kpelle [macrolanguage] */ case HB_TAG('K','R','N',' '): /* Karen */ - 
return hb_language_from_string ("kar", -1); /* Karen [family] */ + return hb_language_from_string ("kar", -1); /* Karen [collection] */ case HB_TAG('K','U','I',' '): /* Kui */ return hb_language_from_string ("uki", -1); /* Kui (India) */ case HB_TAG('K','U','R',' '): /* Kurdish */ return hb_language_from_string ("ku", -1); /* Kurdish [macrolanguage] */ + case HB_TAG('L','M','A',' '): /* Low Mari */ + return hb_language_from_string ("mhr", -1); /* Eastern Mari */ case HB_TAG('L','U','H',' '): /* Luyia */ return hb_language_from_string ("luy", -1); /* Luyia [macrolanguage] */ case HB_TAG('L','V','I',' '): /* Latvian */ @@ -2897,9 +2908,9 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) case HB_TAG('M','O','N','T'): /* Thailand Mon */ return hb_language_from_string ("mnw-TH", -1); /* Mon; Thailand */ case HB_TAG('M','Y','N',' '): /* Mayan */ - return hb_language_from_string ("myn", -1); /* Mayan [family] */ + return hb_language_from_string ("myn", -1); /* Mayan [collection] */ case HB_TAG('N','A','H',' '): /* Nahuatl */ - return hb_language_from_string ("nah", -1); /* Nahuatl [family] */ + return hb_language_from_string ("nah", -1); /* Nahuatl [collection] */ case HB_TAG('N','E','P',' '): /* Nepali */ return hb_language_from_string ("ne", -1); /* Nepali [macrolanguage] */ case HB_TAG('N','I','S',' '): /* Nisi */ @@ -2926,6 +2937,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag) return hb_language_from_string ("qwh", -1); /* Huaylas Ancash Quechua */ case HB_TAG('R','A','J',' '): /* Rajasthani */ return hb_language_from_string ("raj", -1); /* Rajasthani [macrolanguage] */ + case HB_TAG('R','O','M',' '): /* Romanian */ + return hb_language_from_string ("ro", -1); /* Romanian */ case HB_TAG('R','O','Y',' '): /* Romany */ return hb_language_from_string ("rom", -1); /* Romany [macrolanguage] */ case HB_TAG('S','Q','I',' '): /* Albanian */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh index 
05f289db26..e066558683 100644 --- a/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh @@ -263,7 +263,7 @@ struct fvar if (coords_length && *coords_length) { hb_array_t<const HBFixed> instanceCoords = instance->get_coordinates (axisCount) - .sub_array (0, *coords_length); + .sub_array (0, coords_length); for (unsigned int i = 0; i < instanceCoords.length; i++) coords[i] = instanceCoords.arrayZ[i].to_float (); } diff --git a/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh index 49b5532d40..539213c339 100644 --- a/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh @@ -399,7 +399,7 @@ struct gvar get_offset (glyphCount) - get_offset (0))); } - /* GlyphVariationData not sanitized here; must be checked while accessing each glyph varation data */ + /* GlyphVariationData not sanitized here; must be checked while accessing each glyph variation data */ bool sanitize (hb_sanitize_context_t *c) const { return sanitize_shallow (c); } @@ -498,9 +498,9 @@ struct gvar public: struct accelerator_t { - void init (hb_face_t *face) + accelerator_t (hb_face_t *face) { table = hb_sanitize_context_t ().reference_table<gvar> (face); } - void fini () { table.destroy (); } + ~accelerator_t () { table.destroy (); } private: struct x_getter { static float get (const contour_point_t &p) { return p.x; } }; @@ -698,7 +698,9 @@ no_more_gaps: DEFINE_SIZE_MIN (20); }; -struct gvar_accelerator_t : gvar::accelerator_t {}; +struct gvar_accelerator_t : gvar::accelerator_t { + gvar_accelerator_t (hb_face_t *face) : gvar::accelerator_t (face) {} +}; } /* namespace OT */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh index 074b6a3785..e9d90352f0 100644 --- a/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh +++ b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh @@ -177,9 +177,6 @@ struct 
hvarvvar_subset_plan_t inner_maps.resize (var_store->get_sub_table_count ()); - for (unsigned int i = 0; i < inner_maps.length; i++) - inner_maps[i].init (); - if (unlikely (!index_map_plans.length || !inner_sets.length || !inner_maps.length)) return; bool retain_adv_map = false; @@ -229,8 +226,8 @@ struct hvarvvar_subset_plan_t for (unsigned int i = 0; i < inner_sets.length; i++) hb_set_destroy (inner_sets[i]); hb_set_destroy (adv_set); - inner_maps.fini_deep (); - index_map_plans.fini_deep (); + inner_maps.fini (); + index_map_plans.fini (); } hb_inc_bimap_t outer_map; diff --git a/thirdparty/harfbuzz/src/hb-ot-var.cc b/thirdparty/harfbuzz/src/hb-ot-var.cc index 6b42b45cd9..0376e26b4a 100644 --- a/thirdparty/harfbuzz/src/hb-ot-var.cc +++ b/thirdparty/harfbuzz/src/hb-ot-var.cc @@ -303,6 +303,9 @@ hb_ot_var_normalize_variations (hb_face_t *face, * values for the axis are mapped to the interval [-1,1], with the default * axis value mapped to 0. * + * The normalized values have 14 bits of fixed-point sub-integer precision as per + * OpenType specification. 
+ * * Any additional scaling defined in the face's `avar` table is also * applied, as described at https://docs.microsoft.com/en-us/typography/opentype/spec/avar * diff --git a/thirdparty/harfbuzz/src/hb-ot-var.h b/thirdparty/harfbuzz/src/hb-ot-var.h index ce201d3b4f..05147cc25e 100644 --- a/thirdparty/harfbuzz/src/hb-ot-var.h +++ b/thirdparty/harfbuzz/src/hb-ot-var.h @@ -109,7 +109,7 @@ typedef enum { /*< flags >*/ * @tag: The #hb_tag_t tag identifying the design variation of the axis * @name_id: The `name` table Name ID that provides display names for the axis * @flags: The #hb_ot_var_axis_flags_t flags for the axis - * @min_value: The mininum value on the variation axis that the font covers + * @min_value: The minimum value on the variation axis that the font covers * @default_value: The position on the variation axis corresponding to the font's defaults * @max_value: The maximum value on the variation axis that the font covers * diff --git a/thirdparty/harfbuzz/src/hb-repacker.hh b/thirdparty/harfbuzz/src/hb-repacker.hh index 5c46b4cccc..b1726d8beb 100644 --- a/thirdparty/harfbuzz/src/hb-repacker.hh +++ b/thirdparty/harfbuzz/src/hb-repacker.hh @@ -42,26 +42,13 @@ struct graph_t { struct vertex_t { - vertex_t () : - distance (0), - space (0), - parents (), - start (0), - end (0), - priority(0) {} - - void fini () { - obj.fini (); - parents.fini (); - } - hb_serialize_context_t::object_t obj; - int64_t distance; - int64_t space; + int64_t distance = 0 ; + int64_t space = 0 ; hb_vector_t<unsigned> parents; - unsigned start; - unsigned end; - unsigned priority; + unsigned start = 0; + unsigned end = 0; + unsigned priority = 0; bool is_shared () const { @@ -186,7 +173,7 @@ struct graph_t ~graph_t () { - vertices_.fini_deep (); + vertices_.fini (); } bool in_error () const @@ -309,7 +296,7 @@ struct graph_t remap_all_obj_indices (id_map, &sorted_graph); hb_swap (vertices_, sorted_graph); - sorted_graph.fini_deep (); + sorted_graph.fini (); } /* @@ -369,7 +356,7 @@ 
struct graph_t remap_all_obj_indices (id_map, &sorted_graph); hb_swap (vertices_, sorted_graph); - sorted_graph.fini_deep (); + sorted_graph.fini (); } /* @@ -402,11 +389,15 @@ struct graph_t while (roots) { unsigned next = HB_SET_VALUE_INVALID; + if (unlikely (!check_success (!roots.in_error ()))) break; if (!roots.next (&next)) break; hb_set_t connected_roots; find_connected_nodes (next, roots, visited, connected_roots); + if (unlikely (!check_success (!connected_roots.in_error ()))) break; + isolate_subgraph (connected_roots); + if (unlikely (!check_success (!connected_roots.in_error ()))) break; unsigned next_space = this->next_space (); num_roots_for_space_.push (0); @@ -423,6 +414,8 @@ struct graph_t // into the 32 bit space as needed, instead of using isolation. } + + return true; } @@ -865,7 +858,7 @@ struct graph_t // Redundant ones are filtered out later on by the visited set. // According to https://www3.cs.stonybrook.edu/~rezaul/papers/TR-07-54.pdf // for practical performance this is faster then using a more advanced queue - // (such as a fibonaacci queue) with a fast decrease priority. + // (such as a fibonacci queue) with a fast decrease priority. 
for (unsigned i = 0; i < vertices_.length; i++) { if (i == vertices_.length - 1) @@ -1074,6 +1067,7 @@ struct graph_t hb_set_t& visited, hb_set_t& connected) { + if (unlikely (!check_success (!visited.in_error ()))) return; if (visited.has (start_idx)) return; visited.add (start_idx); diff --git a/thirdparty/harfbuzz/src/hb-serialize.hh b/thirdparty/harfbuzz/src/hb-serialize.hh index 823c0be8b5..6615f033c5 100644 --- a/thirdparty/harfbuzz/src/hb-serialize.hh +++ b/thirdparty/harfbuzz/src/hb-serialize.hh @@ -279,7 +279,7 @@ struct hb_serialize_context_t object_pool.release (obj); } - /* Set share to false when an object is unlikely sharable with others + /* Set share to false when an object is unlikely shareable with others * so not worth an attempt, or a contiguous table is serialized as * multiple consecutive objects in the reverse order so can't be shared. */ @@ -381,7 +381,7 @@ struct hb_serialize_context_t // Adding a virtual link from object a to object b will ensure that object b is always packed after // object a in the final serialized order. // - // This is useful in certain situtations where there needs to be a specific ordering in the + // This is useful in certain situations where there needs to be a specific ordering in the // final serialization. Such as when platform bugs require certain orderings, or to provide // guidance to the repacker for better offset overflow resolution. 
void add_virtual_link (objidx_t objidx) @@ -510,7 +510,7 @@ struct hb_serialize_context_t { return reinterpret_cast<Type *> (this->head); } template <typename Type> Type *start_embed (const Type &obj) const - { return start_embed (hb_addressof (obj)); } + { return start_embed (std::addressof (obj)); } bool err (hb_serialize_error_t err_type) { @@ -548,7 +548,7 @@ struct hb_serialize_context_t } template <typename Type> Type *embed (const Type &obj) - { return embed (hb_addressof (obj)); } + { return embed (std::addressof (obj)); } template <typename Type, typename ...Ts> auto _copy (const Type &src, hb_priority<1>, Ts&&... ds) HB_RETURN @@ -595,19 +595,19 @@ struct hb_serialize_context_t } template <typename Type> Type *extend_size (Type &obj, size_t size) - { return extend_size (hb_addressof (obj), size); } + { return extend_size (std::addressof (obj), size); } template <typename Type> Type *extend_min (Type *obj) { return extend_size (obj, obj->min_size); } template <typename Type> - Type *extend_min (Type &obj) { return extend_min (hb_addressof (obj)); } + Type *extend_min (Type &obj) { return extend_min (std::addressof (obj)); } template <typename Type, typename ...Ts> Type *extend (Type *obj, Ts&&... ds) { return extend_size (obj, obj->get_size (std::forward<Ts> (ds)...)); } template <typename Type, typename ...Ts> Type *extend (Type &obj, Ts&&... ds) - { return extend (hb_addressof (obj), std::forward<Ts> (ds)...); } + { return extend (std::addressof (obj), std::forward<Ts> (ds)...); } /* Output routines. */ hb_bytes_t copy_bytes () const diff --git a/thirdparty/harfbuzz/src/hb-style.cc b/thirdparty/harfbuzz/src/hb-style.cc index f1b44cea53..c0c5c4832c 100644 --- a/thirdparty/harfbuzz/src/hb-style.cc +++ b/thirdparty/harfbuzz/src/hb-style.cc @@ -48,13 +48,12 @@ _hb_angle_to_ratio (float a) { return tanf (a * float (M_PI / 180.)); } -#if 0 + static inline float _hb_ratio_to_angle (float r) { return atanf (r) * float (180. 
/ M_PI); } -#endif /** * hb_style_get_value: @@ -73,7 +72,8 @@ float hb_style_get_value (hb_font_t *font, hb_style_tag_t style_tag) { if (unlikely (style_tag == HB_STYLE_TAG_SLANT_RATIO)) - return _hb_angle_to_ratio (hb_style_get_value (font, HB_STYLE_TAG_SLANT_ANGLE)); + return _hb_angle_to_ratio (hb_style_get_value (font, HB_STYLE_TAG_SLANT_ANGLE)) + + font->slant; hb_face_t *face = font->face; @@ -109,7 +109,14 @@ hb_style_get_value (hb_font_t *font, hb_style_tag_t style_tag) : 12.f; } case HB_STYLE_TAG_SLANT_ANGLE: - return face->table.post->table->italicAngle.to_float (); + { + float angle = face->table.post->table->italicAngle.to_float (); + + if (font->slant) + angle = _hb_ratio_to_angle (font->slant + _hb_angle_to_ratio (angle)); + + return angle; + } case HB_STYLE_TAG_WIDTH: return face->table.OS2->has_data () ? face->table.OS2->get_width () diff --git a/thirdparty/harfbuzz/src/hb-subset-cff-common.hh b/thirdparty/harfbuzz/src/hb-subset-cff-common.hh index 7fd96ca86d..18657705fa 100644 --- a/thirdparty/harfbuzz/src/hb-subset-cff-common.hh +++ b/thirdparty/harfbuzz/src/hb-subset-cff-common.hh @@ -275,60 +275,36 @@ struct subr_flattener_t struct subr_closures_t { - subr_closures_t () : valid (false), global_closure (nullptr) - { local_closures.init (); } - - void init (unsigned int fd_count) + subr_closures_t (unsigned int fd_count) : valid (false), global_closure (), local_closures () { valid = true; - global_closure = hb_set_create (); - if (global_closure == hb_set_get_empty ()) - valid = false; if (!local_closures.resize (fd_count)) valid = false; - - for (unsigned int i = 0; i < local_closures.length; i++) - { - local_closures[i] = hb_set_create (); - if (local_closures[i] == hb_set_get_empty ()) - valid = false; - } - } - - void fini () - { - hb_set_destroy (global_closure); - for (unsigned int i = 0; i < local_closures.length; i++) - hb_set_destroy (local_closures[i]); - local_closures.fini (); } void reset () { - hb_set_clear (global_closure); + 
global_closure.clear(); for (unsigned int i = 0; i < local_closures.length; i++) - hb_set_clear (local_closures[i]); + local_closures[i].clear(); } bool is_valid () const { return valid; } bool valid; - hb_set_t *global_closure; - hb_vector_t<hb_set_t *> local_closures; + hb_set_t global_closure; + hb_vector_t<hb_set_t> local_closures; }; struct parsed_cs_op_t : op_str_t { void init (unsigned int subr_num_ = 0) { - op_str_t::init (); subr_num = subr_num_; drop_flag = false; keep_flag = false; skip_flag = false; } - void fini () { op_str_t::fini (); } - bool for_drop () const { return drop_flag; } void set_drop () { if (!for_keep ()) drop_flag = true; } @@ -416,16 +392,6 @@ struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t> { - void init (unsigned int len_ = 0) - { - SUPER::init (); - if (unlikely (!resize (len_))) - return; - for (unsigned int i = 0; i < length; i++) - (*this)[i].init (); - } - void fini () { SUPER::fini_deep (); } - private: typedef hb_vector_t<parsed_cs_str_t> SUPER; }; @@ -496,7 +462,7 @@ struct subr_subset_param_t struct subr_remap_t : hb_inc_bimap_t { - void create (hb_set_t *closure) + void create (const hb_set_t *closure) { /* create a remapping of subroutine numbers from old to new. * no optimization based on usage counts. fonttools doesn't appear doing that either. 
@@ -526,19 +492,9 @@ struct subr_remap_t : hb_inc_bimap_t struct subr_remaps_t { - subr_remaps_t () + subr_remaps_t (unsigned int fdCount) { - global_remap.init (); - local_remaps.init (); - } - - ~subr_remaps_t () { fini (); } - - void init (unsigned int fdCount) - { - if (unlikely (!local_remaps.resize (fdCount))) return; - for (unsigned int i = 0; i < fdCount; i++) - local_remaps[i].init (); + local_remaps.resize (fdCount); } bool in_error() @@ -548,15 +504,9 @@ struct subr_remaps_t void create (subr_closures_t& closures) { - global_remap.create (closures.global_closure); + global_remap.create (&closures.global_closure); for (unsigned int i = 0; i < local_remaps.length; i++) - local_remaps[i].create (closures.local_closures[i]); - } - - void fini () - { - global_remap.fini (); - local_remaps.fini_deep (); + local_remaps[i].create (&closures.local_closures[i]); } subr_remap_t global_remap; @@ -567,21 +517,8 @@ template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typena struct subr_subsetter_t { subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_) - : acc (acc_), plan (plan_) - { - parsed_charstrings.init (); - parsed_global_subrs.init (); - parsed_local_subrs.init (); - } - - ~subr_subsetter_t () - { - closures.fini (); - remaps.fini (); - parsed_charstrings.fini_deep (); - parsed_global_subrs.fini_deep (); - parsed_local_subrs.fini_deep (); - } + : acc (acc_), plan (plan_), closures(acc_.fdCount), remaps(acc_.fdCount) + {} /* Subroutine subsetting with --no-desubroutinize runs in phases: * @@ -599,11 +536,8 @@ struct subr_subsetter_t */ bool subset (void) { - closures.init (acc.fdCount); - remaps.init (acc.fdCount); - - parsed_charstrings.init (plan->num_output_glyphs ()); - parsed_global_subrs.init (acc.globalSubrs->count); + parsed_charstrings.resize (plan->num_output_glyphs ()); + parsed_global_subrs.resize (acc.globalSubrs->count); if (unlikely (remaps.in_error() || parsed_charstrings.in_error () @@ -615,7 +549,7 @@ struct 
subr_subsetter_t for (unsigned int i = 0; i < acc.fdCount; i++) { - parsed_local_subrs[i].init (acc.privateDicts[i].localSubrs->count); + parsed_local_subrs[i].resize (acc.privateDicts[i].localSubrs->count); if (unlikely (parsed_local_subrs[i].in_error ())) return false; } if (unlikely (!closures.valid)) @@ -638,7 +572,7 @@ struct subr_subsetter_t subr_subset_param_t param; param.init (&parsed_charstrings[i], &parsed_global_subrs, &parsed_local_subrs[fd], - closures.global_closure, closures.local_closures[fd], + &closures.global_closure, &closures.local_closures[fd], plan->flags & HB_SUBSET_FLAGS_NO_HINTING); if (unlikely (!interp.interpret (param))) @@ -662,7 +596,7 @@ struct subr_subsetter_t subr_subset_param_t param; param.init (&parsed_charstrings[i], &parsed_global_subrs, &parsed_local_subrs[fd], - closures.global_closure, closures.local_closures[fd], + &closures.global_closure, &closures.local_closures[fd], plan->flags & HB_SUBSET_FLAGS_NO_HINTING); drop_hints_param_t drop; @@ -687,7 +621,7 @@ struct subr_subsetter_t subr_subset_param_t param; param.init (&parsed_charstrings[i], &parsed_global_subrs, &parsed_local_subrs[fd], - closures.global_closure, closures.local_closures[fd], + &closures.global_closure, &closures.local_closures[fd], plan->flags & HB_SUBSET_FLAGS_NO_HINTING); collect_subr_refs_in_str (parsed_charstrings[i], param); } diff --git a/thirdparty/harfbuzz/src/hb-subset-cff1.cc b/thirdparty/harfbuzz/src/hb-subset-cff1.cc index b4e24122c9..35fecd67bc 100644 --- a/thirdparty/harfbuzz/src/hb-subset-cff1.cc +++ b/thirdparty/harfbuzz/src/hb-subset-cff1.cc @@ -362,43 +362,11 @@ struct cff1_subr_subsetter_t : subr_subsetter_t<cff1_subr_subsetter_t, CFF1Subrs struct cff_subset_plan { cff_subset_plan () - : info (), - orig_fdcount (0), - subset_fdcount (1), - subset_fdselect_format (0), - drop_hints (false), - desubroutinize(false) { - topdict_mod.init (); - subset_fdselect_ranges.init (); - fdmap.init (); - subset_charstrings.init (); - 
subset_globalsubrs.init (); - subset_localsubrs.init (); - fontdicts_mod.init (); - subset_enc_code_ranges.init (); - subset_enc_supp_codes.init (); - subset_charset_ranges.init (); - sidmap.init (); for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++) topDictModSIDs[i] = CFF_UNDEF_SID; } - ~cff_subset_plan () - { - topdict_mod.fini (); - subset_fdselect_ranges.fini (); - fdmap.fini (); - subset_charstrings.fini_deep (); - subset_globalsubrs.fini_deep (); - subset_localsubrs.fini_deep (); - fontdicts_mod.fini (); - subset_enc_code_ranges.fini (); - subset_enc_supp_codes.fini (); - subset_charset_ranges.fini (); - sidmap.fini (); - } - void plan_subset_encoding (const OT::cff1::accelerator_subset_t &acc, hb_subset_plan_t *plan) { const Encoding *encoding = acc.encoding; @@ -672,9 +640,9 @@ struct cff_subset_plan { cff1_sub_table_info_t info; unsigned int num_glyphs; - unsigned int orig_fdcount; - unsigned int subset_fdcount; - unsigned int subset_fdselect_format; + unsigned int orig_fdcount = 0; + unsigned int subset_fdcount = 1; + unsigned int subset_fdselect_format = 0; hb_vector_t<code_pair_t> subset_fdselect_ranges; /* font dict index remap table from fullset FDArray to subset FDArray. 
@@ -686,7 +654,7 @@ struct cff_subset_plan { hb_vector_t<str_buff_vec_t> subset_localsubrs; hb_vector_t<cff1_font_dict_values_mod_t> fontdicts_mod; - bool drop_hints; + bool drop_hints = false; bool gid_renum; bool subset_encoding; @@ -702,7 +670,7 @@ struct cff_subset_plan { remap_sid_t sidmap; unsigned int topDictModSIDs[name_dict_values_t::ValCount]; - bool desubroutinize; + bool desubroutinize = false; }; static bool _serialize_cff1 (hb_serialize_context_t *c, diff --git a/thirdparty/harfbuzz/src/hb-subset-cff2.cc b/thirdparty/harfbuzz/src/hb-subset-cff2.cc index 896ae64016..92dd6b1d2c 100644 --- a/thirdparty/harfbuzz/src/hb-subset-cff2.cc +++ b/thirdparty/harfbuzz/src/hb-subset-cff2.cc @@ -233,29 +233,6 @@ struct cff2_subr_subsetter_t : subr_subsetter_t<cff2_subr_subsetter_t, CFF2Subrs }; struct cff2_subset_plan { - cff2_subset_plan () - : orig_fdcount (0), - subset_fdcount(1), - subset_fdselect_size (0), - subset_fdselect_format (0), - drop_hints (false), - desubroutinize (false) - { - subset_fdselect_ranges.init (); - fdmap.init (); - subset_charstrings.init (); - subset_globalsubrs.init (); - subset_localsubrs.init (); - } - - ~cff2_subset_plan () - { - subset_fdselect_ranges.fini (); - fdmap.fini (); - subset_charstrings.fini_deep (); - subset_globalsubrs.fini_deep (); - subset_localsubrs.fini_deep (); - } bool create (const OT::cff2::accelerator_subset_t &acc, hb_subset_plan_t *plan) @@ -320,10 +297,10 @@ struct cff2_subset_plan { cff2_sub_table_info_t info; - unsigned int orig_fdcount; - unsigned int subset_fdcount; - unsigned int subset_fdselect_size; - unsigned int subset_fdselect_format; + unsigned int orig_fdcount = 0; + unsigned int subset_fdcount = 1; + unsigned int subset_fdselect_size = 0; + unsigned int subset_fdselect_format = 0; hb_vector_t<code_pair_t> subset_fdselect_ranges; hb_inc_bimap_t fdmap; @@ -332,8 +309,8 @@ struct cff2_subset_plan { str_buff_vec_t subset_globalsubrs; hb_vector_t<str_buff_vec_t> subset_localsubrs; - bool drop_hints; 
- bool desubroutinize; + bool drop_hints = false; + bool desubroutinize = false; }; static bool _serialize_cff2 (hb_serialize_context_t *c, @@ -473,12 +450,8 @@ _hb_subset_cff2 (const OT::cff2::accelerator_subset_t &acc, bool hb_subset_cff2 (hb_subset_context_t *c) { - OT::cff2::accelerator_subset_t acc; - acc.init (c->plan->source); - bool result = likely (acc.is_valid ()) && _hb_subset_cff2 (acc, c); - acc.fini (); - - return result; + OT::cff2::accelerator_subset_t acc (c->plan->source); + return acc.is_valid () && _hb_subset_cff2 (acc, c); } #endif diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.cc b/thirdparty/harfbuzz/src/hb-subset-plan.cc index 883ab82093..af4fcb8137 100644 --- a/thirdparty/harfbuzz/src/hb-subset-plan.cc +++ b/thirdparty/harfbuzz/src/hb-subset-plan.cc @@ -228,10 +228,8 @@ _cmap_closure (hb_face_t *face, const hb_set_t *unicodes, hb_set_t *glyphset) { - OT::cmap::accelerator_t cmap; - cmap.init (face); + OT::cmap::accelerator_t cmap (face); cmap.table->closure_glyphs (unicodes, glyphset); - cmap.fini (); } static void _colr_closure (hb_face_t *face, @@ -239,8 +237,7 @@ static void _colr_closure (hb_face_t *face, hb_map_t *palettes_map, hb_set_t *glyphs_colred) { - OT::COLR::accelerator_t colr; - colr.init (face); + OT::COLR::accelerator_t colr (face); if (!colr.is_valid ()) return; unsigned iteration_count = 0; @@ -263,7 +260,6 @@ static void _colr_closure (hb_face_t *face, colr.closure_V0palette_indices (glyphs_colred, &palette_indices); _remap_indexes (&layer_indices, layers_map); _remap_palette_indexes (&palette_indices, palettes_map); - colr.fini (); } static inline void @@ -294,8 +290,7 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, const hb_set_t *glyphs, hb_subset_plan_t *plan) { - OT::cmap::accelerator_t cmap; - cmap.init (plan->source); + OT::cmap::accelerator_t cmap (plan->source); constexpr static const int size_threshold = 4096; @@ -343,8 +338,6 @@ _populate_unicodes_to_retain (const hb_set_t *unicodes, + 
plan->codepoint_to_glyph->keys () | hb_sink (plan->unicodes); + plan->codepoint_to_glyph->values () | hb_sink (plan->_glyphset_gsub); - - cmap.fini (); } static void @@ -353,13 +346,9 @@ _populate_gids_to_retain (hb_subset_plan_t* plan, bool close_over_gpos, bool close_over_gdef) { - OT::glyf::accelerator_t glyf; -#ifndef HB_NO_SUBSET_CFF - OT::cff1::accelerator_t cff; -#endif - glyf.init (plan->source); + OT::glyf::accelerator_t glyf (plan->source); #ifndef HB_NO_SUBSET_CFF - cff.init (plan->source); + OT::cff1::accelerator_t cff (plan->source); #endif plan->_glyphset_gsub->add (0); // Not-def @@ -419,11 +408,6 @@ _populate_gids_to_retain (hb_subset_plan_t* plan, plan->layout_variation_indices, plan->layout_variation_idx_map); #endif - -#ifndef HB_NO_SUBSET_CFF - cff.fini (); -#endif - glyf.fini (); } static void diff --git a/thirdparty/harfbuzz/src/hb-uniscribe.cc b/thirdparty/harfbuzz/src/hb-uniscribe.cc index 0e5a114f7d..50f71ce9ce 100644 --- a/thirdparty/harfbuzz/src/hb-uniscribe.cc +++ b/thirdparty/harfbuzz/src/hb-uniscribe.cc @@ -878,7 +878,8 @@ retry: if (backward) hb_buffer_reverse (buffer); - buffer->clear_glyph_flags (HB_GLYPH_FLAG_UNSAFE_TO_BREAK); + buffer->clear_glyph_flags (); + buffer->unsafe_to_break (); /* Wow, done! 
*/ return true; diff --git a/thirdparty/harfbuzz/src/hb-vector.hh b/thirdparty/harfbuzz/src/hb-vector.hh index b0a1e5e966..6c7d32e49d 100644 --- a/thirdparty/harfbuzz/src/hb-vector.hh +++ b/thirdparty/harfbuzz/src/hb-vector.hh @@ -32,11 +32,14 @@ #include "hb-null.hh" -template <typename Type> -struct hb_vector_t +template <typename Type, + bool sorted=false> +struct hb_vector_t : std::conditional<sorted, hb_vector_t<Type, false>, hb_empty_t>::type { typedef Type item_t; static constexpr unsigned item_size = hb_static_size (Type); + using array_t = typename std::conditional<sorted, hb_sorted_array_t<Type>, hb_array_t<Type>>::type; + using c_array_t = typename std::conditional<sorted, hb_sorted_array_t<const Type>, hb_array_t<const Type>>::type; hb_vector_t () = default; hb_vector_t (std::initializer_list<Type> lst) : hb_vector_t () @@ -82,16 +85,10 @@ struct hb_vector_t void fini () { + shrink_vector (0); hb_free (arrayZ); init (); } - void fini_deep () - { - unsigned int count = length; - for (unsigned int i = 0; i < count; i++) - arrayZ[i].fini (); - fini (); - } void reset () { @@ -152,24 +149,24 @@ struct hb_vector_t template <typename T> hb_vector_t& operator << (T&& v) { push (std::forward<T> (v)); return *this; } - hb_array_t< Type> as_array () { return hb_array (arrayZ, length); } - hb_array_t<const Type> as_array () const { return hb_array (arrayZ, length); } + array_t as_array () { return hb_array (arrayZ, length); } + c_array_t as_array () const { return hb_array (arrayZ, length); } /* Iterator. 
*/ - typedef hb_array_t<const Type> iter_t; - typedef hb_array_t< Type> writer_t; + typedef c_array_t iter_t; + typedef array_t writer_t; iter_t iter () const { return as_array (); } writer_t writer () { return as_array (); } operator iter_t () const { return iter (); } operator writer_t () { return writer (); } - hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const + c_array_t sub_array (unsigned int start_offset, unsigned int count) const { return as_array ().sub_array (start_offset, count); } - hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const + c_array_t sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const { return as_array ().sub_array (start_offset, count); } - hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int count) + array_t sub_array (unsigned int start_offset, unsigned int count) { return as_array ().sub_array (start_offset, count); } - hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) + array_t sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) { return as_array ().sub_array (start_offset, count); } hb_sorted_array_t<Type> as_sorted_array () @@ -192,6 +189,7 @@ struct hb_vector_t template <typename T> Type *push (T&& v) { + /* TODO Emplace? 
*/ Type *p = push (); if (p == &Crap (Type)) // If push failed to allocate then don't copy v, since this may cause @@ -204,6 +202,92 @@ struct hb_vector_t bool in_error () const { return allocated < 0; } + template <typename T = Type, + hb_enable_if (std::is_trivially_copy_assignable<T>::value)> + Type * + realloc_vector (unsigned new_allocated) + { + return (Type *) hb_realloc (arrayZ, new_allocated * sizeof (Type)); + } + template <typename T = Type, + hb_enable_if (!std::is_trivially_copy_assignable<T>::value)> + Type * + realloc_vector (unsigned new_allocated) + { + Type *new_array = (Type *) hb_malloc (new_allocated * sizeof (Type)); + if (likely (new_array)) + { + for (unsigned i = 0; i < length; i++) + new (std::addressof (new_array[i])) Type (); + for (unsigned i = 0; i < (unsigned) length; i++) + new_array[i] = std::move (arrayZ[i]); + unsigned old_length = length; + shrink_vector (0); + length = old_length; + hb_free (arrayZ); + } + return new_array; + } + + template <typename T = Type, + hb_enable_if (std::is_trivially_constructible<T>::value || + !std::is_default_constructible<T>::value)> + void + grow_vector (unsigned size) + { + memset (arrayZ + length, 0, (size - length) * sizeof (*arrayZ)); + length = size; + } + template <typename T = Type, + hb_enable_if (!std::is_trivially_constructible<T>::value && + std::is_default_constructible<T>::value)> + void + grow_vector (unsigned size) + { + while (length < size) + { + length++; + new (std::addressof (arrayZ[length - 1])) Type (); + } + } + + template <typename T = Type, + hb_enable_if (std::is_trivially_destructible<T>::value)> + void + shrink_vector (unsigned size) + { + length = size; + } + template <typename T = Type, + hb_enable_if (!std::is_trivially_destructible<T>::value)> + void + shrink_vector (unsigned size) + { + while ((unsigned) length > size) + { + arrayZ[(unsigned) length - 1].~Type (); + length--; + } + } + + template <typename T = Type, + hb_enable_if 
(std::is_trivially_copy_assignable<T>::value)> + void + shift_down_vector (unsigned i) + { + memmove (static_cast<void *> (&arrayZ[i - 1]), + static_cast<void *> (&arrayZ[i]), + (length - i) * sizeof (Type)); + } + template <typename T = Type, + hb_enable_if (!std::is_trivially_copy_assignable<T>::value)> + void + shift_down_vector (unsigned i) + { + for (; i < length; i++) + arrayZ[i - 1] = std::move (arrayZ[i]); + } + /* Allocate for size but don't adjust length. */ bool alloc (unsigned int size) { @@ -225,7 +309,7 @@ struct hb_vector_t (new_allocated < (unsigned) allocated) || hb_unsigned_mul_overflows (new_allocated, sizeof (Type)); if (likely (!overflows)) - new_array = (Type *) hb_realloc (arrayZ, new_allocated * sizeof (Type)); + new_array = realloc_vector (new_allocated); if (unlikely (!new_array)) { @@ -246,7 +330,9 @@ struct hb_vector_t return false; if (size > length) - memset (arrayZ + length, 0, (size - length) * sizeof (*arrayZ)); + grow_vector (size); + else if (size < length) + shrink_vector (size); length = size; return true; @@ -255,48 +341,38 @@ struct hb_vector_t Type pop () { if (!length) return Null (Type); - return std::move (arrayZ[--length]); /* Does this move actually work? */ + Type v = std::move (arrayZ[length - 1]); + arrayZ[length - 1].~Type (); + length--; + return v; } void remove (unsigned int i) { if (unlikely (i >= length)) return; - memmove (static_cast<void *> (&arrayZ[i]), - static_cast<void *> (&arrayZ[i + 1]), - (length - i - 1) * sizeof (Type)); + arrayZ[i].~Type (); + shift_down_vector (i + 1); length--; } void shrink (int size_) { unsigned int size = size_ < 0 ? 
0u : (unsigned int) size_; - if (size < length) - length = size; - } + if (size >= length) + return; - template <typename T> - Type *find (T v) - { - for (unsigned int i = 0; i < length; i++) - if (arrayZ[i] == v) - return &arrayZ[i]; - return nullptr; - } - template <typename T> - const Type *find (T v) const - { - for (unsigned int i = 0; i < length; i++) - if (arrayZ[i] == v) - return &arrayZ[i]; - return nullptr; + shrink_vector (size); } + + /* Sorting API. */ void qsort (int (*cmp)(const void*, const void*)) { as_array ().qsort (cmp); } void qsort (unsigned int start = 0, unsigned int end = (unsigned int) -1) { as_array ().qsort (start, end); } + /* Unsorted search API. */ template <typename T> Type *lsearch (const T &x, Type *not_found = nullptr) { return as_array ().lsearch (x, not_found); } @@ -306,47 +382,25 @@ struct hb_vector_t template <typename T> bool lfind (const T &x, unsigned *pos = nullptr) const { return as_array ().lfind (x, pos); } -}; -template <typename Type> -struct hb_sorted_vector_t : hb_vector_t<Type> -{ - hb_sorted_vector_t () = default; - ~hb_sorted_vector_t () = default; - hb_sorted_vector_t (hb_sorted_vector_t& o) = default; - hb_sorted_vector_t (hb_sorted_vector_t &&o) = default; - hb_sorted_vector_t (std::initializer_list<Type> lst) : hb_vector_t<Type> (lst) {} - template <typename Iterable, - hb_requires (hb_is_iterable (Iterable))> - hb_sorted_vector_t (const Iterable &o) : hb_vector_t<Type> (o) {} - hb_sorted_vector_t& operator = (const hb_sorted_vector_t &o) = default; - hb_sorted_vector_t& operator = (hb_sorted_vector_t &&o) = default; - friend void swap (hb_sorted_vector_t& a, hb_sorted_vector_t& b) - { hb_swap ((hb_vector_t<Type>&) (a), (hb_vector_t<Type>&) (b)); } - - hb_sorted_array_t< Type> as_array () { return hb_sorted_array (this->arrayZ, this->length); } - hb_sorted_array_t<const Type> as_array () const { return hb_sorted_array (this->arrayZ, this->length); } - - /* Iterator. 
*/ - typedef hb_sorted_array_t<const Type> const_iter_t; - typedef hb_sorted_array_t< Type> iter_t; - const_iter_t iter () const { return as_array (); } - const_iter_t citer () const { return as_array (); } - iter_t iter () { return as_array (); } - operator iter_t () { return iter (); } - operator const_iter_t () const { return iter (); } - - template <typename T> + /* Sorted search API. */ + template <typename T, + bool Sorted=sorted, hb_enable_if (Sorted)> Type *bsearch (const T &x, Type *not_found = nullptr) { return as_array ().bsearch (x, not_found); } - template <typename T> + template <typename T, + bool Sorted=sorted, hb_enable_if (Sorted)> const Type *bsearch (const T &x, const Type *not_found = nullptr) const { return as_array ().bsearch (x, not_found); } - template <typename T> + template <typename T, + bool Sorted=sorted, hb_enable_if (Sorted)> bool bfind (const T &x, unsigned int *i = nullptr, hb_not_found_t not_found = HB_NOT_FOUND_DONT_STORE, unsigned int to_store = (unsigned int) -1) const { return as_array ().bfind (x, i, not_found, to_store); } }; +template <typename Type> +using hb_sorted_vector_t = hb_vector_t<Type, true>; + #endif /* HB_VECTOR_HH */ diff --git a/thirdparty/harfbuzz/src/hb-version.h b/thirdparty/harfbuzz/src/hb-version.h index 52b124b745..493a09f8cf 100644 --- a/thirdparty/harfbuzz/src/hb-version.h +++ b/thirdparty/harfbuzz/src/hb-version.h @@ -47,20 +47,20 @@ HB_BEGIN_DECLS * * The minor component of the library version available at compile-time. */ -#define HB_VERSION_MINOR 2 +#define HB_VERSION_MINOR 3 /** * HB_VERSION_MICRO: * * The micro component of the library version available at compile-time. */ -#define HB_VERSION_MICRO 0 +#define HB_VERSION_MICRO 2 /** * HB_VERSION_STRING: * * A string literal containing the library version available at compile-time. 
*/ -#define HB_VERSION_STRING "3.2.0" +#define HB_VERSION_STRING "3.3.2" /** * HB_VERSION_ATLEAST: diff --git a/thirdparty/harfbuzz/src/hb.hh b/thirdparty/harfbuzz/src/hb.hh index 1f14267525..b9f5f71415 100644 --- a/thirdparty/harfbuzz/src/hb.hh +++ b/thirdparty/harfbuzz/src/hb.hh @@ -447,6 +447,7 @@ static int HB_UNUSED _hb_errno = 0; #ifndef HB_USE_ATEXIT # define HB_USE_ATEXIT 0 #endif +#ifndef hb_atexit #if !HB_USE_ATEXIT # define hb_atexit(_) HB_STMT_START { if (0) (_) (); } HB_STMT_END #else /* HB_USE_ATEXIT */ @@ -457,6 +458,7 @@ static int HB_UNUSED _hb_errno = 0; # define hb_atexit(f) static hb_atexit_t<f> _hb_atexit_##__LINE__; # endif #endif +#endif /* Lets assert int types. Saves trouble down the road. */ static_assert ((sizeof (hb_codepoint_t) == 4), ""); diff --git a/thirdparty/libwebp/AUTHORS b/thirdparty/libwebp/AUTHORS index 30abde0326..8307c2099d 100644 --- a/thirdparty/libwebp/AUTHORS +++ b/thirdparty/libwebp/AUTHORS @@ -32,6 +32,7 @@ Contributors: - Pascal Massimino (pascal dot massimino at gmail dot com) - PaweÅ‚ Hajdan, Jr (phajdan dot jr at chromium dot org) - Pierre Joye (pierre dot php at gmail dot com) +- Roberto Alanis (alanisbaez at google dot com) - Sam Clegg (sbc at chromium dot org) - Scott Hancher (seh at google dot com) - Scott LaVarnway (slavarnway at google dot com) diff --git a/thirdparty/libwebp/src/dec/vp8_dec.c b/thirdparty/libwebp/src/dec/vp8_dec.c index 5f405e4c2a..2003935ec4 100644 --- a/thirdparty/libwebp/src/dec/vp8_dec.c +++ b/thirdparty/libwebp/src/dec/vp8_dec.c @@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; -// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 +// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2 static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { int v; if (!VP8GetBit(br, p[3], "coeffs")) { diff --git a/thirdparty/libwebp/src/dec/vp8i_dec.h 
b/thirdparty/libwebp/src/dec/vp8i_dec.h index 20526a87c4..9af22f8cc6 100644 --- a/thirdparty/libwebp/src/dec/vp8i_dec.h +++ b/thirdparty/libwebp/src/dec/vp8i_dec.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 #define DEC_MIN_VERSION 2 -#define DEC_REV_VERSION 1 +#define DEC_REV_VERSION 2 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/thirdparty/libwebp/src/dec/vp8l_dec.c b/thirdparty/libwebp/src/dec/vp8l_dec.c index 73c3b54fff..78db014030 100644 --- a/thirdparty/libwebp/src/dec/vp8l_dec.c +++ b/thirdparty/libwebp/src/dec/vp8l_dec.c @@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = { // to 256 (green component values) + 24 (length prefix values) // + color_cache_size (between 0 and 2048). // All values computed for 8-bit first level lookup with Mark Adler's tool: -// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c +// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c #define FIXED_TABLE_SIZE (630 * 3 + 410) static const uint16_t kTableSize[12] = { FIXED_TABLE_SIZE + 654, diff --git a/thirdparty/libwebp/src/demux/anim_decode.c b/thirdparty/libwebp/src/demux/anim_decode.c index 2bf4dcffe0..e077ffb536 100644 --- a/thirdparty/libwebp/src/demux/anim_decode.c +++ b/thirdparty/libwebp/src/demux/anim_decode.c @@ -23,6 +23,14 @@ #define NUM_CHANNELS 4 +// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA +// buffer. 
+#ifdef WORDS_BIGENDIAN +#define CHANNEL_SHIFT(i) (24 - (i) * 8) +#else +#define CHANNEL_SHIFT(i) ((i) * 8) +#endif + typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int); static void BlendPixelRowNonPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels); @@ -209,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a, const uint8_t dst_channel = (dst >> shift) & 0xff; const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a; assert(blend_unscaled < (1ULL << 32) / scale); - return (blend_unscaled * scale) >> 24; + return (blend_unscaled * scale) >> CHANNEL_SHIFT(3); } // Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha. static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) { - const uint8_t src_a = (src >> 24) & 0xff; + const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; if (src_a == 0) { return dst; } else { - const uint8_t dst_a = (dst >> 24) & 0xff; + const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff; // This is the approximate integer arithmetic for the actual formula: // dst_factor_a = (dst_a * (255 - src_a)) / 255. 
const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8; const uint8_t blend_a = src_a + dst_factor_a; const uint32_t scale = (1UL << 24) / blend_a; - const uint8_t blend_r = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0); - const uint8_t blend_g = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8); - const uint8_t blend_b = - BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16); + const uint8_t blend_r = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0)); + const uint8_t blend_g = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1)); + const uint8_t blend_b = BlendChannelNonPremult( + src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2)); assert(src_a + dst_factor_a < 256); - return (blend_r << 0) | - (blend_g << 8) | - (blend_b << 16) | - ((uint32_t)blend_a << 24); + return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) | + ((uint32_t)blend_g << CHANNEL_SHIFT(1)) | + ((uint32_t)blend_b << CHANNEL_SHIFT(2)) | + ((uint32_t)blend_a << CHANNEL_SHIFT(3)); } } @@ -247,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const uint8_t src_alpha = (src[i] >> 24) & 0xff; + const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; if (src_alpha != 0xff) { src[i] = BlendPixelNonPremult(src[i], dst[i]); } @@ -264,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) { // Blend 'src' over 'dst' assuming they are pre-multiplied by alpha. 
static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) { - const uint8_t src_a = (src >> 24) & 0xff; + const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff; return src + ChannelwiseMultiply(dst, 256 - src_a); } @@ -274,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const uint8_t src_alpha = (src[i] >> 24) & 0xff; + const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff; if (src_alpha != 0xff) { src[i] = BlendPixelPremult(src[i], dst[i]); } diff --git a/thirdparty/libwebp/src/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c index 547a7725de..f04a2b8450 100644 --- a/thirdparty/libwebp/src/demux/demux.c +++ b/thirdparty/libwebp/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 1 #define DMUX_MIN_VERSION 2 -#define DMUX_REV_VERSION 1 +#define DMUX_REV_VERSION 2 typedef struct { size_t start_; // start location of the data diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h index 513e159bb3..c4f57e4d5b 100644 --- a/thirdparty/libwebp/src/dsp/dsp.h +++ b/thirdparty/libwebp/src/dsp/dsp.h @@ -119,7 +119,12 @@ extern "C" { #define WEBP_USE_NEON #endif -#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM) +// Note: ARM64 is supported in Visual Studio 2017, but requires the direct +// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in +// arm_neon.h. +#if defined(_MSC_VER) && \ + ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ + (_MSC_VER >= 1920 && defined(_M_ARM64))) #define WEBP_USE_NEON #define WEBP_USE_INTRINSICS #endif diff --git a/thirdparty/libwebp/src/dsp/enc_neon.c b/thirdparty/libwebp/src/dsp/enc_neon.c index 43bf1245c5..601962ba76 100644 --- a/thirdparty/libwebp/src/dsp/enc_neon.c +++ b/thirdparty/libwebp/src/dsp/enc_neon.c @@ -9,7 +9,7 @@ // // ARM NEON version of speed-critical encoding functions. 
// -// adapted from libvpx (http://www.webmproject.org/code/) +// adapted from libvpx (https://www.webmproject.org/code/) #include "src/dsp/dsp.h" diff --git a/thirdparty/libwebp/src/dsp/lossless.c b/thirdparty/libwebp/src/dsp/lossless.c index d8bbb02b35..84a54296fd 100644 --- a/thirdparty/libwebp/src/dsp/lossless.c +++ b/thirdparty/libwebp/src/dsp/lossless.c @@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { //------------------------------------------------------------------------------ // Predictors -uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor0_C(const uint32_t* const left, + const uint32_t* const top) { (void)top; (void)left; return ARGB_BLACK; } -uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor1_C(const uint32_t* const left, + const uint32_t* const top) { (void)top; - return left; + return *left; } -uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor2_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[0]; } -uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor3_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[1]; } -uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor4_C(const uint32_t* const left, + const uint32_t* const top) { (void)left; return top[-1]; } -uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3(left, top[0], top[1]); +uint32_t VP8LPredictor5_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average3(*left, top[0], top[1]); return pred; } -uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[-1]); +uint32_t VP8LPredictor6_C(const uint32_t* const left, + const uint32_t* const 
top) { + const uint32_t pred = Average2(*left, top[-1]); return pred; } -uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2(left, top[0]); +uint32_t VP8LPredictor7_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2(*left, top[0]); return pred; } -uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor8_C(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2(top[-1], top[0]); (void)left; return pred; } -uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) { +uint32_t VP8LPredictor9_C(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2(top[0], top[1]); (void)left; return pred; } -uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average4(left, top[-1], top[0], top[1]); +uint32_t VP8LPredictor10_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average4(*left, top[-1], top[0], top[1]); return pred; } -uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select(top[0], left, top[-1]); +uint32_t VP8LPredictor11_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Select(top[0], *left, top[-1]); return pred; } -uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); +uint32_t VP8LPredictor12_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]); return pred; } -uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); +uint32_t VP8LPredictor13_C(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf(*left, 
top[0], top[-1]); return pred; } diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h index ebd316d1ed..c26c6bca07 100644 --- a/thirdparty/libwebp/src/dsp/lossless.h +++ b/thirdparty/libwebp/src/dsp/lossless.h @@ -28,23 +28,38 @@ extern "C" { //------------------------------------------------------------------------------ // Decoding -typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top); +typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left, + const uint32_t* const top); extern VP8LPredictorFunc VP8LPredictors[16]; -uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top); -uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top); +uint32_t VP8LPredictor0_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor1_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor2_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor3_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor4_C(const uint32_t* const left, + const uint32_t* 
const top); +uint32_t VP8LPredictor5_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor6_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor7_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor8_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor9_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor10_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor11_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor12_C(const uint32_t* const left, + const uint32_t* const top); +uint32_t VP8LPredictor13_C(const uint32_t* const left, + const uint32_t* const top); // These Add/Sub function expects upper[-1] and out[-1] to be readable. typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in, diff --git a/thirdparty/libwebp/src/dsp/lossless_common.h b/thirdparty/libwebp/src/dsp/lossless_common.h index 96a106f9ee..6a2f736b5e 100644 --- a/thirdparty/libwebp/src/dsp/lossless_common.h +++ b/thirdparty/libwebp/src/dsp/lossless_common.h @@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ int x; \ assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ - const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \ + const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \ out[x] = VP8LAddPixels(in[x], pred); \ } \ } diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c index c3e8537ade..1580631e38 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc.c @@ -745,7 +745,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \ assert(upper != NULL); \ for (x = 0; x < num_pixels; ++x) { \ const uint32_t pred = \ - VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \ + VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + 
x); \ out[x] = VP8LSubPixels(in[x], pred); \ } \ } diff --git a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c index 9888854d57..bfe5ea6b38 100644 --- a/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c +++ b/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c @@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, return Average2(Average2(a0, a1), Average2(a2, a3)); } -static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average3(left, top[0], top[1]); +static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average3(*left, top[0], top[1]); } -static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average2(left, top[-1]); +static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average2(*left, top[-1]); } -static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) { - return Average2(left, top[0]); +static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { + return Average2(*left, top[0]); } -static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { (void)left; return Average2(top[-1], top[0]); } -static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left, + const uint32_t* const top) { (void)left; return Average2(top[0], top[1]); } -static uint32_t Predictor10_MIPSdspR2(uint32_t left, +static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return Average4(left, top[-1], top[0], top[1]); + return Average4(*left, top[-1], top[0], top[1]); } -static uint32_t Predictor11_MIPSdspR2(uint32_t left, +static uint32_t 
Predictor11_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return Select(top[0], left, top[-1]); + return Select(top[0], *left, top[-1]); } -static uint32_t Predictor12_MIPSdspR2(uint32_t left, +static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return ClampedAddSubtractFull(left, top[0], top[-1]); + return ClampedAddSubtractFull(*left, top[0], top[-1]); } -static uint32_t Predictor13_MIPSdspR2(uint32_t left, +static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left, const uint32_t* const top) { - return ClampedAddSubtractHalf(left, top[0], top[-1]); + return ClampedAddSubtractHalf(*left, top[0], top[-1]); } // Add green to blue and red channels (i.e. perform the inverse transform of diff --git a/thirdparty/libwebp/src/dsp/lossless_neon.c b/thirdparty/libwebp/src/dsp/lossless_neon.c index 76a1b6f873..89e3e013a0 100644 --- a/thirdparty/libwebp/src/dsp/lossless_neon.c +++ b/thirdparty/libwebp/src/dsp/lossless_neon.c @@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1, return avg; } -static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) { - return Average3_NEON(left, top[0], top[1]); +static uint32_t Predictor5_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average3_NEON(*left, top[0], top[1]); } -static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) { - return Average2_NEON(left, top[-1]); +static uint32_t Predictor6_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average2_NEON(*left, top[-1]); } -static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) { - return Average2_NEON(left, top[0]); +static uint32_t Predictor7_NEON(const uint32_t* const left, + const uint32_t* const top) { + return Average2_NEON(*left, top[0]); } -static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) { - return ClampedAddSubtractHalf_NEON(left, top[0], 
top[-1]); +static uint32_t Predictor13_NEON(const uint32_t* const left, + const uint32_t* const top) { + return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]); } // Batch versions of those functions. diff --git a/thirdparty/libwebp/src/dsp/lossless_sse2.c b/thirdparty/libwebp/src/dsp/lossless_sse2.c index 3a0eb440db..396cb0bdfc 100644 --- a/thirdparty/libwebp/src/dsp/lossless_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c @@ -138,42 +138,51 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, return output; } -static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average3_SSE2(left, top[0], top[1]); +static uint32_t Predictor5_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average3_SSE2(*left, top[0], top[1]); return pred; } -static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(left, top[-1]); +static uint32_t Predictor6_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2_SSE2(*left, top[-1]); return pred; } -static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Average2_SSE2(left, top[0]); +static uint32_t Predictor7_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average2_SSE2(*left, top[0]); return pred; } -static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor8_SSE2(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2_SSE2(top[-1], top[0]); (void)left; return pred; } -static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) { +static uint32_t Predictor9_SSE2(const uint32_t* const left, + const uint32_t* const top) { const uint32_t pred = Average2_SSE2(top[0], top[1]); (void)left; return pred; } -static uint32_t Predictor10_SSE2(uint32_t left, const 
uint32_t* const top) { - const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]); +static uint32_t Predictor10_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]); return pred; } -static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = Select_SSE2(top[0], left, top[-1]); +static uint32_t Predictor11_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = Select_SSE2(top[0], *left, top[-1]); return pred; } -static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]); +static uint32_t Predictor12_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]); return pred; } -static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) { - const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]); +static uint32_t Predictor13_SSE2(const uint32_t* const left, + const uint32_t* const top) { + const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]); return pred; } diff --git a/thirdparty/libwebp/src/dsp/msa_macro.h b/thirdparty/libwebp/src/dsp/msa_macro.h index de026a1d9e..51f6c643ab 100644 --- a/thirdparty/libwebp/src/dsp/msa_macro.h +++ b/thirdparty/libwebp/src/dsp/msa_macro.h @@ -14,6 +14,10 @@ #ifndef WEBP_DSP_MSA_MACRO_H_ #define WEBP_DSP_MSA_MACRO_H_ +#include "src/dsp/dsp.h" + +#if defined(WEBP_USE_MSA) + #include <stdint.h> #include <msa.h> @@ -1389,4 +1393,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) { } while (0) #define AVER_UB2_UB(...) 
AVER_UB2(v16u8, __VA_ARGS__) +#endif // WEBP_USE_MSA #endif // WEBP_DSP_MSA_MACRO_H_ diff --git a/thirdparty/libwebp/src/dsp/neon.h b/thirdparty/libwebp/src/dsp/neon.h index aa1dea1301..c591f9b9a7 100644 --- a/thirdparty/libwebp/src/dsp/neon.h +++ b/thirdparty/libwebp/src/dsp/neon.h @@ -12,10 +12,12 @@ #ifndef WEBP_DSP_NEON_H_ #define WEBP_DSP_NEON_H_ -#include <arm_neon.h> - #include "src/dsp/dsp.h" +#if defined(WEBP_USE_NEON) + +#include <arm_neon.h> + // Right now, some intrinsics functions seem slower, so we disable them // everywhere except newer clang/gcc or aarch64 where the inline assembly is // incompatible. @@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) { } while (0) #endif +#endif // WEBP_USE_NEON #endif // WEBP_DSP_NEON_H_ diff --git a/thirdparty/libwebp/src/dsp/yuv.h b/thirdparty/libwebp/src/dsp/yuv.h index c12be1d094..66a397d117 100644 --- a/thirdparty/libwebp/src/dsp/yuv.h +++ b/thirdparty/libwebp/src/dsp/yuv.h @@ -10,7 +10,7 @@ // inline YUV<->RGB conversion function // // The exact naming is Y'CbCr, following the ITU-R BT.601 standard. 
-// More information at: http://en.wikipedia.org/wiki/YCbCr +// More information at: https://en.wikipedia.org/wiki/YCbCr // Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16 // U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128 // V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128 diff --git a/thirdparty/libwebp/src/enc/frame_enc.c b/thirdparty/libwebp/src/enc/frame_enc.c index af538d83ba..b93d9e5b99 100644 --- a/thirdparty/libwebp/src/enc/frame_enc.c +++ b/thirdparty/libwebp/src/enc/frame_enc.c @@ -778,6 +778,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { // Roughly refresh the proba eight times per pass int max_count = (enc->mb_w_ * enc->mb_h_) >> 3; int num_pass_left = enc->config_->pass; + int remaining_progress = 40; // percents const int do_search = enc->do_search_; VP8EncIterator it; VP8EncProba* const proba = &enc->proba_; @@ -805,6 +806,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { uint64_t size_p0 = 0; uint64_t distortion = 0; int cnt = max_count; + // The final number of passes is not trivial to know in advance. 
+ const int pass_progress = remaining_progress / (2 + num_pass_left); + remaining_progress -= pass_progress; VP8IteratorInit(enc, &it); SetLoopParams(enc, stats.q); if (is_last_pass) { @@ -832,7 +836,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { StoreSideInfo(&it); VP8StoreFilterStats(&it); VP8IteratorExport(&it); - ok = VP8IteratorProgress(&it, 20); + ok = VP8IteratorProgress(&it, pass_progress); } VP8IteratorSaveBoundary(&it); } while (ok && VP8IteratorNext(&it)); @@ -878,7 +882,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) { ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0, (const uint8_t*)proba->coeffs_, 1); } - ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_); + ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress, + &enc->percent_); return PostLoopFinalize(&it, ok); } diff --git a/thirdparty/libwebp/src/enc/predictor_enc.c b/thirdparty/libwebp/src/enc/predictor_enc.c index 2e6762ea0d..2b5c767280 100644 --- a/thirdparty/libwebp/src/enc/predictor_enc.c +++ b/thirdparty/libwebp/src/enc/predictor_enc.c @@ -249,7 +249,7 @@ static WEBP_INLINE void GetResidual( } else if (x == 0) { predict = upper_row[x]; // Top. } else { - predict = pred_func(current_row[x - 1], upper_row + x); + predict = pred_func(¤t_row[x - 1], upper_row + x); } #if (WEBP_NEAR_LOSSLESS == 1) if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 || diff --git a/thirdparty/libwebp/src/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c index 01eb565c7f..6cede28ab4 100644 --- a/thirdparty/libwebp/src/enc/quant_enc.c +++ b/thirdparty/libwebp/src/enc/quant_enc.c @@ -585,6 +585,9 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, return rate * lambda + RD_DISTO_MULT * distortion; } +// Coefficient type. 
+enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 }; + static int TrellisQuantizeBlock(const VP8Encoder* const enc, int16_t in[16], int16_t out[16], int ctx0, int coeff_type, @@ -593,7 +596,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; CostArrayPtr const costs = (CostArrayPtr)enc->proba_.remapped_costs_[coeff_type]; - const int first = (coeff_type == 0) ? 1 : 0; + const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0; Node nodes[16][NUM_NODES]; ScoreState score_states[2][NUM_NODES]; ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA); @@ -657,16 +660,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, // test all alternate level values around level0. for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) { Node* const cur = &NODE(n, m); - int level = level0 + m; + const int level = level0 + m; const int ctx = (level > 2) ? 2 : level; const int band = VP8EncBands[n + 1]; score_t base_score; - score_t best_cur_score = MAX_COST; - int best_prev = 0; // default, in case + score_t best_cur_score; + int best_prev; + score_t cost, score; - ss_cur[m].score = MAX_COST; ss_cur[m].costs = costs[n + 1][ctx]; if (level < 0 || level > thresh_level) { + ss_cur[m].score = MAX_COST; // Node is dead. continue; } @@ -682,18 +686,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, } // Inspect all possible non-dead predecessors. Retain only the best one. - for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) { + // The base_score is added to all scores so it is only added for the final + // value after the loop. + cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level); + best_cur_score = + ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0); + best_prev = -MIN_DELTA; + for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) { // Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically // eliminated since their score can't be better than the current best. 
- const score_t cost = VP8LevelCost(ss_prev[p].costs, level); + cost = VP8LevelCost(ss_prev[p].costs, level); // Examine node assuming it's a non-terminal one. - const score_t score = - base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); + score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0); if (score < best_cur_score) { best_cur_score = score; best_prev = p; } } + best_cur_score += base_score; // Store best finding in current node. cur->sign = sign; cur->level = level; @@ -701,11 +711,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, ss_cur[m].score = best_cur_score; // Now, record best terminal node (and thus best entry in the graph). - if (level != 0) { + if (level != 0 && best_cur_score < best_score) { const score_t last_pos_cost = (n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0; const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0); - const score_t score = best_cur_score + last_pos_score; + score = best_cur_score + last_pos_score; if (score < best_score) { best_score = score; best_path[0] = n; // best eob position @@ -717,10 +727,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, } // Fresh start - memset(in + first, 0, (16 - first) * sizeof(*in)); - memset(out + first, 0, (16 - first) * sizeof(*out)); + // Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case. + if (coeff_type == TYPE_I16_AC) { + memset(in + 1, 0, 15 * sizeof(*in)); + memset(out + 1, 0, 15 * sizeof(*out)); + } else { + memset(in, 0, 16 * sizeof(*in)); + memset(out, 0, 16 * sizeof(*out)); + } if (best_path[0] == -1) { - return 0; // skip! + return 0; // skip! 
} { @@ -775,9 +791,9 @@ static int ReconstructIntra16(VP8EncIterator* const it, for (y = 0, n = 0; y < 4; ++y) { for (x = 0; x < 4; ++x, ++n) { const int ctx = it->top_nz_[x] + it->left_nz_[y]; - const int non_zero = - TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0, - &dqm->y1_, dqm->lambda_trellis_i16_); + const int non_zero = TrellisQuantizeBlock( + enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_, + dqm->lambda_trellis_i16_); it->top_nz_[x] = it->left_nz_[y] = non_zero; rd->y_ac_levels[n][0] = 0; nz |= non_zero << n; @@ -818,7 +834,7 @@ static int ReconstructIntra4(VP8EncIterator* const it, if (DO_TRELLIS_I4 && it->do_trellis_) { const int x = it->i4_ & 3, y = it->i4_ >> 2; const int ctx = it->top_nz_[x] + it->left_nz_[y]; - nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_, + nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_, dqm->lambda_trellis_i4_); } else { nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_); @@ -927,9 +943,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++n) { const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y]; - const int non_zero = - TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2, - &dqm->uv_, dqm->lambda_trellis_uv_); + const int non_zero = TrellisQuantizeBlock( + enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_, + dqm->lambda_trellis_uv_); it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero; nz |= non_zero << n; } diff --git a/thirdparty/libwebp/src/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h index 67e9509367..b4bba08f27 100644 --- a/thirdparty/libwebp/src/enc/vp8i_enc.h +++ b/thirdparty/libwebp/src/enc/vp8i_enc.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 #define ENC_MIN_VERSION 2 -#define ENC_REV_VERSION 1 +#define ENC_REV_VERSION 2 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL 
= 67, // last (inclusive) level with variable cost diff --git a/thirdparty/libwebp/src/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h index 330da66754..d9bf9b3770 100644 --- a/thirdparty/libwebp/src/mux/muxi.h +++ b/thirdparty/libwebp/src/mux/muxi.h @@ -29,7 +29,7 @@ extern "C" { #define MUX_MAJ_VERSION 1 #define MUX_MIN_VERSION 2 -#define MUX_REV_VERSION 1 +#define MUX_REV_VERSION 2 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/thirdparty/libwebp/src/utils/huffman_encode_utils.c b/thirdparty/libwebp/src/utils/huffman_encode_utils.c index fd7a47d8f7..585db91951 100644 --- a/thirdparty/libwebp/src/utils/huffman_encode_utils.c +++ b/thirdparty/libwebp/src/utils/huffman_encode_utils.c @@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree, // especially when population counts are longer than 2**tree_limit, but // we are not planning to use this with extremely long blocks. // -// See http://en.wikipedia.org/wiki/Huffman_coding +// See https://en.wikipedia.org/wiki/Huffman_coding static void GenerateOptimalTree(const uint32_t* const histogram, int histogram_size, HuffmanTree* tree, int tree_depth_limit, diff --git a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c index f65b6cdbb6..97e7893704 100644 --- a/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c +++ b/thirdparty/libwebp/src/utils/quant_levels_dec_utils.c @@ -30,7 +30,7 @@ #define DFIX 4 // extra precision for ordered dithering #define DSIZE 4 // dithering size (must be a power of two) -// cf. http://en.wikipedia.org/wiki/Ordered_dithering +// cf. 
https://en.wikipedia.org/wiki/Ordered_dithering static const uint8_t kOrderedDither[DSIZE][DSIZE] = { { 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision { 12, 4, 14, 6 }, diff --git a/thirdparty/libwebp/src/utils/utils.c b/thirdparty/libwebp/src/utils/utils.c index 9e464c16ce..a7c3a70fef 100644 --- a/thirdparty/libwebp/src/utils/utils.c +++ b/thirdparty/libwebp/src/utils/utils.c @@ -23,7 +23,7 @@ // alloc/free etc) is printed. For debugging/tuning purpose only (it's slow, // and not multi-thread safe!). // An interesting alternative is valgrind's 'massif' tool: -// http://valgrind.org/docs/manual/ms-manual.html +// https://valgrind.org/docs/manual/ms-manual.html // Here is an example command line: /* valgrind --tool=massif --massif-out-file=massif.out \ --stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc diff --git a/thirdparty/libwebp/src/webp/decode.h b/thirdparty/libwebp/src/webp/decode.h index 44fcd64a84..d98247509a 100644 --- a/thirdparty/libwebp/src/webp/decode.h +++ b/thirdparty/libwebp/src/webp/decode.h @@ -85,7 +85,7 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, // Upon return, the Y buffer has a stride returned as '*stride', while U and V // have a common stride returned as '*uv_stride'. // Return NULL in case of error. -// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr +// (*) Also named Y'CbCr. 
See: https://en.wikipedia.org/wiki/YCbCr WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height, uint8_t** u, uint8_t** v, diff --git a/thirdparty/misc/patches/polypartition-godot-types.patch b/thirdparty/misc/patches/polypartition-godot-types.patch index 782f02e8dc..61737f9fd2 100644 --- a/thirdparty/misc/patches/polypartition-godot-types.patch +++ b/thirdparty/misc/patches/polypartition-godot-types.patch @@ -1,19 +1,16 @@ diff --git a/thirdparty/misc/polypartition.cpp b/thirdparty/misc/polypartition.cpp -index 3a8a6efa83..5e94793b79 100644 +index 3a8a6efa83..8c5409bf24 100644 --- a/thirdparty/misc/polypartition.cpp +++ b/thirdparty/misc/polypartition.cpp -@@ -23,10 +23,7 @@ - - #include "polypartition.h" - --#include <math.h> --#include <string.h> +@@ -26,7 +26,6 @@ + #include <math.h> + #include <string.h> #include <algorithm> -#include <vector> TPPLPoly::TPPLPoly() { hole = false; -@@ -186,7 +183,7 @@ int TPPLPartition::Intersects(TPPLPoint &p11, TPPLPoint &p12, TPPLPoint &p21, TP +@@ -186,7 +185,7 @@ int TPPLPartition::Intersects(TPPLPoint &p11, TPPLPoint &p12, TPPLPoint &p21, TP // Removes holes from inpolys by merging them with non-holes. int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { TPPLPolyList polys; @@ -22,7 +19,7 @@ index 3a8a6efa83..5e94793b79 100644 long i, i2, holepointindex, polypointindex; TPPLPoint holepoint, polypoint, bestpolypoint; TPPLPoint linep1, linep2; -@@ -198,15 +195,15 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -198,15 +197,15 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { // Check for the trivial case of no holes. 
hasholes = false; @@ -42,7 +39,7 @@ index 3a8a6efa83..5e94793b79 100644 } return 1; } -@@ -216,8 +213,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -216,8 +215,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { while (1) { // Find the hole point with the largest x. hasholes = false; @@ -53,7 +50,7 @@ index 3a8a6efa83..5e94793b79 100644 continue; } -@@ -227,8 +224,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -227,8 +226,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { holepointindex = 0; } @@ -64,7 +61,7 @@ index 3a8a6efa83..5e94793b79 100644 holeiter = iter; holepointindex = i; } -@@ -237,24 +234,24 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -237,24 +236,24 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { if (!hasholes) { break; } @@ -98,7 +95,7 @@ index 3a8a6efa83..5e94793b79 100644 if (pointfound) { v1 = Normalize(polypoint - holepoint); v2 = Normalize(bestpolypoint - holepoint); -@@ -263,13 +260,13 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -263,13 +262,13 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { } } pointvisible = true; @@ -117,7 +114,7 @@ index 3a8a6efa83..5e94793b79 100644 if (Intersects(holepoint, polypoint, linep1, linep2)) { pointvisible = false; break; -@@ -292,18 +289,18 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -292,18 +291,18 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { return 0; } @@ -142,7 +139,7 @@ index 3a8a6efa83..5e94793b79 100644 i2++; } -@@ -312,8 +309,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { +@@ -312,8 +311,8 @@ int TPPLPartition::RemoveHoles(TPPLPolyList *inpolys, TPPLPolyList *outpolys) { 
polys.push_back(newpoly); } @@ -153,7 +150,7 @@ index 3a8a6efa83..5e94793b79 100644 } return 1; -@@ -524,13 +521,13 @@ int TPPLPartition::Triangulate_EC(TPPLPoly *poly, TPPLPolyList *triangles) { +@@ -524,13 +523,13 @@ int TPPLPartition::Triangulate_EC(TPPLPoly *poly, TPPLPolyList *triangles) { int TPPLPartition::Triangulate_EC(TPPLPolyList *inpolys, TPPLPolyList *triangles) { TPPLPolyList outpolys; @@ -170,7 +167,7 @@ index 3a8a6efa83..5e94793b79 100644 return 0; } } -@@ -543,7 +540,7 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -543,7 +542,7 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { } TPPLPolyList triangles; @@ -179,7 +176,7 @@ index 3a8a6efa83..5e94793b79 100644 TPPLPoly *poly1 = NULL, *poly2 = NULL; TPPLPoly newpoly; TPPLPoint d1, d2, p1, p2, p3; -@@ -578,19 +575,19 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -578,19 +577,19 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { return 0; } @@ -203,7 +200,7 @@ index 3a8a6efa83..5e94793b79 100644 for (i21 = 0; i21 < poly2->GetNumPoints(); i21++) { if ((d2.x != poly2->GetPoint(i21).x) || (d2.y != poly2->GetPoint(i21).y)) { -@@ -660,16 +657,16 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -660,16 +659,16 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { } triangles.erase(iter2); @@ -224,7 +221,7 @@ index 3a8a6efa83..5e94793b79 100644 } return 1; -@@ -677,13 +674,13 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -677,13 +676,13 @@ int TPPLPartition::ConvexPartition_HM(TPPLPoly *poly, TPPLPolyList *parts) { int TPPLPartition::ConvexPartition_HM(TPPLPolyList *inpolys, TPPLPolyList *parts) { TPPLPolyList outpolys; @@ -241,7 +238,7 @@ index 3a8a6efa83..5e94793b79 100644 return 0; } } -@@ -824,8 +821,8 @@ int TPPLPartition::Triangulate_OPT(TPPLPoly *poly, TPPLPolyList 
*triangles) { +@@ -824,8 +823,8 @@ int TPPLPartition::Triangulate_OPT(TPPLPoly *poly, TPPLPolyList *triangles) { newdiagonal.index1 = 0; newdiagonal.index2 = n - 1; diagonals.push_back(newdiagonal); @@ -252,7 +249,7 @@ index 3a8a6efa83..5e94793b79 100644 diagonals.pop_front(); bestvertex = dpstates[diagonal.index2][diagonal.index1].bestvertex; if (bestvertex == -1) { -@@ -873,10 +870,10 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2 +@@ -873,10 +872,10 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2 pairs->push_front(newdiagonal); dpstates[a][b].weight = w; } else { @@ -265,7 +262,7 @@ index 3a8a6efa83..5e94793b79 100644 pairs->pop_front(); } pairs->push_front(newdiagonal); -@@ -885,7 +882,7 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2 +@@ -885,7 +884,7 @@ void TPPLPartition::UpdateState(long a, long b, long w, long i, long j, DPState2 void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPState2 **dpstates) { DiagonalList *pairs = NULL; @@ -274,7 +271,7 @@ index 3a8a6efa83..5e94793b79 100644 long top; long w; -@@ -902,23 +899,23 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS +@@ -902,23 +901,23 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS } if (j - i > 1) { pairs = &(dpstates[i][j].pairs); @@ -305,7 +302,7 @@ index 3a8a6efa83..5e94793b79 100644 } } } -@@ -927,7 +924,7 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS +@@ -927,7 +926,7 @@ void TPPLPartition::TypeA(long i, long j, long k, PartitionVertex *vertices, DPS void TPPLPartition::TypeB(long i, long j, long k, PartitionVertex *vertices, DPState2 **dpstates) { DiagonalList *pairs = NULL; @@ -314,7 +311,7 @@ index 3a8a6efa83..5e94793b79 100644 long top; long w; -@@ -946,21 +943,21 @@ void TPPLPartition::TypeB(long i, long j, long k, PartitionVertex *vertices, DPS +@@ 
-946,21 +945,21 @@ void TPPLPartition::TypeB(long i, long j, long k, PartitionVertex *vertices, DPS if (k - j > 1) { pairs = &(dpstates[j][k].pairs); @@ -343,7 +340,7 @@ index 3a8a6efa83..5e94793b79 100644 } } else { w++; -@@ -981,11 +978,11 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -981,11 +980,11 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { DiagonalList diagonals, diagonals2; Diagonal diagonal, newdiagonal; DiagonalList *pairs = NULL, *pairs2 = NULL; @@ -358,7 +355,7 @@ index 3a8a6efa83..5e94793b79 100644 bool ijreal, jkreal; n = poly->GetNumPoints(); -@@ -1110,35 +1107,35 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1110,35 +1109,35 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { newdiagonal.index1 = 0; newdiagonal.index2 = n - 1; diagonals.push_front(newdiagonal); @@ -403,7 +400,7 @@ index 3a8a6efa83..5e94793b79 100644 pairs2->pop_back(); } else { break; -@@ -1153,21 +1150,21 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1153,21 +1152,21 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { diagonals.push_front(newdiagonal); } } else { @@ -431,7 +428,7 @@ index 3a8a6efa83..5e94793b79 100644 pairs2->pop_front(); } else { break; -@@ -1197,8 +1194,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1197,8 +1196,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { newdiagonal.index1 = 0; newdiagonal.index2 = n - 1; diagonals.push_front(newdiagonal); @@ -442,7 +439,7 @@ index 3a8a6efa83..5e94793b79 100644 diagonals.pop_front(); if ((diagonal.index2 - diagonal.index1) <= 1) { continue; -@@ -1210,8 +1207,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1210,8 +1209,8 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { 
indices.push_back(diagonal.index2); diagonals2.push_front(diagonal); @@ -453,7 +450,7 @@ index 3a8a6efa83..5e94793b79 100644 diagonals2.pop_front(); if ((diagonal.index2 - diagonal.index1) <= 1) { continue; -@@ -1220,16 +1217,16 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1220,16 +1219,16 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { jkreal = true; pairs = &(dpstates[diagonal.index1][diagonal.index2].pairs); if (!vertices[diagonal.index1].isConvex) { @@ -476,7 +473,7 @@ index 3a8a6efa83..5e94793b79 100644 jkreal = false; } } -@@ -1253,11 +1250,12 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1253,11 +1252,12 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { indices.push_back(j); } @@ -492,7 +489,7 @@ index 3a8a6efa83..5e94793b79 100644 k++; } parts->push_back(newpoly); -@@ -1281,7 +1279,7 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { +@@ -1281,7 +1281,7 @@ int TPPLPartition::ConvexPartition_OPT(TPPLPoly *poly, TPPLPolyList *parts) { // "Computational Geometry: Algorithms and Applications" // by Mark de Berg, Otfried Cheong, Marc van Kreveld, and Mark Overmars. 
int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monotonePolys) { @@ -501,7 +498,7 @@ index 3a8a6efa83..5e94793b79 100644 MonotoneVertex *vertices = NULL; long i, numvertices, vindex, vindex2, newnumvertices, maxnumvertices; long polystartindex, polyendindex; -@@ -1291,11 +1289,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1291,11 +1291,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto bool error = false; numvertices = 0; @@ -515,7 +512,7 @@ index 3a8a6efa83..5e94793b79 100644 } maxnumvertices = numvertices * 3; -@@ -1303,8 +1298,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1303,8 +1300,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newnumvertices = numvertices; polystartindex = 0; @@ -526,7 +523,7 @@ index 3a8a6efa83..5e94793b79 100644 polyendindex = polystartindex + poly->GetNumPoints() - 1; for (i = 0; i < poly->GetNumPoints(); i++) { vertices[i + polystartindex].p = poly->GetPoint(i); -@@ -1360,14 +1355,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1360,14 +1357,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto // Note that while set doesn't actually have to be implemented as // a tree, complexity requirements for operations are the same as // for the balanced binary search tree. @@ -546,7 +543,7 @@ index 3a8a6efa83..5e94793b79 100644 } // For each vertex. 
-@@ -1387,13 +1382,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1387,13 +1384,14 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v->p; newedge.p2 = vertices[v->next].p; newedge.index = vindex; @@ -564,7 +561,7 @@ index 3a8a6efa83..5e94793b79 100644 error = true; break; } -@@ -1412,29 +1408,30 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1412,29 +1410,30 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v->p; newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); @@ -601,7 +598,7 @@ index 3a8a6efa83..5e94793b79 100644 error = true; break; } -@@ -1452,25 +1449,25 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1452,25 +1451,25 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v->p; newedge.p2 = v->p; edgeIter = edgeTree.lower_bound(newedge); @@ -632,7 +629,7 @@ index 3a8a6efa83..5e94793b79 100644 error = true; break; } -@@ -1488,27 +1485,28 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1488,27 +1487,28 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto newedge.p1 = v2->p; newedge.p2 = vertices[v2->next].p; newedge.index = vindex2; @@ -668,7 +665,7 @@ index 3a8a6efa83..5e94793b79 100644 } break; } -@@ -1569,8 +1567,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto +@@ -1569,8 +1569,8 @@ int TPPLPartition::MonotonePartition(TPPLPolyList *inpolys, TPPLPolyList *monoto // Adds a diagonal to the doubly-connected list of vertices. 
void TPPLPartition::AddDiagonal(MonotoneVertex *vertices, long *numvertices, long index1, long index2, @@ -679,7 +676,7 @@ index 3a8a6efa83..5e94793b79 100644 long newindex1, newindex2; newindex1 = *numvertices; -@@ -1597,14 +1595,14 @@ void TPPLPartition::AddDiagonal(MonotoneVertex *vertices, long *numvertices, lon +@@ -1597,14 +1597,14 @@ void TPPLPartition::AddDiagonal(MonotoneVertex *vertices, long *numvertices, lon vertextypes[newindex1] = vertextypes[index1]; edgeTreeIterators[newindex1] = edgeTreeIterators[index1]; helpers[newindex1] = helpers[index1]; @@ -698,7 +695,7 @@ index 3a8a6efa83..5e94793b79 100644 } } -@@ -1830,13 +1828,13 @@ int TPPLPartition::TriangulateMonotone(TPPLPoly *inPoly, TPPLPolyList *triangles +@@ -1830,13 +1830,13 @@ int TPPLPartition::TriangulateMonotone(TPPLPoly *inPoly, TPPLPolyList *triangles int TPPLPartition::Triangulate_MONO(TPPLPolyList *inpolys, TPPLPolyList *triangles) { TPPLPolyList monotone; diff --git a/thirdparty/misc/polypartition.cpp b/thirdparty/misc/polypartition.cpp index 5e94793b79..8c5409bf24 100644 --- a/thirdparty/misc/polypartition.cpp +++ b/thirdparty/misc/polypartition.cpp @@ -23,6 +23,8 @@ #include "polypartition.h" +#include <math.h> +#include <string.h> #include <algorithm> TPPLPoly::TPPLPoly() { diff --git a/thirdparty/msdfgen/core/edge-coloring.cpp b/thirdparty/msdfgen/core/edge-coloring.cpp index 370f9aa38d..914f1769fd 100644 --- a/thirdparty/msdfgen/core/edge-coloring.cpp +++ b/thirdparty/msdfgen/core/edge-coloring.cpp @@ -473,7 +473,7 @@ void edgeColoringByDistance(Shape &shape, double angleThreshold, unsigned long l edgeMatrix[i] = &edgeMatrixStorage[i*splineCount]; int nextEdge = 0; for (; nextEdge < graphEdgeCount && !*graphEdgeDistances[nextEdge]; ++nextEdge) { - int elem = graphEdgeDistances[nextEdge]-distanceMatrixBase; + int elem = (int) (graphEdgeDistances[nextEdge]-distanceMatrixBase); int row = elem/splineCount; int col = elem%splineCount; edgeMatrix[row][col] = 1; @@ -483,7 +483,7 @@ void 
edgeColoringByDistance(Shape &shape, double angleThreshold, unsigned long l std::vector<int> coloring(2*splineCount); colorSecondDegreeGraph(&coloring[0], &edgeMatrix[0], splineCount, seed); for (; nextEdge < graphEdgeCount; ++nextEdge) { - int elem = graphEdgeDistances[nextEdge]-distanceMatrixBase; + int elem = (int) (graphEdgeDistances[nextEdge]-distanceMatrixBase); tryAddEdge(&coloring[0], &edgeMatrix[0], splineCount, elem/splineCount, elem%splineCount, &coloring[splineCount]); } diff --git a/thirdparty/msdfgen/core/equation-solver.cpp b/thirdparty/msdfgen/core/equation-solver.cpp index fbe906428b..4144fa3340 100644 --- a/thirdparty/msdfgen/core/equation-solver.cpp +++ b/thirdparty/msdfgen/core/equation-solver.cpp @@ -4,17 +4,15 @@ #define _USE_MATH_DEFINES #include <cmath> -#define TOO_LARGE_RATIO 1e12 - namespace msdfgen { int solveQuadratic(double x[2], double a, double b, double c) { - // a = 0 -> linear equation - if (a == 0 || fabs(b)+fabs(c) > TOO_LARGE_RATIO*fabs(a)) { - // a, b = 0 -> no solution - if (b == 0 || fabs(c) > TOO_LARGE_RATIO*fabs(b)) { + // a == 0 -> linear equation + if (a == 0 || fabs(b) > 1e12*fabs(a)) { + // a == 0, b == 0 -> no solution + if (b == 0) { if (c == 0) - return -1; // 0 = 0 + return -1; // 0 == 0 return 0; } x[0] = -c/b; @@ -35,41 +33,38 @@ int solveQuadratic(double x[2], double a, double b, double c) { static int solveCubicNormed(double x[3], double a, double b, double c) { double a2 = a*a; - double q = (a2 - 3*b)/9; - double r = (a*(2*a2-9*b) + 27*c)/54; + double q = 1/9.*(a2-3*b); + double r = 1/54.*(a*(2*a2-9*b)+27*c); double r2 = r*r; double q3 = q*q*q; - double A, B; + a *= 1/3.; if (r2 < q3) { double t = r/sqrt(q3); if (t < -1) t = -1; if (t > 1) t = 1; t = acos(t); - a /= 3; q = -2*sqrt(q); - x[0] = q*cos(t/3)-a; - x[1] = q*cos((t+2*M_PI)/3)-a; - x[2] = q*cos((t-2*M_PI)/3)-a; + q = -2*sqrt(q); + x[0] = q*cos(1/3.*t)-a; + x[1] = q*cos(1/3.*(t+2*M_PI))-a; + x[2] = q*cos(1/3.*(t-2*M_PI))-a; return 3; } else { - A = 
-pow(fabs(r)+sqrt(r2-q3), 1/3.); - if (r < 0) A = -A; - B = A == 0 ? 0 : q/A; - a /= 3; - x[0] = (A+B)-a; - x[1] = -0.5*(A+B)-a; - x[2] = 0.5*sqrt(3.)*(A-B); - if (fabs(x[2]) < 1e-14) + double u = (r < 0 ? 1 : -1)*pow(fabs(r)+sqrt(r2-q3), 1/3.); + double v = u == 0 ? 0 : q/u; + x[0] = (u+v)-a; + if (u == v || fabs(u-v) < 1e-12*fabs(u+v)) { + x[1] = -.5*(u+v)-a; return 2; + } return 1; } } int solveCubic(double x[3], double a, double b, double c, double d) { if (a != 0) { - double bn = b/a, cn = c/a, dn = d/a; - // Check that a isn't "almost zero" - if (fabs(bn) < TOO_LARGE_RATIO && fabs(cn) < TOO_LARGE_RATIO && fabs(dn) < TOO_LARGE_RATIO) - return solveCubicNormed(x, bn, cn, dn); + double bn = b/a; + if (fabs(bn) < 1e6) // Above this ratio, the numerical error gets larger than if we treated a as zero + return solveCubicNormed(x, bn, c/a, d/a); } return solveQuadratic(x, b, c, d); } diff --git a/thirdparty/thorvg/AUTHORS b/thirdparty/thorvg/AUTHORS index 66057232b6..ec06c49118 100644 --- a/thirdparty/thorvg/AUTHORS +++ b/thirdparty/thorvg/AUTHORS @@ -13,3 +13,5 @@ Pankaj Kumar <pankaj.m1@samsung.com> Patryk Kaczmarek <patryk.k@partner.samsung.com> Michal Maciola <m.maciola@samsung.com> Peter Vullings <peter@projectitis.com> +K. S. 
Ernest (iFire) Lee <ernest.lee@chibifire.com> +Rémi Verschelde <rverschelde@gmail.com> diff --git a/thirdparty/thorvg/inc/config.h b/thirdparty/thorvg/inc/config.h index 04a450b1bb..41e8f6dafa 100644 --- a/thirdparty/thorvg/inc/config.h +++ b/thirdparty/thorvg/inc/config.h @@ -13,5 +13,5 @@ #define THORVG_JPG_LOADER_SUPPORT 1 -#define THORVG_VERSION_STRING "0.7.0" +#define THORVG_VERSION_STRING "0.7.1" #endif diff --git a/thirdparty/thorvg/patches/thorvg-pr1159-mingw-fix.patch b/thirdparty/thorvg/patches/thorvg-pr1159-mingw-fix.patch deleted file mode 100644 index a174880306..0000000000 --- a/thirdparty/thorvg/patches/thorvg-pr1159-mingw-fix.patch +++ /dev/null @@ -1,73 +0,0 @@ -diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp -index def8ae169a..cf103774c5 100644 ---- a/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp -+++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp -@@ -51,6 +51,7 @@ - - #define _USE_MATH_DEFINES //Math Constants are not defined in Standard C/C++. - -+#include <cstring> - #include <fstream> - #include <float.h> - #include <math.h> -diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp -index 2b62315de8..32685ee620 100644 ---- a/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp -+++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp -@@ -50,6 +50,7 @@ - - #define _USE_MATH_DEFINES //Math Constants are not defined in Standard C/C++. 
- -+#include <cstring> - #include <math.h> - #include <clocale> - #include <ctype.h> -diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgSceneBuilder.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgSceneBuilder.cpp -index 8701fe32b1..ae17634f31 100644 ---- a/thirdparty/thorvg/src/loaders/svg/tvgSvgSceneBuilder.cpp -+++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgSceneBuilder.cpp -@@ -49,6 +49,7 @@ - */ - - -+#include <cstring> - #include <string> - #include "tvgMath.h" - #include "tvgSvgLoaderCommon.h" -diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgUtil.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgUtil.cpp -index d5b9cdcf7b..9f269b29a2 100644 ---- a/thirdparty/thorvg/src/loaders/svg/tvgSvgUtil.cpp -+++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgUtil.cpp -@@ -20,6 +20,7 @@ - * SOFTWARE. - */ - -+#include <cstring> - #include <math.h> - #include <memory.h> - #include "tvgSvgUtil.h" -diff --git a/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp b/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp -index 2e3d5928d9..1571aa4e25 100644 ---- a/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp -+++ b/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp -@@ -20,6 +20,7 @@ - * SOFTWARE. 
- */ - -+#include <cstring> - #include <ctype.h> - #include <string> - -diff --git a/thirdparty/thorvg/src/savers/tvg/tvgTvgSaver.cpp b/thirdparty/thorvg/src/savers/tvg/tvgTvgSaver.cpp -index 9450d80e88..9dd57e5a89 100644 ---- a/thirdparty/thorvg/src/savers/tvg/tvgTvgSaver.cpp -+++ b/thirdparty/thorvg/src/savers/tvg/tvgTvgSaver.cpp -@@ -24,6 +24,8 @@ - #include "tvgTvgSaver.h" - #include "tvgLzw.h" - -+#include <cstring> -+ - #ifdef _WIN32 - #include <malloc.h> - #else diff --git a/thirdparty/thorvg/patches/thorvg-pr1166-vs2017-minmax.patch b/thirdparty/thorvg/patches/thorvg-pr1166-vs2017-minmax.patch deleted file mode 100644 index 0b045bd05a..0000000000 --- a/thirdparty/thorvg/patches/thorvg-pr1166-vs2017-minmax.patch +++ /dev/null @@ -1,49 +0,0 @@ -diff --git a/thirdparty/thorvg/src/lib/sw_engine/tvgSwRenderer.cpp b/thirdparty/thorvg/src/lib/sw_engine/tvgSwRenderer.cpp -index 78537e7726..c75e73760e 100644 ---- a/thirdparty/thorvg/src/lib/sw_engine/tvgSwRenderer.cpp -+++ b/thirdparty/thorvg/src/lib/sw_engine/tvgSwRenderer.cpp -@@ -23,6 +23,7 @@ - #include "tvgSwCommon.h" - #include "tvgTaskScheduler.h" - #include "tvgSwRenderer.h" -+#include "tvgMath.h" - - /************************************************************************/ - /* Internal Class Implementation */ -@@ -594,10 +595,10 @@ void* SwRenderer::prepareCommon(SwTask* task, const RenderTransform* transform, - task->surface = surface; - task->mpool = mpool; - task->flags = flags; -- task->bbox.min.x = max(static_cast<SwCoord>(0), static_cast<SwCoord>(vport.x)); -- task->bbox.min.y = max(static_cast<SwCoord>(0), static_cast<SwCoord>(vport.y)); -- task->bbox.max.x = min(static_cast<SwCoord>(surface->w), static_cast<SwCoord>(vport.x + vport.w)); -- task->bbox.max.y = min(static_cast<SwCoord>(surface->h), static_cast<SwCoord>(vport.y + vport.h)); -+ task->bbox.min.x = mathMax(static_cast<SwCoord>(0), static_cast<SwCoord>(vport.x)); -+ task->bbox.min.y = mathMax(static_cast<SwCoord>(0), 
static_cast<SwCoord>(vport.y)); -+ task->bbox.max.x = mathMin(static_cast<SwCoord>(surface->w), static_cast<SwCoord>(vport.x + vport.w)); -+ task->bbox.max.y = mathMin(static_cast<SwCoord>(surface->h), static_cast<SwCoord>(vport.y + vport.h)); - - if (!task->pushed) { - task->pushed = true; -diff --git a/thirdparty/thorvg/src/lib/tvgMath.h b/thirdparty/thorvg/src/lib/tvgMath.h -index 9e5c915fc3..94b4fe1cf1 100644 ---- a/thirdparty/thorvg/src/lib/tvgMath.h -+++ b/thirdparty/thorvg/src/lib/tvgMath.h -@@ -29,6 +29,10 @@ - #include "tvgCommon.h" - - -+#define mathMin(x, y) (((x) < (y)) ? (x) : (y)) -+#define mathMax(x, y) (((x) > (y)) ? (x) : (y)) -+ -+ - static inline bool mathZero(float a) - { - return (fabsf(a) < FLT_EPSILON) ? true : false; -@@ -154,4 +158,4 @@ static inline Matrix mathMultiply(const Matrix* lhs, const Matrix* rhs) - } - - --#endif //_TVG_MATH_H_ -\ No newline at end of file -+#endif //_TVG_MATH_H_ diff --git a/thirdparty/thorvg/src/lib/sw_engine/tvgSwImage.cpp b/thirdparty/thorvg/src/lib/sw_engine/tvgSwImage.cpp index fe22fce017..f9974d9847 100644 --- a/thirdparty/thorvg/src/lib/sw_engine/tvgSwImage.cpp +++ b/thirdparty/thorvg/src/lib/sw_engine/tvgSwImage.cpp @@ -84,8 +84,8 @@ bool imagePrepare(SwImage* image, const Matrix* transform, const SwBBox& clipReg //Fast track: Non-transformed image but just shifted. 
if (image->direct) { - image->ox = -static_cast<uint32_t>(round(transform->e13)); - image->oy = -static_cast<uint32_t>(round(transform->e23)); + image->ox = -static_cast<int32_t>(round(transform->e13)); + image->oy = -static_cast<int32_t>(round(transform->e23)); //Figure out the scale factor by transform matrix } else { auto scaleX = sqrtf((transform->e11 * transform->e11) + (transform->e21 * transform->e21)); diff --git a/thirdparty/thorvg/src/lib/sw_engine/tvgSwRaster.cpp b/thirdparty/thorvg/src/lib/sw_engine/tvgSwRaster.cpp index deebed16ee..56bc2f77dc 100644 --- a/thirdparty/thorvg/src/lib/sw_engine/tvgSwRaster.cpp +++ b/thirdparty/thorvg/src/lib/sw_engine/tvgSwRaster.cpp @@ -481,7 +481,10 @@ static bool _rasterScaledRleRGBAImage(SwSurface* surface, const SwImage* image, static bool _scaledRleRGBAImage(SwSurface* surface, const SwImage* image, const Matrix* transform, const SwBBox& region, uint32_t opacity) { Matrix itransform; - if (transform && !mathInverse(transform, &itransform)) return false; + + if (transform) { + if (!mathInverse(transform, &itransform)) return false; + } else mathIdentity(&itransform); auto halfScale = _halfScale(image->scale); @@ -816,7 +819,10 @@ static bool _rasterScaledRGBAImage(SwSurface* surface, const SwImage* image, con static bool _scaledRGBAImage(SwSurface* surface, const SwImage* image, const Matrix* transform, const SwBBox& region, uint32_t opacity) { Matrix itransform; - if (transform && !mathInverse(transform, &itransform)) return false; + + if (transform) { + if (!mathInverse(transform, &itransform)) return false; + } else mathIdentity(&itransform); auto halfScale = _halfScale(image->scale); @@ -1113,12 +1119,12 @@ static bool _rasterTranslucentLinearGradientRle(SwSurface* surface, const SwRleD auto dst = &surface->buffer[span->y * surface->stride + span->x]; fillFetchLinear(fill, buffer, span->y, span->x, span->len); if (span->coverage == 255) { - for (uint32_t i = 0; i < span->len; ++i, ++dst) { - *dst = buffer[i] + 
ALPHA_BLEND(*dst, _ialpha(buffer[i])); + for (uint32_t x = 0; x < span->len; ++x, ++dst) { + *dst = buffer[x] + ALPHA_BLEND(*dst, _ialpha(buffer[x])); } } else { - for (uint32_t i = 0; i < span->len; ++i, ++dst) { - auto tmp = ALPHA_BLEND(buffer[i], span->coverage); + for (uint32_t x = 0; x < span->len; ++x, ++dst) { + auto tmp = ALPHA_BLEND(buffer[x], span->coverage); *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); } } @@ -1142,8 +1148,8 @@ static bool _rasterSolidLinearGradientRle(SwSurface* surface, const SwRleData* r } else { fillFetchLinear(fill, buf, span->y, span->x, span->len); auto dst = &surface->buffer[span->y * surface->stride + span->x]; - for (uint32_t i = 0; i < span->len; ++i) { - dst[i] = INTERPOLATE(span->coverage, buf[i], dst[i]); + for (uint32_t x = 0; x < span->len; ++x) { + dst[x] = INTERPOLATE(span->coverage, buf[x], dst[x]); } } } @@ -1302,12 +1308,12 @@ static bool _rasterTranslucentRadialGradientRle(SwSurface* surface, const SwRleD auto dst = &surface->buffer[span->y * surface->stride + span->x]; fillFetchRadial(fill, buffer, span->y, span->x, span->len); if (span->coverage == 255) { - for (uint32_t i = 0; i < span->len; ++i, ++dst) { - *dst = buffer[i] + ALPHA_BLEND(*dst, _ialpha(buffer[i])); + for (uint32_t x = 0; x < span->len; ++x, ++dst) { + *dst = buffer[x] + ALPHA_BLEND(*dst, _ialpha(buffer[x])); } } else { - for (uint32_t i = 0; i < span->len; ++i, ++dst) { - auto tmp = ALPHA_BLEND(buffer[i], span->coverage); + for (uint32_t x = 0; x < span->len; ++x, ++dst) { + auto tmp = ALPHA_BLEND(buffer[x], span->coverage); *dst = tmp + ALPHA_BLEND(*dst, _ialpha(tmp)); } } @@ -1332,8 +1338,8 @@ static bool _rasterSolidRadialGradientRle(SwSurface* surface, const SwRleData* r } else { fillFetchRadial(fill, buf, span->y, span->x, span->len); auto ialpha = 255 - span->coverage; - for (uint32_t i = 0; i < span->len; ++i, ++dst) { - *dst = ALPHA_BLEND(buf[i], span->coverage) + ALPHA_BLEND(*dst, ialpha); + for (uint32_t x = 0; x < span->len; ++x, 
++dst) { + *dst = ALPHA_BLEND(buf[x], span->coverage) + ALPHA_BLEND(*dst, ialpha); } } } @@ -1487,7 +1493,7 @@ bool rasterStroke(SwSurface* surface, SwShape* shape, uint8_t r, uint8_t g, uint bool rasterImage(SwSurface* surface, SwImage* image, const Matrix* transform, const SwBBox& bbox, uint32_t opacity) { //Verify Boundary - if (bbox.max.x < 0 || bbox.max.y < 0 || bbox.min.x >= surface->w || bbox.min.y >= surface->h) return false; + if (bbox.max.x < 0 || bbox.max.y < 0 || bbox.min.x >= static_cast<SwCoord>(surface->w) || bbox.min.y >= static_cast<SwCoord>(surface->h)) return false; //TOOD: switch (image->format) //TODO: case: _rasterRGBImage() diff --git a/thirdparty/thorvg/src/lib/sw_engine/tvgSwRasterTexmapInternal.h b/thirdparty/thorvg/src/lib/sw_engine/tvgSwRasterTexmapInternal.h index 4e8d342137..e96307c874 100644 --- a/thirdparty/thorvg/src/lib/sw_engine/tvgSwRasterTexmapInternal.h +++ b/thirdparty/thorvg/src/lib/sw_engine/tvgSwRasterTexmapInternal.h @@ -58,8 +58,8 @@ y = yStart; while (y < yEnd) { - x1 = _xa; - x2 = _xb; + x1 = (int32_t)_xa; + x2 = (int32_t)_xb; if (!region) { minx = INT32_MAX; @@ -160,4 +160,4 @@ next: xb = _xb; ua = _ua; va = _va; -}
\ No newline at end of file +} diff --git a/thirdparty/thorvg/src/lib/tvgMath.h b/thirdparty/thorvg/src/lib/tvgMath.h index 94b4fe1cf1..423fb6eb1b 100644 --- a/thirdparty/thorvg/src/lib/tvgMath.h +++ b/thirdparty/thorvg/src/lib/tvgMath.h @@ -47,7 +47,7 @@ static inline bool mathEqual(float a, float b) static inline bool mathRightAngle(const Matrix* m) { - auto radian = fabsf(atan2(m->e21, m->e11)); + auto radian = fabsf(atan2f(m->e21, m->e11)); if (radian < FLT_EPSILON || mathEqual(radian, float(M_PI_2)) || mathEqual(radian, float(M_PI))) return true; return false; } diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp b/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp index 8846613c6b..f27881da42 100644 --- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp +++ b/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp @@ -47,6 +47,7 @@ JpgLoader::~JpgLoader() { jpgdDelete(decoder); if (freeData) free(data); + free(image); } @@ -128,5 +129,9 @@ unique_ptr<Surface> JpgLoader::bitmap() void JpgLoader::run(unsigned tid) { + if (image) { + free(image); + image = nullptr; + } image = jpgdDecompress(decoder); }
\ No newline at end of file diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp b/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp index fa72734ec4..4ccc5788d5 100644 --- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp +++ b/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp @@ -1080,7 +1080,9 @@ namespace DCT_Upsample // Unconditionally frees all allocated m_blocks. void jpeg_decoder::free_all_blocks() { + delete(m_pStream); m_pStream = nullptr; + for (mem_block *b = m_pMem_blocks; b; ) { mem_block *n = b->m_pNext; free(b); @@ -2815,7 +2817,6 @@ int jpeg_decoder::begin_decoding() jpeg_decoder::~jpeg_decoder() { free_all_blocks(); - delete(m_pStream); } @@ -3025,4 +3026,4 @@ unsigned char* jpgdDecompress(jpeg_decoder* decoder) } } return pImage_data; -}
\ No newline at end of file +} diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h b/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h index d32ffd99d4..ca9cb35c32 100644 --- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h +++ b/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Samsung Electronics Co., Ltd. All rights reserved. + * Copyright (c) 2021 - 2022 Samsung Electronics Co., Ltd. All rights reserved. * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/thirdparty/thorvg/src/loaders/png/tvgPngLoader.cpp b/thirdparty/thorvg/src/loaders/png/tvgPngLoader.cpp index c6d95be5ba..3cc08e902b 100644 --- a/thirdparty/thorvg/src/loaders/png/tvgPngLoader.cpp +++ b/thirdparty/thorvg/src/loaders/png/tvgPngLoader.cpp @@ -72,6 +72,7 @@ PngLoader::PngLoader() PngLoader::~PngLoader() { if (freeData) free(data); + free(image); } @@ -121,7 +122,7 @@ bool PngLoader::open(const char* data, uint32_t size, bool copy) clear(); lodepng_state_init(&state); - + unsigned int width, height; if (lodepng_inspect(&width, &height, &state, (unsigned char*)(data), size) > 0) return false; @@ -180,10 +181,14 @@ unique_ptr<Surface> PngLoader::bitmap() void PngLoader::run(unsigned tid) { + if (image) { + free(image); + image = nullptr; + } auto width = static_cast<unsigned>(w); auto height = static_cast<unsigned>(h); lodepng_decode(&image, &width, &height, &state, data, size); _premultiply((uint32_t*)(image), width, height); -}
\ No newline at end of file +} diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp index cf103774c5..08b3308165 100644 --- a/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp +++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp @@ -541,7 +541,7 @@ static void _toColor(const char* str, uint8_t* r, uint8_t* g, uint8_t* b, char** } } } - } else if (len >= 3 && !strncmp(str, "url", 3)) { + } else if (ref && len >= 3 && !strncmp(str, "url", 3)) { *ref = _idFromUrl((const char*)(str + 3)); } else { //Handle named color @@ -789,7 +789,7 @@ static bool _attrParseSvgNode(void* data, const char* key, const char* value) return simpleXmlParseW3CAttribute(value, _parseStyleAttr, loader); } #ifdef THORVG_LOG_ENABLED - else if ((!strcmp(key, "x") || !strcmp(key, "y")) && fabsf(svgUtilStrtof(value, nullptr)) > FLT_EPSILON ) { + else if ((!strcmp(key, "x") || !strcmp(key, "y")) && fabsf(svgUtilStrtof(value, nullptr)) > FLT_EPSILON) { TVGLOG("SVG", "Unsupported attributes used [Elements type: Svg][Attribute: %s][Value: %s]", key, value); } #endif @@ -1611,6 +1611,7 @@ static bool _attrParseImageNode(void* data, const char* key, const char* value) } if (!strcmp(key, "href") || !strcmp(key, "xlink:href")) { + if (image->href && value) free(image->href); image->href = _idFromHref(value); } else if (!strcmp(key, "id")) { if (node->id && value) free(node->id); @@ -1728,6 +1729,112 @@ error_grad_alloc: } +static void _styleInherit(SvgStyleProperty* child, const SvgStyleProperty* parent) +{ + if (parent == nullptr) return; + //Inherit the property of parent if not present in child. 
+ if (!child->curColorSet) { + child->color = parent->color; + child->curColorSet = parent->curColorSet; + } + //Fill + if (!((int)child->fill.flags & (int)SvgFillFlags::Paint)) { + child->fill.paint.color = parent->fill.paint.color; + child->fill.paint.none = parent->fill.paint.none; + child->fill.paint.curColor = parent->fill.paint.curColor; + if (parent->fill.paint.url) child->fill.paint.url = _copyId(parent->fill.paint.url); + } + if (!((int)child->fill.flags & (int)SvgFillFlags::Opacity)) { + child->fill.opacity = parent->fill.opacity; + } + if (!((int)child->fill.flags & (int)SvgFillFlags::FillRule)) { + child->fill.fillRule = parent->fill.fillRule; + } + //Stroke + if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Paint)) { + child->stroke.paint.color = parent->stroke.paint.color; + child->stroke.paint.none = parent->stroke.paint.none; + child->stroke.paint.curColor = parent->stroke.paint.curColor; + child->stroke.paint.url = parent->stroke.paint.url ? _copyId(parent->stroke.paint.url) : nullptr; + } + if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Opacity)) { + child->stroke.opacity = parent->stroke.opacity; + } + if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Width)) { + child->stroke.width = parent->stroke.width; + } + if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Dash)) { + if (parent->stroke.dash.array.count > 0) { + child->stroke.dash.array.clear(); + child->stroke.dash.array.reserve(parent->stroke.dash.array.count); + for (uint32_t i = 0; i < parent->stroke.dash.array.count; ++i) { + child->stroke.dash.array.push(parent->stroke.dash.array.data[i]); + } + } + } + if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Cap)) { + child->stroke.cap = parent->stroke.cap; + } + if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Join)) { + child->stroke.join = parent->stroke.join; + } +} + + +static void _styleCopy(SvgStyleProperty* to, const SvgStyleProperty* from) +{ + if (from == nullptr) return; + //Copy the properties of 
'from' only if they were explicitly set (not the default ones). + if (from->curColorSet) { + to->color = from->color; + to->curColorSet = true; + } + //Fill + to->fill.flags = (SvgFillFlags)((int)to->fill.flags | (int)from->fill.flags); + if (((int)from->fill.flags & (int)SvgFillFlags::Paint)) { + to->fill.paint.color = from->fill.paint.color; + to->fill.paint.none = from->fill.paint.none; + to->fill.paint.curColor = from->fill.paint.curColor; + if (from->fill.paint.url) to->fill.paint.url = _copyId(from->fill.paint.url); + } + if (((int)from->fill.flags & (int)SvgFillFlags::Opacity)) { + to->fill.opacity = from->fill.opacity; + } + if (((int)from->fill.flags & (int)SvgFillFlags::FillRule)) { + to->fill.fillRule = from->fill.fillRule; + } + //Stroke + to->stroke.flags = (SvgStrokeFlags)((int)to->stroke.flags | (int)from->stroke.flags); + if (((int)from->stroke.flags & (int)SvgStrokeFlags::Paint)) { + to->stroke.paint.color = from->stroke.paint.color; + to->stroke.paint.none = from->stroke.paint.none; + to->stroke.paint.curColor = from->stroke.paint.curColor; + to->stroke.paint.url = from->stroke.paint.url ? 
_copyId(from->stroke.paint.url) : nullptr; + } + if (((int)from->stroke.flags & (int)SvgStrokeFlags::Opacity)) { + to->stroke.opacity = from->stroke.opacity; + } + if (((int)from->stroke.flags & (int)SvgStrokeFlags::Width)) { + to->stroke.width = from->stroke.width; + } + if (((int)from->stroke.flags & (int)SvgStrokeFlags::Dash)) { + if (from->stroke.dash.array.count > 0) { + to->stroke.dash.array.clear(); + to->stroke.dash.array.reserve(from->stroke.dash.array.count); + for (uint32_t i = 0; i < from->stroke.dash.array.count; ++i) { + to->stroke.dash.array.push(from->stroke.dash.array.data[i]); + } + } + } + if (((int)from->stroke.flags & (int)SvgStrokeFlags::Cap)) { + to->stroke.cap = from->stroke.cap; + } + if (((int)from->stroke.flags & (int)SvgStrokeFlags::Join)) { + to->stroke.join = from->stroke.join; + } +} + + static void _copyAttr(SvgNode* to, const SvgNode* from) { //Copy matrix attribute @@ -1736,7 +1843,8 @@ static void _copyAttr(SvgNode* to, const SvgNode* from) if (to->transform) *to->transform = *from->transform; } //Copy style attribute - *to->style = *from->style; + _styleCopy(to->style, from->style); + to->style->flags = (SvgStyleFlags)((int)to->style->flags | (int)from->style->flags); if (from->style->fill.paint.url) to->style->fill.paint.url = strdup(from->style->fill.paint.url); if (from->style->stroke.paint.url) to->style->stroke.paint.url = strdup(from->style->stroke.paint.url); if (from->style->clipPath.url) to->style->clipPath.url = strdup(from->style->clipPath.url); @@ -1780,15 +1888,17 @@ static void _copyAttr(SvgNode* to, const SvgNode* from) break; } case SvgNodeType::Polygon: { - to->node.polygon.pointsCount = from->node.polygon.pointsCount; - to->node.polygon.points = (float*)malloc(to->node.polygon.pointsCount * sizeof(float)); - memcpy(to->node.polygon.points, from->node.polygon.points, to->node.polygon.pointsCount * sizeof(float)); + if ((to->node.polygon.pointsCount = from->node.polygon.pointsCount)) { + to->node.polygon.points = 
(float*)malloc(to->node.polygon.pointsCount * sizeof(float)); + memcpy(to->node.polygon.points, from->node.polygon.points, to->node.polygon.pointsCount * sizeof(float)); + } break; } case SvgNodeType::Polyline: { - to->node.polyline.pointsCount = from->node.polyline.pointsCount; - to->node.polyline.points = (float*)malloc(to->node.polyline.pointsCount * sizeof(float)); - memcpy(to->node.polyline.points, from->node.polyline.points, to->node.polyline.pointsCount * sizeof(float)); + if ((to->node.polyline.pointsCount = from->node.polyline.pointsCount)) { + to->node.polyline.points = (float*)malloc(to->node.polyline.pointsCount * sizeof(float)); + memcpy(to->node.polyline.points, from->node.polyline.points, to->node.polyline.pointsCount * sizeof(float)); + } break; } case SvgNodeType::Image: { @@ -1806,35 +1916,45 @@ static void _copyAttr(SvgNode* to, const SvgNode* from) } -static void _cloneNode(SvgNode* from, SvgNode* parent) +static void _cloneNode(SvgNode* from, SvgNode* parent, int depth) { + /* Exception handling: Prevent invalid SVG data input. + The size is the arbitrary value, we need an experimental size. */ + if (depth == 8192) { + TVGERR("SVG", "Infinite recursive call - stopped after %d calls! 
Svg file may be incorrectly formatted.", depth); + return; + } + SvgNode* newNode; - if (!from || !parent) return; + if (!from || !parent || from == parent) return; newNode = _createNode(parent, from->type); - if (!newNode) return; + _styleInherit(newNode->style, parent->style); _copyAttr(newNode, from); auto child = from->child.data; for (uint32_t i = 0; i < from->child.count; ++i, ++child) { - _cloneNode(*child, newNode); + _cloneNode(*child, newNode, depth + 1); } } -static void _postponeCloneNode(SvgLoaderData* loader, SvgNode *node, char* id) { +static void _postponeCloneNode(SvgLoaderData* loader, SvgNode *node, char* id) +{ loader->cloneNodes.push({node, id}); } -static void _clonePostponedNodes(Array<SvgNodeIdPair>* cloneNodes) { +static void _clonePostponedNodes(Array<SvgNodeIdPair>* cloneNodes, SvgNode* doc) +{ for (uint32_t i = 0; i < cloneNodes->count; ++i) { auto nodeIdPair = cloneNodes->data[i]; auto defs = _getDefsNode(nodeIdPair.node); auto nodeFrom = _findChildById(defs, nodeIdPair.id); - _cloneNode(nodeFrom, nodeIdPair.node); + if (!nodeFrom) nodeFrom = _findChildById(doc, nodeIdPair.id); + _cloneNode(nodeFrom, nodeIdPair.node, 0); free(nodeIdPair.id); } } @@ -1875,7 +1995,7 @@ static bool _attrParseUseNode(void* data, const char* key, const char* value) defs = _getDefsNode(node); nodeFrom = _findChildById(defs, id); if (nodeFrom) { - _cloneNode(nodeFrom, node); + _cloneNode(nodeFrom, node, 0); free(id); } else { //some svg export software include <defs> element at the end of the file @@ -1883,10 +2003,6 @@ static bool _attrParseUseNode(void* data, const char* key, const char* value) //after the whole file is parsed _postponeCloneNode(loader, node, id); } - } else if (!strcmp(key, "clip-path")) { - _handleClipPathAttr(loader, node, value); - } else if (!strcmp(key, "mask")) { - _handleMaskAttr(loader, node, value); } else { return _attrParseGNode(data, key, value); } @@ -2081,10 +2197,12 @@ static bool _attrParseRadialGradientNode(void* data, 
const char* key, const char } if (!strcmp(key, "id")) { + if (grad->id && value) free(grad->id); grad->id = _copyId(value); } else if (!strcmp(key, "spreadMethod")) { grad->spread = _parseSpreadValue(value); } else if (!strcmp(key, "href") || !strcmp(key, "xlink:href")) { + if (grad->ref && value) free(grad->ref); grad->ref = _idFromHref(value); } else if (!strcmp(key, "gradientUnits") && !strcmp(value, "userSpaceOnUse")) { grad->userSpace = true; @@ -2269,10 +2387,12 @@ static bool _attrParseLinearGradientNode(void* data, const char* key, const char } if (!strcmp(key, "id")) { + if (grad->id && value) free(grad->id); grad->id = _copyId(value); } else if (!strcmp(key, "spreadMethod")) { grad->spread = _parseSpreadValue(value); } else if (!strcmp(key, "href") || !strcmp(key, "xlink:href")) { + if (grad->ref && value) free(grad->ref); grad->ref = _idFromHref(value); } else if (!strcmp(key, "gradientUnits") && !strcmp(value, "userSpaceOnUse")) { grad->userSpace = true; @@ -2408,6 +2528,7 @@ static void _svgLoaderParserXmlOpen(SvgLoaderData* loader, const char* content, if ((method = _findGroupFactory(tagName))) { //Group + if (empty) return; if (!loader->doc) { if (strcmp(tagName, "svg")) return; //Not a valid svg document node = method(loader, nullptr, attrs, attrsLength); @@ -2493,59 +2614,8 @@ static bool _svgLoaderParser(void* data, SimpleXMLType type, const char* content } -static void _styleInherit(SvgStyleProperty* child, const SvgStyleProperty* parent) +static void _inefficientNodeCheck(TVG_UNUSED SvgNode* node) { - if (parent == nullptr) return; - //Inherit the property of parent if not present in child. 
- //Fill - if (!((int)child->fill.flags & (int)SvgFillFlags::Paint)) { - child->fill.paint.color = parent->fill.paint.color; - child->fill.paint.none = parent->fill.paint.none; - child->fill.paint.curColor = parent->fill.paint.curColor; - if (parent->fill.paint.url) child->fill.paint.url = _copyId(parent->fill.paint.url); - } else if (child->fill.paint.curColor && !child->curColorSet) { - child->color = parent->color; - } - if (!((int)child->fill.flags & (int)SvgFillFlags::Opacity)) { - child->fill.opacity = parent->fill.opacity; - } - if (!((int)child->fill.flags & (int)SvgFillFlags::FillRule)) { - child->fill.fillRule = parent->fill.fillRule; - } - //Stroke - if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Paint)) { - child->stroke.paint.color = parent->stroke.paint.color; - child->stroke.paint.none = parent->stroke.paint.none; - child->stroke.paint.curColor = parent->stroke.paint.curColor; - child->stroke.paint.url = parent->stroke.paint.url ? _copyId(parent->stroke.paint.url) : nullptr; - } else if (child->stroke.paint.curColor && !child->curColorSet) { - child->color = parent->color; - } - if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Opacity)) { - child->stroke.opacity = parent->stroke.opacity; - } - if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Width)) { - child->stroke.width = parent->stroke.width; - } - if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Dash)) { - if (parent->stroke.dash.array.count > 0) { - child->stroke.dash.array.clear(); - child->stroke.dash.array.reserve(parent->stroke.dash.array.count); - for (uint32_t i = 0; i < parent->stroke.dash.array.count; ++i) { - child->stroke.dash.array.push(parent->stroke.dash.array.data[i]); - } - } - } - if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Cap)) { - child->stroke.cap = parent->stroke.cap; - } - if (!((int)child->stroke.flags & (int)SvgStrokeFlags::Join)) { - child->stroke.join = parent->stroke.join; - } -} - - -static void _inefficientNodeCheck(TVG_UNUSED 
SvgNode* node){ #ifdef THORVG_LOG_ENABLED auto type = simpleXmlNodeTypeToString(node->type); @@ -2838,14 +2908,14 @@ void SvgLoader::run(unsigned tid) if (loaderData.doc) { _updateStyle(loaderData.doc, nullptr); auto defs = loaderData.doc->node.doc.defs; - if (defs) _updateGradient(loaderData.doc, &defs->node.defs.gradients); - - if (loaderData.gradients.count > 0) _updateGradient(loaderData.doc, &loaderData.gradients); _updateComposite(loaderData.doc, loaderData.doc); if (defs) _updateComposite(loaderData.doc, defs); - if (loaderData.cloneNodes.count > 0) _clonePostponedNodes(&loaderData.cloneNodes); + if (loaderData.cloneNodes.count > 0) _clonePostponedNodes(&loaderData.cloneNodes, loaderData.doc); + + if (loaderData.gradients.count > 0) _updateGradient(loaderData.doc, &loaderData.gradients); + if (defs) _updateGradient(loaderData.doc, &defs->node.defs.gradients); } root = svgSceneBuild(loaderData.doc, vx, vy, vw, vh, w, h, preserveAspect, svgPath); } diff --git a/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp b/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp index 1571aa4e25..ee199da231 100644 --- a/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp +++ b/thirdparty/thorvg/src/loaders/svg/tvgXmlParser.cpp @@ -220,15 +220,15 @@ static SimpleXMLType _getXMLType(const char* itr, const char* itrEnd, size_t &to if ((itr + sizeof("<!DOCTYPE>") - 1 < itrEnd) && (!memcmp(itr + 2, "DOCTYPE", sizeof("DOCTYPE") - 1)) && ((itr[2 + sizeof("DOCTYPE") - 1] == '>') || (isspace((unsigned char)itr[2 + sizeof("DOCTYPE") - 1])))) { toff = sizeof("!DOCTYPE") - 1; return SimpleXMLType::Doctype; - } else if (itr + sizeof("<!>") - 1 < itrEnd) { - toff = sizeof("!") - 1; - return SimpleXMLType::DoctypeChild; } else if ((itr + sizeof("<![CDATA[]]>") - 1 < itrEnd) && (!memcmp(itr + 2, "[CDATA[", sizeof("[CDATA[") - 1))) { toff = sizeof("![CDATA[") - 1; return SimpleXMLType::CData; } else if ((itr + sizeof("<!---->") - 1 < itrEnd) && (!memcmp(itr + 2, "--", sizeof("--") - 1))) { 
toff = sizeof("!--") - 1; return SimpleXMLType::Comment; + } else if (itr + sizeof("<!>") - 1 < itrEnd) { + toff = sizeof("!") - 1; + return SimpleXMLType::DoctypeChild; } return SimpleXMLType::Open; } diff --git a/thirdparty/thorvg/update-thorvg.sh b/thirdparty/thorvg/update-thorvg.sh index c200131eba..ce3d5eed1c 100755 --- a/thirdparty/thorvg/update-thorvg.sh +++ b/thirdparty/thorvg/update-thorvg.sh @@ -1,4 +1,4 @@ -VERSION=0.7.0 +VERSION=0.7.1 rm -rf AUTHORS inc LICENSE src *.zip curl -L -O https://github.com/Samsung/thorvg/archive/refs/tags/v$VERSION.zip bsdtar --strip-components=1 -xvf *.zip |