diff options
Diffstat (limited to 'thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h')
-rw-r--r-- | thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h | 739 |
1 files changed, 739 insertions, 0 deletions
diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h new file mode 100644 index 0000000000..ffa9b6fcae --- /dev/null +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h @@ -0,0 +1,739 @@ +// basisu_transcoder_internal.h - Universal texture format transcoder library. +// Copyright (C) 2019 Binomial LLC. All Rights Reserved. +// +// Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifdef _MSC_VER +#pragma warning (disable: 4127) // conditional expression is constant +#endif + +#define BASISD_LIB_VERSION 107 +#define BASISD_VERSION_STRING "01.11" + +#ifdef _DEBUG +#define BASISD_BUILD_DEBUG +#else +#define BASISD_BUILD_RELEASE +#endif + +#include "basisu.h" + +#define BASISD_znew (z = 36969 * (z & 65535) + (z >> 16)) + +namespace basisu +{ + extern bool g_debug_printf; +} + +namespace basist +{ + // Low-level formats directly supported by the transcoder (other supported texture formats are combinations of these low-level block formats). + // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. + enum block_format + { + cETC1, // ETC1S RGB + cBC1, // DXT1 RGB + cBC4, // DXT5A (alpha block only) + cPVRTC1_4_RGB, // opaque-only PVRTC1 4bpp + cPVRTC1_4_RGBA, // PVRTC1 4bpp RGBA + cBC7_M6_OPAQUE_ONLY, // RGB BC7 mode 6 + cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) + cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) + cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) + cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. + cATC_RGB, + cATC_RGBA_INTERPOLATED_ALPHA, + + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) + + cRGB32, // Writes RGB components to 32bpp output pixels + cRGBA32, // Writes RGB255 components to 32bpp output pixels + cA32, // Writes alpha component to 32bpp output pixels + + cRGB565, + cBGR565, + + cRGBA4444_COLOR, + cRGBA4444_ALPHA, + cRGBA4444_COLOR_OPAQUE, + + cTotalBlockFormats + }; + + const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31; + const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21; + const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9; + const int COLOR5_PAL_MIN_DELTA_B_RUNLEN = 3, COLOR5_PAL_DELTA_5_RUNLEN_VLC_BITS = 3; + + const uint32_t ENDPOINT_PRED_TOTAL_SYMBOLS = (4 * 4 * 4 * 4) + 1; + const uint32_t ENDPOINT_PRED_REPEAT_LAST_SYMBOL = ENDPOINT_PRED_TOTAL_SYMBOLS - 1; + const uint32_t ENDPOINT_PRED_MIN_REPEAT_COUNT = 3; + const uint32_t ENDPOINT_PRED_COUNT_VLC_BITS = 4; + + const uint32_t NUM_ENDPOINT_PREDS = 3;// BASISU_ARRAY_SIZE(g_endpoint_preds); + const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; + const uint32_t NO_ENDPOINT_PRED_INDEX = 3;//NUM_ENDPOINT_PREDS; + const uint32_t MAX_SELECTOR_HISTORY_BUF_SIZE = 64; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; + const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); + + uint16_t crc16(const void *r, size_t size, uint16_t crc); + + class huffman_decoding_table + { + friend class bitwise_decoder; + + public: + huffman_decoding_table() + { + } + + void clear() + { + basisu::clear_vector(m_code_sizes); + basisu::clear_vector(m_lookup); + basisu::clear_vector(m_tree); + } + + bool init(uint32_t total_syms, const uint8_t *pCode_sizes) + { + if (!total_syms) + { + clear(); + return true; + } + + m_code_sizes.resize(total_syms); + memcpy(&m_code_sizes[0], pCode_sizes, total_syms); + + m_lookup.resize(0); + m_lookup.resize(basisu::cHuffmanFastLookupSize); + + m_tree.resize(0); + m_tree.resize(total_syms * 2); + + uint32_t syms_using_codesize[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; + basisu::clear_obj(syms_using_codesize); + for (uint32_t i = 0; i < total_syms; i++) + { + if (pCode_sizes[i] > basisu::cHuffmanMaxSupportedInternalCodeSize) + return false; + syms_using_codesize[pCode_sizes[i]]++; + } + + uint32_t next_code[basisu::cHuffmanMaxSupportedInternalCodeSize + 1]; + next_code[0] = next_code[1] = 0; + + uint32_t used_syms = 0, total = 0; + for (uint32_t i = 1; i < basisu::cHuffmanMaxSupportedInternalCodeSize; i++) + { + used_syms += syms_using_codesize[i]; + next_code[i + 1] = (total = ((total + syms_using_codesize[i]) << 1)); + } + + if (((1U << basisu::cHuffmanMaxSupportedInternalCodeSize) != total) && (used_syms > 1U)) + return false; + + for (int tree_next = -1, sym_index = 0; sym_index < (int)total_syms; ++sym_index) + { + uint32_t rev_code = 0, l, cur_code, code_size = pCode_sizes[sym_index]; + if (!code_size) + continue; + + cur_code = next_code[code_size]++; + + for (l = code_size; l > 0; l--, cur_code >>= 1) + rev_code = (rev_code << 1) | (cur_code & 1); + + if (code_size <= basisu::cHuffmanFastLookupBits) + { + uint32_t k = (code_size << 16) | sym_index; + while (rev_code < basisu::cHuffmanFastLookupSize) + { + if (m_lookup[rev_code] != 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + m_lookup[rev_code] = k; + rev_code += (1 << code_size); + } + continue; + } + + int tree_cur; + if (0 == (tree_cur = m_lookup[rev_code & (basisu::cHuffmanFastLookupSize - 1)])) + { + const uint32_t idx = rev_code & (basisu::cHuffmanFastLookupSize - 1); + if (m_lookup[idx] != 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + m_lookup[idx] = tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + + if (tree_cur >= 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + rev_code >>= (basisu::cHuffmanFastLookupBits - 1); + + for (int j = code_size; j > (basisu::cHuffmanFastLookupBits + 1); j--) + { + tree_cur -= ((rev_code >>= 1) & 1); + + const int idx = -tree_cur - 1; + if (idx < 0) + return false; + else if (idx >= (int)m_tree.size()) + m_tree.resize(idx + 1); + + if (!m_tree[idx]) + { + m_tree[idx] = (int16_t)tree_next; + tree_cur = tree_next; + tree_next -= 2; + } + else + { + tree_cur = m_tree[idx]; + if (tree_cur >= 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + } + } + + tree_cur -= ((rev_code >>= 1) & 1); + + const int idx = -tree_cur - 1; + if (idx < 0) + return false; + else if (idx >= (int)m_tree.size()) + m_tree.resize(idx + 1); + + if (m_tree[idx] != 0) + { + // Supplied codesizes can't create a valid prefix code. + return false; + } + + m_tree[idx] = (int16_t)sym_index; + } + + return true; + } + + const basisu::uint8_vec &get_code_sizes() const { return m_code_sizes; } + + bool is_valid() const { return m_code_sizes.size() > 0; } + + private: + basisu::uint8_vec m_code_sizes; + basisu::int_vec m_lookup; + basisu::int16_vec m_tree; + }; + + class bitwise_decoder + { + public: + bitwise_decoder() : + m_buf_size(0), + m_pBuf(nullptr), + m_pBuf_start(nullptr), + m_pBuf_end(nullptr), + m_bit_buf(0), + m_bit_buf_size(0) + { + } + + void clear() + { + m_buf_size = 0; + m_pBuf = nullptr; + m_pBuf_start = nullptr; + m_pBuf_end = nullptr; + m_bit_buf = 0; + m_bit_buf_size = 0; + } + + bool init(const uint8_t *pBuf, uint32_t buf_size) + { + if ((!pBuf) && (buf_size)) + return false; + + m_buf_size = buf_size; + m_pBuf = pBuf; + m_pBuf_start = pBuf; + m_pBuf_end = pBuf + buf_size; + m_bit_buf = 0; + m_bit_buf_size = 0; + return true; + } + + void stop() + { + } + + inline uint32_t peek_bits(uint32_t num_bits) + { + if (!num_bits) + return 0; + + assert(num_bits <= 25); + + while (m_bit_buf_size < num_bits) + { + uint32_t c = 0; + if (m_pBuf < m_pBuf_end) + c = *m_pBuf++; + + m_bit_buf |= (c << m_bit_buf_size); + m_bit_buf_size += 8; + assert(m_bit_buf_size <= 32); + } + + return m_bit_buf & ((1 << num_bits) - 1); + } + + void remove_bits(uint32_t num_bits) + { + assert(m_bit_buf_size >= num_bits); + + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + } + + uint32_t get_bits(uint32_t num_bits) + { + if (num_bits > 25) + { + assert(num_bits <= 32); + + const uint32_t bits0 = peek_bits(25); + m_bit_buf >>= 25; + m_bit_buf_size -= 25; + num_bits -= 25; + + const uint32_t bits = peek_bits(num_bits); + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + + return bits0 | (bits << 25); + } + + const uint32_t bits = peek_bits(num_bits); + + m_bit_buf >>= num_bits; + m_bit_buf_size -= num_bits; + + return bits; + } + + uint32_t decode_truncated_binary(uint32_t n) + { + assert(n >= 2); + + const uint32_t k = basisu::floor_log2i(n); + const uint32_t u = (1 << (k + 1)) - n; + + uint32_t result = get_bits(k); + + if (result >= u) + result = ((result << 1) | get_bits(1)) - u; + + return result; + } + + uint32_t decode_rice(uint32_t m) + { + assert(m); + + uint32_t q = 0; + for (;;) + { + uint32_t k = peek_bits(16); + + uint32_t l = 0; + while (k & 1) + { + l++; + k >>= 1; + } + + q += l; + + remove_bits(l); + + if (l < 16) + break; + } + + return (q << m) + (get_bits(m + 1) >> 1); + } + + inline uint32_t decode_vlc(uint32_t chunk_bits) + { + assert(chunk_bits); + + const uint32_t chunk_size = 1 << chunk_bits; + const uint32_t chunk_mask = chunk_size - 1; + + uint32_t v = 0; + uint32_t ofs = 0; + + for ( ; ; ) + { + uint32_t s = get_bits(chunk_bits + 1); + v |= ((s & chunk_mask) << ofs); + ofs += chunk_bits; + + if ((s & chunk_size) == 0) + break; + + if (ofs >= 32) + { + assert(0); + break; + } + } + + return v; + } + + inline uint32_t decode_huffman(const huffman_decoding_table &ct) + { + assert(ct.m_code_sizes.size()); + + while (m_bit_buf_size < 16) + { + uint32_t c = 0; + if (m_pBuf < m_pBuf_end) + c = *m_pBuf++; + + m_bit_buf |= (c << m_bit_buf_size); + m_bit_buf_size += 8; + assert(m_bit_buf_size <= 32); + } + + int code_len; + + int sym; + if ((sym = ct.m_lookup[m_bit_buf & (basisu::cHuffmanFastLookupSize - 1)]) >= 0) + { + code_len = sym >> 16; + sym &= 0xFFFF; + } + else + { + code_len = basisu::cHuffmanFastLookupBits; + do + { + sym = ct.m_tree[~sym + ((m_bit_buf >> code_len++) & 1)]; // ~sym = -sym - 1 + } while (sym < 0); + } + + m_bit_buf >>= code_len; + m_bit_buf_size -= code_len; + + return sym; + } + + bool read_huffman_table(huffman_decoding_table &ct) + { + ct.clear(); + + const uint32_t total_used_syms = get_bits(basisu::cHuffmanMaxSymsLog2); + + if (!total_used_syms) + return true; + if (total_used_syms > basisu::cHuffmanMaxSyms) + return false; + + uint8_t code_length_code_sizes[basisu::cHuffmanTotalCodelengthCodes]; + basisu::clear_obj(code_length_code_sizes); + + const uint32_t num_codelength_codes = get_bits(5); + if ((num_codelength_codes < 1) || (num_codelength_codes > basisu::cHuffmanTotalCodelengthCodes)) + return false; + + for (uint32_t i = 0; i < num_codelength_codes; i++) + code_length_code_sizes[basisu::g_huffman_sorted_codelength_codes[i]] = static_cast<uint8_t>(get_bits(3)); + + huffman_decoding_table code_length_table; + if (!code_length_table.init(basisu::cHuffmanTotalCodelengthCodes, code_length_code_sizes)) + return false; + + if (!code_length_table.is_valid()) + return false; + + basisu::uint8_vec code_sizes(total_used_syms); + + uint32_t cur = 0; + while (cur < total_used_syms) + { + int c = decode_huffman(code_length_table); + + if (c <= 16) + code_sizes[cur++] = static_cast<uint8_t>(c); + else if (c == basisu::cHuffmanSmallZeroRunCode) + cur += get_bits(basisu::cHuffmanSmallZeroRunExtraBits) + basisu::cHuffmanSmallZeroRunSizeMin; + else if (c == basisu::cHuffmanBigZeroRunCode) + cur += get_bits(basisu::cHuffmanBigZeroRunExtraBits) + basisu::cHuffmanBigZeroRunSizeMin; + else + { + if (!cur) + return false; + + uint32_t l; + if (c == basisu::cHuffmanSmallRepeatCode) + l = get_bits(basisu::cHuffmanSmallRepeatExtraBits) + basisu::cHuffmanSmallRepeatSizeMin; + else + l = get_bits(basisu::cHuffmanBigRepeatExtraBits) + basisu::cHuffmanBigRepeatSizeMin; + + const uint8_t prev = code_sizes[cur - 1]; + if (prev == 0) + return false; + do + { + if (cur >= total_used_syms) + return false; + code_sizes[cur++] = prev; + } while (--l > 0); + } + } + + if (cur != total_used_syms) + return false; + + return ct.init(total_used_syms, &code_sizes[0]); + } + + private: + uint32_t m_buf_size; + const uint8_t *m_pBuf; + const uint8_t *m_pBuf_start; + const uint8_t *m_pBuf_end; + + uint32_t m_bit_buf; + uint32_t m_bit_buf_size; + }; + + inline uint32_t basisd_rand(uint32_t seed) + { + if (!seed) + seed++; + uint32_t z = seed; + BASISD_znew; + return z; + } + + // Returns random number in [0,limit). Max limit is 0xFFFF. + inline uint32_t basisd_urand(uint32_t& seed, uint32_t limit) + { + seed = basisd_rand(seed); + return (((seed ^ (seed >> 16)) & 0xFFFF) * limit) >> 16; + } + + class approx_move_to_front + { + public: + approx_move_to_front(uint32_t n) + { + init(n); + } + + void init(uint32_t n) + { + m_values.resize(n); + m_rover = n / 2; + } + + const basisu::int_vec& get_values() const { return m_values; } + basisu::int_vec& get_values() { return m_values; } + + uint32_t size() const { return (uint32_t)m_values.size(); } + + const int& operator[] (uint32_t index) const { return m_values[index]; } + int operator[] (uint32_t index) { return m_values[index]; } + + void add(int new_value) + { + m_values[m_rover++] = new_value; + if (m_rover == m_values.size()) + m_rover = (uint32_t)m_values.size() / 2; + } + + void use(uint32_t index) + { + if (index) + { + //std::swap(m_values[index / 2], m_values[index]); + int x = m_values[index / 2]; + int y = m_values[index]; + m_values[index / 2] = y; + m_values[index] = x; + } + } + + // returns -1 if not found + int find(int value) const + { + for (uint32_t i = 0; i < m_values.size(); i++) + if (m_values[i] == value) + return i; + return -1; + } + + void reset() + { + const uint32_t n = (uint32_t)m_values.size(); + + m_values.clear(); + + init(n); + } + + private: + basisu::int_vec m_values; + uint32_t m_rover; + }; + + struct decoder_etc_block; + + struct color32 + { + union + { + struct + { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint8_t c[4]; + + uint32_t m; + }; + + color32() { } + + color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + + void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { c[0] = static_cast<uint8_t>(vr); c[1] = static_cast<uint8_t>(vg); c[2] = static_cast<uint8_t>(vb); c[3] = static_cast<uint8_t>(va); } + + uint8_t operator[] (uint32_t idx) const { assert(idx < 4); return c[idx]; } + uint8_t &operator[] (uint32_t idx) { assert(idx < 4); return c[idx]; } + + bool operator== (const color32&rhs) const { return m == rhs.m; } + }; + + struct endpoint + { + color32 m_color5; + uint8_t m_inten5; + }; + + struct selector + { + // Plain selectors (2-bits per value) + uint8_t m_selectors[4]; + + // ETC1 selectors + uint8_t m_bytes[4]; + + uint8_t m_lo_selector, m_hi_selector; + uint8_t m_num_unique_selectors; + + void init_flags() + { + uint32_t hist[4] = { 0, 0, 0, 0 }; + for (uint32_t y = 0; y < 4; y++) + { + for (uint32_t x = 0; x < 4; x++) + { + uint32_t s = get_selector(x, y); + hist[s]++; + } + } + + m_lo_selector = 3; + m_hi_selector = 0; + m_num_unique_selectors = 0; + + for (uint32_t i = 0; i < 4; i++) + { + if (hist[i]) + { + m_num_unique_selectors++; + if (i < m_lo_selector) m_lo_selector = static_cast<uint8_t>(i); + if (i > m_hi_selector) m_hi_selector = static_cast<uint8_t>(i); + } + } + } + + // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. + inline uint32_t get_selector(uint32_t x, uint32_t y) const + { + assert((x < 4) && (y < 4)); + return (m_selectors[y] >> (x * 2)) & 3; + } + + void set_selector(uint32_t x, uint32_t y, uint32_t val) + { + static const uint8_t s_selector_index_to_etc1[4] = { 3, 2, 0, 1 }; + + assert((x | y | val) < 4); + + m_selectors[y] &= ~(3 << (x * 2)); + m_selectors[y] |= (val << (x * 2)); + + const uint32_t etc1_bit_index = x * 4 + y; + + uint8_t *p = &m_bytes[3 - (etc1_bit_index >> 3)]; + + const uint32_t byte_bit_ofs = etc1_bit_index & 7; + const uint32_t mask = 1 << byte_bit_ofs; + + const uint32_t etc1_val = s_selector_index_to_etc1[val]; + + const uint32_t lsb = etc1_val & 1; + const uint32_t msb = etc1_val >> 1; + + p[0] &= ~mask; + p[0] |= (lsb << byte_bit_ofs); + + p[-2] &= ~mask; + p[-2] |= (msb << byte_bit_ofs); + } + }; + +} // namespace basist + + + |