diff options
Diffstat (limited to 'thirdparty')
37 files changed, 867 insertions, 4626 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index cacc0275dd..33ce2423d9 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -89,17 +89,19 @@ will limit its functionality to IPv4 only. ## etcpak - Upstream: https://github.com/wolfpld/etcpak -- Version: git (403d38b3f1cb347c196d845d0a05e44a00d17169, 2021) +- Version: git (f27daea656ff77671580f838a889e33049430ebd, 2021) - License: BSD-3-Clause -Important: Some Godot-made changes, see `patches` folders. - Files extracted from upstream source: -- All `.cpp` and `.hpp` files in the root folder except `Application.cpp`. -- `lz4` folder. +- Only the files relevant for compression (i.e. `Process*.cpp` and their deps): + ``` + Dither.{cpp,hpp} ForceInline.hpp Math.hpp ProcessCommon.hpp ProcessRGB.{cpp,hpp} + ProcessDxtc.{cpp,hpp} Tables.{cpp,hpp} Vector.hpp + ``` - `AUTHORS.txt` and `LICENSE.txt` + ## fonts - `NotoSans*.ttf`, `NotoNaskhArabicUI_Regular.ttf`: @@ -342,7 +344,7 @@ File extracted from upstream release tarball: ## meshoptimizer - Upstream: https://github.com/zeux/meshoptimizer -- Version: git (e3f53f66e7a35b9b8764bee478589d79e34fa698, 2021) +- Version: 0.16 (95893c0566646434dd675b708d293fcb2d526d08, 2021) - License: MIT Files extracted from upstream repository: diff --git a/thirdparty/etcpak/Bitmap.cpp b/thirdparty/etcpak/Bitmap.cpp deleted file mode 100644 index ef318318ac..0000000000 --- a/thirdparty/etcpak/Bitmap.cpp +++ /dev/null @@ -1,216 +0,0 @@ -#include <ctype.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> - -#include <png.h> -#include "lz4/lz4.h" - -#include "Bitmap.hpp" -#include "Debug.hpp" - -Bitmap::Bitmap( const char* fn, unsigned int lines, bool bgr ) - : m_block( nullptr ) - , m_lines( lines ) - , m_alpha( true ) - , m_sema( 0 ) -{ - FILE* f = fopen( fn, "rb" ); - assert( f ); - - char buf[4]; - fread( buf, 1, 4, f ); - if( memcmp( buf, "raw4", 4 ) == 0 ) - { - uint8_t a; - fread( &a, 1, 1, f ); - m_alpha = a == 1; - uint32_t d; - fread( &d, 1, 4, f ); - m_size.x = d; - fread( &d, 1, 4, f ); - m_size.y = d; - DBGPRINT( "Raw bitmap " << fn << " " << m_size.x << "x" << m_size.y ); - - assert( m_size.x % 4 == 0 ); - assert( m_size.y % 4 == 0 ); - - int32_t csize; - fread( &csize, 1, 4, f ); - char* cbuf = new char[csize]; - fread( cbuf, 1, csize, f ); - fclose( f ); - - m_block = m_data = new uint32_t[m_size.x*m_size.y]; - m_linesLeft = m_size.y / 4; - - LZ4_decompress_fast( cbuf, (char*)m_data, m_size.x*m_size.y*4 ); - delete[] cbuf; - - for( int i=0; i<m_size.y/4; i++ ) - { - m_sema.unlock(); - } - } - else - { - fseek( f, 0, SEEK_SET ); - - unsigned int sig_read = 0; - int bit_depth, color_type, interlace_type; - - png_structp png_ptr = png_create_read_struct( PNG_LIBPNG_VER_STRING, NULL, NULL, NULL ); - png_infop info_ptr = png_create_info_struct( png_ptr ); - setjmp( png_jmpbuf( png_ptr ) ); - - png_init_io( png_ptr, f ); - png_set_sig_bytes( png_ptr, sig_read ); - - png_uint_32 w, h; - - png_read_info( png_ptr, info_ptr ); - png_get_IHDR( png_ptr, info_ptr, &w, &h, &bit_depth, &color_type, &interlace_type, NULL, NULL ); - - m_size = v2i( w, h ); - - png_set_strip_16( png_ptr ); - if( color_type == PNG_COLOR_TYPE_PALETTE ) - { - png_set_palette_to_rgb( png_ptr ); - } - else if( color_type == PNG_COLOR_TYPE_GRAY && bit_depth < 8 ) - { - png_set_expand_gray_1_2_4_to_8( png_ptr ); - } - if( png_get_valid( png_ptr, info_ptr, PNG_INFO_tRNS ) ) - { - png_set_tRNS_to_alpha( png_ptr ); - } - if( color_type == PNG_COLOR_TYPE_GRAY_ALPHA ) - { - png_set_gray_to_rgb(png_ptr); - } - if( bgr ) - { - png_set_bgr(png_ptr); - } - - switch( color_type ) - { - case PNG_COLOR_TYPE_PALETTE: - if( !png_get_valid( png_ptr, info_ptr, PNG_INFO_tRNS ) ) - { - png_set_filler( png_ptr, 0xff, PNG_FILLER_AFTER ); - m_alpha = false; - } - break; - case PNG_COLOR_TYPE_GRAY_ALPHA: - png_set_gray_to_rgb( png_ptr ); - break; - case PNG_COLOR_TYPE_RGB: - png_set_filler( png_ptr, 0xff, PNG_FILLER_AFTER ); - m_alpha = false; - break; - default: - break; - } - - DBGPRINT( "Bitmap " << fn << " " << w << "x" << h ); - - assert( w % 4 == 0 ); - assert( h % 4 == 0 ); - - m_block = m_data = new uint32_t[w*h]; - m_linesLeft = h / 4; - - m_load = std::async( std::launch::async, [this, f, png_ptr, info_ptr]() mutable - { - auto ptr = m_data; - unsigned int lines = 0; - for( int i=0; i<m_size.y / 4; i++ ) - { - for( int j=0; j<4; j++ ) - { - png_read_rows( png_ptr, (png_bytepp)&ptr, NULL, 1 ); - ptr += m_size.x; - } - lines++; - if( lines >= m_lines ) - { - lines = 0; - m_sema.unlock(); - } - } - - if( lines != 0 ) - { - m_sema.unlock(); - } - - png_read_end( png_ptr, info_ptr ); - png_destroy_read_struct( &png_ptr, &info_ptr, NULL ); - fclose( f ); - } ); - } -} - -Bitmap::Bitmap( const v2i& size ) - : m_data( new uint32_t[size.x*size.y] ) - , m_block( nullptr ) - , m_lines( 1 ) - , m_linesLeft( size.y / 4 ) - , m_size( size ) - , m_sema( 0 ) -{ -} - -Bitmap::Bitmap( const Bitmap& src, unsigned int lines ) - : m_lines( lines ) - , m_alpha( src.Alpha() ) - , m_sema( 0 ) -{ -} - -Bitmap::~Bitmap() -{ - delete[] m_data; -} - -void Bitmap::Write( const char* fn ) -{ - FILE* f = fopen( fn, "wb" ); - assert( f ); - - png_structp png_ptr = png_create_write_struct( PNG_LIBPNG_VER_STRING, NULL, NULL, NULL ); - png_infop info_ptr = png_create_info_struct( png_ptr ); - setjmp( png_jmpbuf( png_ptr ) ); - png_init_io( png_ptr, f ); - - png_set_IHDR( png_ptr, info_ptr, m_size.x, m_size.y, 8, PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE ); - - png_write_info( png_ptr, info_ptr ); - - uint32_t* ptr = m_data; - for( int i=0; i<m_size.y; i++ ) - { - png_write_rows( png_ptr, (png_bytepp)(&ptr), 1 ); - ptr += m_size.x; - } - - png_write_end( png_ptr, info_ptr ); - png_destroy_write_struct( &png_ptr, &info_ptr ); - - fclose( f ); -} - -const uint32_t* Bitmap::NextBlock( unsigned int& lines, bool& done ) -{ - std::lock_guard<std::mutex> lock( m_lock ); - lines = std::min( m_lines, m_linesLeft ); - auto ret = m_block; - m_sema.lock(); - m_block += m_size.x * 4 * lines; - m_linesLeft -= lines; - done = m_linesLeft == 0; - return ret; -} diff --git a/thirdparty/etcpak/Bitmap.hpp b/thirdparty/etcpak/Bitmap.hpp deleted file mode 100644 index fae8c936ed..0000000000 --- a/thirdparty/etcpak/Bitmap.hpp +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef __DARKRL__BITMAP_HPP__ -#define __DARKRL__BITMAP_HPP__ - -#include <future> -#include <memory> -#include <mutex> -#include <stdint.h> - -#include "Semaphore.hpp" -#include "Vector.hpp" - -enum class Channels -{ - RGB, - Alpha -}; - -class Bitmap -{ -public: - Bitmap( const char* fn, unsigned int lines, bool bgr ); - Bitmap( const v2i& size ); - virtual ~Bitmap(); - - void Write( const char* fn ); - - uint32_t* Data() { if( m_load.valid() ) m_load.wait(); return m_data; } - const uint32_t* Data() const { if( m_load.valid() ) m_load.wait(); return m_data; } - const v2i& Size() const { return m_size; } - bool Alpha() const { return m_alpha; } - - const uint32_t* NextBlock( unsigned int& lines, bool& done ); - -protected: - Bitmap( const Bitmap& src, unsigned int lines ); - - uint32_t* m_data; - uint32_t* m_block; - unsigned int m_lines; - unsigned int m_linesLeft; - v2i m_size; - bool m_alpha; - Semaphore m_sema; - std::mutex m_lock; - std::future<void> m_load; -}; - -typedef std::shared_ptr<Bitmap> BitmapPtr; - -#endif diff --git a/thirdparty/etcpak/BitmapDownsampled.cpp b/thirdparty/etcpak/BitmapDownsampled.cpp deleted file mode 100644 index 0eb0d81185..0000000000 --- a/thirdparty/etcpak/BitmapDownsampled.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include <string.h> -#include <utility> - -#include "BitmapDownsampled.hpp" -#include "Debug.hpp" - -BitmapDownsampled::BitmapDownsampled( const Bitmap& bmp, unsigned int lines ) - : Bitmap( bmp, lines ) -{ - m_size.x = std::max( 1, bmp.Size().x / 2 ); - m_size.y = std::max( 1, bmp.Size().y / 2 ); - - int w = std::max( m_size.x, 4 ); - int h = std::max( m_size.y, 4 ); - - DBGPRINT( "Subbitmap " << m_size.x << "x" << m_size.y ); - - m_block = m_data = new uint32_t[w*h]; - - if( m_size.x < w || m_size.y < h ) - { - memset( m_data, 0, w*h*sizeof( uint32_t ) ); - m_linesLeft = h / 4; - unsigned int lines = 0; - for( int i=0; i<h/4; i++ ) - { - for( int j=0; j<4; j++ ) - { - lines++; - if( lines > m_lines ) - { - lines = 0; - m_sema.unlock(); - } - } - } - if( lines != 0 ) - { - m_sema.unlock(); - } - } - else - { - m_linesLeft = h / 4; - m_load = std::async( std::launch::async, [this, &bmp, w, h]() mutable - { - auto ptr = m_data; - auto src1 = bmp.Data(); - auto src2 = src1 + bmp.Size().x; - unsigned int lines = 0; - for( int i=0; i<h/4; i++ ) - { - for( int j=0; j<4; j++ ) - { - for( int k=0; k<m_size.x; k++ ) - { - int r = ( ( *src1 & 0x000000FF ) + ( *(src1+1) & 0x000000FF ) + ( *src2 & 0x000000FF ) + ( *(src2+1) & 0x000000FF ) ) / 4; - int g = ( ( ( *src1 & 0x0000FF00 ) + ( *(src1+1) & 0x0000FF00 ) + ( *src2 & 0x0000FF00 ) + ( *(src2+1) & 0x0000FF00 ) ) / 4 ) & 0x0000FF00; - int b = ( ( ( *src1 & 0x00FF0000 ) + ( *(src1+1) & 0x00FF0000 ) + ( *src2 & 0x00FF0000 ) + ( *(src2+1) & 0x00FF0000 ) ) / 4 ) & 0x00FF0000; - int a = ( ( ( ( ( *src1 & 0xFF000000 ) >> 8 ) + ( ( *(src1+1) & 0xFF000000 ) >> 8 ) + ( ( *src2 & 0xFF000000 ) >> 8 ) + ( ( *(src2+1) & 0xFF000000 ) >> 8 ) ) / 4 ) & 0x00FF0000 ) << 8; - *ptr++ = r | g | b | a; - src1 += 2; - src2 += 2; - } - src1 += m_size.x * 2; - src2 += m_size.x * 2; - } - lines++; - if( lines >= m_lines ) - { - lines = 0; - m_sema.unlock(); - } - } - - if( lines != 0 ) - { - m_sema.unlock(); - } - } ); - } -} - -BitmapDownsampled::~BitmapDownsampled() -{ -} diff --git a/thirdparty/etcpak/BitmapDownsampled.hpp b/thirdparty/etcpak/BitmapDownsampled.hpp deleted file mode 100644 index b7313808df..0000000000 --- a/thirdparty/etcpak/BitmapDownsampled.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __DARKRL__BITMAPDOWNSAMPLED_HPP__ -#define __DARKRL__BITMAPDOWNSAMPLED_HPP__ - -#include "Bitmap.hpp" - -class BitmapDownsampled : public Bitmap -{ -public: - BitmapDownsampled( const Bitmap& bmp, unsigned int lines ); - ~BitmapDownsampled(); -}; - -#endif diff --git a/thirdparty/etcpak/BlockData.cpp b/thirdparty/etcpak/BlockData.cpp deleted file mode 100644 index 4906e69492..0000000000 --- a/thirdparty/etcpak/BlockData.cpp +++ /dev/null @@ -1,1296 +0,0 @@ -#include <assert.h> -#include <string.h> - -#include "BlockData.hpp" -#include "ColorSpace.hpp" -#include "Debug.hpp" -#include "MipMap.hpp" -#include "mmap.hpp" -#include "ProcessRGB.hpp" -#include "ProcessDxtc.hpp" -#include "Tables.hpp" -#include "TaskDispatch.hpp" - -#ifdef __ARM_NEON -# include <arm_neon.h> -#endif - -#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER -# ifdef _MSC_VER -# include <intrin.h> -# include <Windows.h> -# define _bswap(x) _byteswap_ulong(x) -# define _bswap64(x) _byteswap_uint64(x) -# else -# include <x86intrin.h> -# endif -#endif - -#ifndef _bswap -# define _bswap(x) __builtin_bswap32(x) -# define _bswap64(x) __builtin_bswap64(x) -#endif - -static uint8_t table59T58H[8] = { 3,6,11,16,23,32,41,64 }; - -BlockData::BlockData( const char* fn ) - : m_file( fopen( fn, "rb" ) ) -{ - assert( m_file ); - fseek( m_file, 0, SEEK_END ); - m_maplen = ftell( m_file ); - fseek( m_file, 0, SEEK_SET ); - m_data = (uint8_t*)mmap( nullptr, m_maplen, PROT_READ, MAP_SHARED, fileno( m_file ), 0 ); - - auto data32 = (uint32_t*)m_data; - if( *data32 == 0x03525650 ) - { - // PVR - switch( *(data32+2) ) - { - case 6: - m_type = Etc1; - break; - case 7: - m_type = Dxt1; - break; - case 11: - m_type = Dxt5; - break; - case 22: - m_type = Etc2_RGB; - break; - case 23: - m_type = Etc2_RGBA; - break; - default: - assert( false ); - break; - } - - m_size.y = *(data32+6); - m_size.x = *(data32+7); - m_dataOffset = 52 + *(data32+12); - } - else if( *data32 == 0x58544BAB ) - { - // KTX - switch( *(data32+7) ) - { - case 0x9274: - m_type = Etc2_RGB; - break; - case 0x9278: - m_type = Etc2_RGBA; - break; - default: - assert( false ); - break; - } - - m_size.x = *(data32+9); - m_size.y = *(data32+10); - m_dataOffset = sizeof( uint32_t ) * 17 + *(data32+15); - } - else - { - assert( false ); - } -} - -static uint8_t* OpenForWriting( const char* fn, size_t len, const v2i& size, FILE** f, int levels, BlockData::Type type ) -{ - *f = fopen( fn, "wb+" ); - assert( *f ); - fseek( *f, len - 1, SEEK_SET ); - const char zero = 0; - fwrite( &zero, 1, 1, *f ); - fseek( *f, 0, SEEK_SET ); - - auto ret = (uint8_t*)mmap( nullptr, len, PROT_WRITE, MAP_SHARED, fileno( *f ), 0 ); - auto dst = (uint32_t*)ret; - - *dst++ = 0x03525650; // version - *dst++ = 0; // flags - switch( type ) // pixelformat[0] - { - case BlockData::Etc1: - *dst++ = 6; - break; - case BlockData::Etc2_RGB: - *dst++ = 22; - break; - case BlockData::Etc2_RGBA: - *dst++ = 23; - break; - case BlockData::Dxt1: - *dst++ = 7; - break; - case BlockData::Dxt5: - *dst++ = 11; - break; - default: - assert( false ); - break; - } - *dst++ = 0; // pixelformat[1] - *dst++ = 0; // colourspace - *dst++ = 0; // channel type - *dst++ = size.y; // height - *dst++ = size.x; // width - *dst++ = 1; // depth - *dst++ = 1; // num surfs - *dst++ = 1; // num faces - *dst++ = levels; // mipmap count - *dst++ = 0; // metadata size - - return ret; -} - -static int AdjustSizeForMipmaps( const v2i& size, int levels ) -{ - int len = 0; - v2i current = size; - for( int i=1; i<levels; i++ ) - { - assert( current.x != 1 || current.y != 1 ); - current.x = std::max( 1, current.x / 2 ); - current.y = std::max( 1, current.y / 2 ); - len += std::max( 4, current.x ) * std::max( 4, current.y ) / 2; - } - assert( current.x == 1 && current.y == 1 ); - return len; -} - -BlockData::BlockData( const char* fn, const v2i& size, bool mipmap, Type type ) - : m_size( size ) - , m_dataOffset( 52 ) - , m_maplen( m_size.x*m_size.y/2 ) - , m_type( type ) -{ - assert( m_size.x%4 == 0 && m_size.y%4 == 0 ); - - uint32_t cnt = m_size.x * m_size.y / 16; - DBGPRINT( cnt << " blocks" ); - - int levels = 1; - - if( mipmap ) - { - levels = NumberOfMipLevels( size ); - DBGPRINT( "Number of mipmaps: " << levels ); - m_maplen += AdjustSizeForMipmaps( size, levels ); - } - - if( type == Etc2_RGBA || type == Dxt5 ) m_maplen *= 2; - - m_maplen += m_dataOffset; - m_data = OpenForWriting( fn, m_maplen, m_size, &m_file, levels, type ); -} - -BlockData::BlockData( const v2i& size, bool mipmap, Type type ) - : m_size( size ) - , m_dataOffset( 52 ) - , m_file( nullptr ) - , m_maplen( m_size.x*m_size.y/2 ) - , m_type( type ) -{ - assert( m_size.x%4 == 0 && m_size.y%4 == 0 ); - if( mipmap ) - { - const int levels = NumberOfMipLevels( size ); - m_maplen += AdjustSizeForMipmaps( size, levels ); - } - - if( type == Etc2_RGBA || type == Dxt5 ) m_maplen *= 2; - - m_maplen += m_dataOffset; - m_data = new uint8_t[m_maplen]; -} - -BlockData::~BlockData() -{ - if( m_file ) - { - munmap( m_data, m_maplen ); - fclose( m_file ); - } - else - { - delete[] m_data; - } -} - -void BlockData::Process( const uint32_t* src, uint32_t blocks, size_t offset, size_t width, Channels type, bool dither ) -{ - auto dst = ((uint64_t*)( m_data + m_dataOffset )) + offset; - - if( type == Channels::Alpha ) - { - if( m_type != Etc1 ) - { - CompressEtc2Alpha( src, dst, blocks, width ); - } - else - { - CompressEtc1Alpha( src, dst, blocks, width ); - } - } - else - { - switch( m_type ) - { - case Etc1: - if( dither ) - { - CompressEtc1RgbDither( src, dst, blocks, width ); - } - else - { - CompressEtc1Rgb( src, dst, blocks, width ); - } - break; - case Etc2_RGB: - CompressEtc2Rgb( src, dst, blocks, width ); - break; - case Dxt1: - if( dither ) - { - CompressDxt1Dither( src, dst, blocks, width ); - } - else - { - CompressDxt1( src, dst, blocks, width ); - } - break; - default: - assert( false ); - break; - } - } -} - -void BlockData::ProcessRGBA( const uint32_t* src, uint32_t blocks, size_t offset, size_t width ) -{ - auto dst = ((uint64_t*)( m_data + m_dataOffset )) + offset * 2; - - switch( m_type ) - { - case Etc2_RGBA: - CompressEtc2Rgba( src, dst, blocks, width ); - break; - case Dxt5: - CompressDxt5( src, dst, blocks, width ); - break; - default: - assert( false ); - break; - } -} - -namespace -{ - -static etcpak_force_inline int32_t expand6(uint32_t value) -{ - return (value << 2) | (value >> 4); -} - -static etcpak_force_inline int32_t expand7(uint32_t value) -{ - return (value << 1) | (value >> 6); -} - -static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t w ) -{ - const auto r0 = ( block >> 24 ) & 0x1B; - const auto rh0 = ( r0 >> 3 ) & 0x3; - const auto rl0 = r0 & 0x3; - const auto g0 = ( block >> 20 ) & 0xF; - const auto b0 = ( block >> 16 ) & 0xF; - - const auto r1 = ( block >> 12 ) & 0xF; - const auto g1 = ( block >> 8 ) & 0xF; - const auto b1 = ( block >> 4 ) & 0xF; - - const auto cr0 = ( ( rh0 << 6 ) | ( rl0 << 4 ) | ( rh0 << 2 ) | rl0); - const auto cg0 = ( g0 << 4 ) | g0; - const auto cb0 = ( b0 << 4 ) | b0; - - const auto cr1 = ( r1 << 4 ) | r1; - const auto cg1 = ( g1 << 4 ) | g1; - const auto cb1 = ( b1 << 4 ) | b1; - - const auto codeword_hi = ( block >> 2 ) & 0x3; - const auto codeword_lo = block & 0x1; - const auto codeword = ( codeword_hi << 1 ) | codeword_lo; - - const auto c2r = clampu8( cr1 + table59T58H[codeword] ); - const auto c2g = clampu8( cg1 + table59T58H[codeword] ); - const auto c2b = clampu8( cb1 + table59T58H[codeword] ); - - const auto c3r = clampu8( cr1 - table59T58H[codeword] ); - const auto c3g = clampu8( cg1 - table59T58H[codeword] ); - const auto c3b = clampu8( cb1 - table59T58H[codeword] ); - - const uint32_t col_tab[4] = { - uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 ) | 0xFF000000), - uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 ) | 0xFF000000), - uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 ) | 0xFF000000), - uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 ) | 0xFF000000) - }; - - const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF; - for( uint8_t j = 0; j < 4; j++ ) - { - for( uint8_t i = 0; i < 4; i++ ) - { - //2bit indices distributed on two lane 16bit numbers - const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1) | ( ( indexes >> ( j + i * 4 ) ) & 0x1); - dst[j * w + i] = col_tab[index]; - } - } -} - -static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w ) -{ - const auto r0 = ( block >> 24 ) & 0x1B; - const auto rh0 = ( r0 >> 3 ) & 0x3; - const auto rl0 = r0 & 0x3; - const auto g0 = ( block >> 20 ) & 0xF; - const auto b0 = ( block >> 16 ) & 0xF; - - const auto r1 = ( block >> 12 ) & 0xF; - const auto g1 = ( block >> 8 ) & 0xF; - const auto b1 = ( block >> 4 ) & 0xF; - - const auto cr0 = ( ( rh0 << 6 ) | ( rl0 << 4 ) | ( rh0 << 2 ) | rl0); - const auto cg0 = ( g0 << 4 ) | g0; - const auto cb0 = ( b0 << 4 ) | b0; - - const auto cr1 = ( r1 << 4 ) | r1; - const auto cg1 = ( g1 << 4 ) | g1; - const auto cb1 = ( b1 << 4 ) | b1; - - const auto codeword_hi = ( block >> 2 ) & 0x3; - const auto codeword_lo = block & 0x1; - const auto codeword = (codeword_hi << 1) | codeword_lo; - - const int32_t base = alpha >> 56; - const int32_t mul = ( alpha >> 52 ) & 0xF; - const auto tbl = g_alpha[( alpha >> 48 ) & 0xF]; - - const auto c2r = clampu8( cr1 + table59T58H[codeword] ); - const auto c2g = clampu8( cg1 + table59T58H[codeword] ); - const auto c2b = clampu8( cb1 + table59T58H[codeword] ); - - const auto c3r = clampu8( cr1 - table59T58H[codeword] ); - const auto c3g = clampu8( cg1 - table59T58H[codeword] ); - const auto c3b = clampu8( cb1 - table59T58H[codeword] ); - - const uint32_t col_tab[4] = { - uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 )), - uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 )), - uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 )), - uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 )) - }; - - const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF; - for( uint8_t j = 0; j < 4; j++ ) - { - for( uint8_t i = 0; i < 4; i++ ) - { - //2bit indices distributed on two lane 16bit numbers - const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1 ) | ( ( indexes >> ( j + i * 4 ) ) & 0x1 ); - const auto amod = tbl[( alpha >> ( 45 - j * 3 - i * 12 ) ) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - dst[j * w + i] = col_tab[index] | ( a << 24 ); - } - } -} - -static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t w ) -{ - const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF; - - const auto r0444 = ( block >> 27 ) & 0xF; - const auto g0444 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 ); - const auto b0444 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 ); - - const auto r1444 = ( block >> 11 ) & 0xF; - const auto g1444 = ( block >> 7 ) & 0xF; - const auto b1444 = ( block >> 3 ) & 0xF; - - const auto r0 = ( r0444 << 4 ) | r0444; - const auto g0 = ( g0444 << 4 ) | g0444; - const auto b0 = ( b0444 << 4 ) | b0444; - - const auto r1 = ( r1444 << 4 ) | r1444; - const auto g1 = ( g1444 << 4 ) | g1444; - const auto b1 = ( b1444 << 4 ) | b1444; - - const auto codeword_hi = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) ); - const auto c0 = ( r0444 << 8 ) | ( g0444 << 4 ) | ( b0444 << 0 ); - const auto c1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 ); - const auto codeword_lo = ( c0 >= c1 ) ? 1 : 0; - const auto codeword = codeword_hi | codeword_lo; - - const uint32_t col_tab[] = { - uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )), - uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )), - uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )), - uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 )) - }; - - for( uint8_t j = 0; j < 4; j++ ) - { - for( uint8_t i = 0; i < 4; i++ ) - { - const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1 ) | ( ( indexes >> ( j + i * 4 ) ) & 0x1 ); - dst[j * w + i] = col_tab[index] | 0xFF000000; - } - } -} - -static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w ) -{ - const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF; - - const auto r0444 = ( block >> 27 ) & 0xF; - const auto g0444 = ( ( block >> 20 ) & 0x1 ) | ( ( ( block >> 24 ) & 0x7 ) << 1 ); - const auto b0444 = ( ( block >> 15 ) & 0x7 ) | ( ( ( block >> 19 ) & 0x1 ) << 3 ); - - const auto r1444 = ( block >> 11 ) & 0xF; - const auto g1444 = ( block >> 7 ) & 0xF; - const auto b1444 = ( block >> 3 ) & 0xF; - - const auto r0 = ( r0444 << 4 ) | r0444; - const auto g0 = ( g0444 << 4 ) | g0444; - const auto b0 = ( b0444 << 4 ) | b0444; - - const auto r1 = ( r1444 << 4 ) | r1444; - const auto g1 = ( g1444 << 4 ) | g1444; - const auto b1 = ( b1444 << 4 ) | b1444; - - const auto codeword_hi = ( ( block & 0x1 ) << 1 ) | ( ( block & 0x4 ) ); - const auto c0 = ( r0444 << 8 ) | ( g0444 << 4 ) | ( b0444 << 0 ); - const auto c1 = ( block >> 3 ) & ( ( 1 << 12 ) - 1 ); - const auto codeword_lo = ( c0 >= c1 ) ? 1 : 0; - const auto codeword = codeword_hi | codeword_lo; - - const int32_t base = alpha >> 56; - const int32_t mul = ( alpha >> 52 ) & 0xF; - const auto tbl = g_alpha[(alpha >> 48) & 0xF]; - - const uint32_t col_tab[] = { - uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )), - uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )), - uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )), - uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 )) - }; - - for( uint8_t j = 0; j < 4; j++ ) - { - for( uint8_t i = 0; i < 4; i++ ) - { - const uint8_t index = ( ( ( indexes >> ( j + i * 4 + 16 ) ) & 0x1 ) << 1 ) | ( ( indexes >> ( j + i * 4 ) ) & 0x1 ); - const auto amod = tbl[( alpha >> ( 45 - j * 3 - i * 12) ) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - dst[j * w + i] = col_tab[index] | ( a << 24 ); - } - } -} - -static etcpak_force_inline void DecodePlanar( uint64_t block, uint32_t* dst, uint32_t w ) -{ - const auto bv = expand6((block >> ( 0 + 32)) & 0x3F); - const auto gv = expand7((block >> ( 6 + 32)) & 0x7F); - const auto rv = expand6((block >> (13 + 32)) & 0x3F); - - const auto bh = expand6((block >> (19 + 32)) & 0x3F); - const auto gh = expand7((block >> (25 + 32)) & 0x7F); - - const auto rh0 = (block >> (32 - 32)) & 0x01; - const auto rh1 = ((block >> (34 - 32)) & 0x1F) << 1; - const auto rh = expand6(rh0 | rh1); - - const auto bo0 = (block >> (39 - 32)) & 0x07; - const auto bo1 = ((block >> (43 - 32)) & 0x3) << 3; - const auto bo2 = ((block >> (48 - 32)) & 0x1) << 5; - const auto bo = expand6(bo0 | bo1 | bo2); - const auto go0 = (block >> (49 - 32)) & 0x3F; - const auto go1 = ((block >> (56 - 32)) & 0x01) << 6; - const auto go = expand7(go0 | go1); - const auto ro = expand6((block >> (57 - 32)) & 0x3F); - -#ifdef __ARM_NEON - uint64_t init = uint64_t(uint16_t(rh-ro)) | ( uint64_t(uint16_t(gh-go)) << 16 ) | ( uint64_t(uint16_t(bh-bo)) << 32 ); - int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( init ) ); - init = uint64_t(uint16_t( (rv-ro) - 4 * (rh-ro) )) | ( uint64_t(uint16_t( (gv-go) - 4 * (gh-go) )) << 16 ) | ( uint64_t(uint16_t( (bv-bo) - 4 * (bh-bo) )) << 32 ); - int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( init ) ); - init = uint64_t(4*ro+2) | ( uint64_t(4*go+2) << 16 ) | ( uint64_t(4*bo+2) << 32 ) | ( uint64_t(0xFFF) << 48 ); - int16x8_t col = vreinterpretq_s16_u64( vdupq_n_u64( init ) ); - - for( int j=0; j<4; j++ ) - { - for( int i=0; i<4; i++ ) - { - uint8x8_t c = vqshrun_n_s16( col, 2 ); - vst1_lane_u32( dst+j*w+i, vreinterpret_u32_u8( c ), 0 ); - col = vaddq_s16( col, chco ); - } - col = vaddq_s16( col, cvco ); - } -#elif defined __AVX2__ - const auto R0 = 4*ro+2; - const auto G0 = 4*go+2; - const auto B0 = 4*bo+2; - const auto RHO = rh-ro; - const auto GHO = gh-go; - const auto BHO = bh-bo; - - __m256i cvco = _mm256_setr_epi16( rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0, rv - ro, gv - go, bv - bo, 0 ); - __m256i col = _mm256_setr_epi16( R0, G0, B0, 0xFFF, R0+RHO, G0+GHO, B0+BHO, 0xFFF, R0+2*RHO, G0+2*GHO, B0+2*BHO, 0xFFF, R0+3*RHO, G0+3*GHO, B0+3*BHO, 0xFFF ); - - for( int j=0; j<4; j++ ) - { - __m256i c = _mm256_srai_epi16( col, 2 ); - __m128i s = _mm_packus_epi16( _mm256_castsi256_si128( c ), _mm256_extracti128_si256( c, 1 ) ); - _mm_storeu_si128( (__m128i*)(dst+j*w), s ); - col = _mm256_add_epi16( col, cvco ); - } -#elif defined __SSE4_1__ - __m128i chco = _mm_setr_epi16( rh - ro, gh - go, bh - bo, 0, 0, 0, 0, 0 ); - __m128i cvco = _mm_setr_epi16( (rv - ro) - 4 * (rh - ro), (gv - go) - 4 * (gh - go), (bv - bo) - 4 * (bh - bo), 0, 0, 0, 0, 0 ); - __m128i col = _mm_setr_epi16( 4*ro+2, 4*go+2, 4*bo+2, 0xFFF, 0, 0, 0, 0 ); - - for( int j=0; j<4; j++ ) - { - for( int i=0; i<4; i++ ) - { - __m128i c = _mm_srai_epi16( col, 2 ); - __m128i s = _mm_packus_epi16( c, c ); - dst[j*w+i] = _mm_cvtsi128_si32( s ); - col = _mm_add_epi16( col, chco ); - } - col = _mm_add_epi16( col, cvco ); - } -#else - for( int j=0; j<4; j++ ) - { - for( int i=0; i<4; i++ ) - { - const uint32_t r = (i * (rh - ro) + j * (rv - ro) + 4 * ro + 2) >> 2; - const uint32_t g = (i * (gh - go) + j * (gv - go) + 4 * go + 2) >> 2; - const uint32_t b = (i * (bh - bo) + j * (bv - bo) + 4 * bo + 2) >> 2; - if( ( ( r | g | b ) & ~0xFF ) == 0 ) - { - dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000; - } - else - { - const auto rc = clampu8( r ); - const auto gc = clampu8( g ); - const auto bc = clampu8( b ); - dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000; - } - } - } -#endif -} - -static etcpak_force_inline void DecodePlanarAlpha( uint64_t block, uint64_t alpha, uint32_t* dst, uint32_t w ) -{ - const auto bv = expand6((block >> ( 0 + 32)) & 0x3F); - const auto gv = expand7((block >> ( 6 + 32)) & 0x7F); - const auto rv = expand6((block >> (13 + 32)) & 0x3F); - - const auto bh = expand6((block >> (19 + 32)) & 0x3F); - const auto gh = expand7((block >> (25 + 32)) & 0x7F); - - const auto rh0 = (block >> (32 - 32)) & 0x01; - const auto rh1 = ((block >> (34 - 32)) & 0x1F) << 1; - const auto rh = expand6(rh0 | rh1); - - const auto bo0 = (block >> (39 - 32)) & 0x07; - const auto bo1 = ((block >> (43 - 32)) & 0x3) << 3; - const auto bo2 = ((block >> (48 - 32)) & 0x1) << 5; - const auto bo = expand6(bo0 | bo1 | bo2); - const auto go0 = (block >> (49 - 32)) & 0x3F; - const auto go1 = ((block >> (56 - 32)) & 0x01) << 6; - const auto go = expand7(go0 | go1); - const auto ro = expand6((block >> (57 - 32)) & 0x3F); - - const int32_t base = alpha >> 56; - const int32_t mul = ( alpha >> 52 ) & 0xF; - const auto tbl = g_alpha[( alpha >> 48 ) & 0xF]; - -#ifdef __ARM_NEON - uint64_t init = uint64_t(uint16_t(rh-ro)) | ( uint64_t(uint16_t(gh-go)) << 16 ) | ( uint64_t(uint16_t(bh-bo)) << 32 ); - int16x8_t chco = vreinterpretq_s16_u64( vdupq_n_u64( init ) ); - init = uint64_t(uint16_t( (rv-ro) - 4 * (rh-ro) )) | ( uint64_t(uint16_t( (gv-go) - 4 * (gh-go) )) << 16 ) | ( uint64_t(uint16_t( (bv-bo) - 4 * (bh-bo) )) << 32 ); - int16x8_t cvco = vreinterpretq_s16_u64( vdupq_n_u64( init ) ); - init = uint64_t(4*ro+2) | ( uint64_t(4*go+2) << 16 ) | ( uint64_t(4*bo+2) << 32 ); - int16x8_t col = vreinterpretq_s16_u64( vdupq_n_u64( init ) ); - - for( int j=0; j<4; j++ ) - { - for( int i=0; i<4; i++ ) - { - const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - uint8x8_t c = vqshrun_n_s16( col, 2 ); - dst[j*w+i] = vget_lane_u32( vreinterpret_u32_u8( c ), 0 ) | ( a << 24 ); - col = vaddq_s16( col, chco ); - } - col = vaddq_s16( col, cvco ); - } -#elif defined __SSE4_1__ - __m128i chco = _mm_setr_epi16( rh - ro, gh - go, bh - bo, 0, 0, 0, 0, 0 ); - __m128i cvco = _mm_setr_epi16( (rv - ro) - 4 * (rh - ro), (gv - go) - 4 * (gh - go), (bv - bo) - 4 * (bh - bo), 0, 0, 0, 0, 0 ); - __m128i col = _mm_setr_epi16( 4*ro+2, 4*go+2, 4*bo+2, 0, 0, 0, 0, 0 ); - - for( int j=0; j<4; j++ ) - { - for( int i=0; i<4; i++ ) - { - const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - __m128i c = _mm_srai_epi16( col, 2 ); - __m128i s = _mm_packus_epi16( c, c ); - dst[j*w+i] = _mm_cvtsi128_si32( s ) | ( a << 24 ); - col = _mm_add_epi16( col, chco ); - } - col = _mm_add_epi16( col, cvco ); - } -#else - for (auto j = 0; j < 4; j++) - { - for (auto i = 0; i < 4; i++) - { - const uint32_t r = (i * (rh - ro) + j * (rv - ro) + 4 * ro + 2) >> 2; - const uint32_t g = (i * (gh - go) + j * (gv - go) + 4 * go + 2) >> 2; - const uint32_t b = (i * (bh - bo) + j * (bv - bo) + 4 * bo + 2) >> 2; - const auto amod = tbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - if( ( ( r | g | b ) & ~0xFF ) == 0 ) - { - dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 ); - } - else - { - const auto rc = clampu8( r ); - const auto gc = clampu8( g ); - const auto bc = clampu8( b ); - dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 ); - } - } - } -#endif -} - -} - -BitmapPtr BlockData::Decode() -{ - switch( m_type ) - { - case Etc1: - case Etc2_RGB: - return DecodeRGB(); - case Etc2_RGBA: - return DecodeRGBA(); - case Dxt1: - return DecodeDxt1(); - case Dxt5: - return DecodeDxt5(); - default: - assert( false ); - return nullptr; - } -} - -static etcpak_force_inline uint64_t ConvertByteOrder( uint64_t d ) -{ - uint32_t word[2]; - memcpy( word, &d, 8 ); - word[0] = _bswap( word[0] ); - word[1] = _bswap( word[1] ); - memcpy( &d, word, 8 ); - return d; -} - -static etcpak_force_inline void DecodeRGBPart( uint64_t d, uint32_t* dst, uint32_t w ) -{ - d = ConvertByteOrder( d ); - - uint32_t br[2], bg[2], bb[2]; - - if( d & 0x2 ) - { - int32_t dr, dg, db; - - uint32_t r0 = ( d & 0xF8000000 ) >> 27; - uint32_t g0 = ( d & 0x00F80000 ) >> 19; - uint32_t b0 = ( d & 0x0000F800 ) >> 11; - - dr = ( int32_t(d) << 5 ) >> 29; - dg = ( int32_t(d) << 13 ) >> 29; - db = ( int32_t(d) << 21 ) >> 29; - - int32_t r1 = int32_t(r0) + dr; - int32_t g1 = int32_t(g0) + dg; - int32_t b1 = int32_t(b0) + db; - - // T mode - if ( (r1 < 0) || (r1 > 31) ) - { - DecodeT( d, dst, w ); - return; - } - - // H mode - if ((g1 < 0) || (g1 > 31)) - { - DecodeH( d, dst, w ); - return; - } - - // P mode - if( (b1 < 0) || (b1 > 31) ) - { - DecodePlanar( d, dst, w ); - return; - } - - br[0] = ( r0 << 3 ) | ( r0 >> 2 ); - br[1] = ( r1 << 3 ) | ( r1 >> 2 ); - bg[0] = ( g0 << 3 ) | ( g0 >> 2 ); - bg[1] = ( g1 << 3 ) | ( g1 >> 2 ); - bb[0] = ( b0 << 3 ) | ( b0 >> 2 ); - bb[1] = ( b1 << 3 ) | ( b1 >> 2 ); - } - else - { - br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 ); - br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 ); - bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 ); - bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 ); - bb[0] = ( ( d & 0x0000F000 ) >> 8 ) | ( ( d & 0x0000F000 ) >> 12 ); - bb[1] = ( ( d & 0x00000F00 ) >> 4 ) | ( ( d & 0x00000F00 ) >> 8 ); - } - - unsigned int tcw[2]; - tcw[0] = ( d & 0xE0 ) >> 5; - tcw[1] = ( d & 0x1C ) >> 2; - - uint32_t b1 = ( d >> 32 ) & 0xFFFF; - uint32_t b2 = ( d >> 48 ); - - b1 = ( b1 | ( b1 << 8 ) ) & 0x00FF00FF; - b1 = ( b1 | ( b1 << 4 ) ) & 0x0F0F0F0F; - b1 = ( b1 | ( b1 << 2 ) ) & 0x33333333; - b1 = ( b1 | ( b1 << 1 ) ) & 0x55555555; - - b2 = ( b2 | ( b2 << 8 ) ) & 0x00FF00FF; - b2 = ( b2 | ( b2 << 4 ) ) & 0x0F0F0F0F; - b2 = ( b2 | ( b2 << 2 ) ) & 0x33333333; - b2 = ( b2 | ( b2 << 1 ) ) & 0x55555555; - - uint32_t idx = b1 | ( b2 << 1 ); - - if( d & 0x1 ) - { - for( int i=0; i<4; i++ ) - { - for( int j=0; j<4; j++ ) - { - const auto mod = g_table[tcw[j/2]][idx & 0x3]; - const auto r = br[j/2] + mod; - const auto g = bg[j/2] + mod; - const auto b = bb[j/2] + mod; - if( ( ( r | g | b ) & ~0xFF ) == 0 ) - { - dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000; - } - else - { - const auto rc = clampu8( r ); - const auto gc = clampu8( g ); - const auto bc = clampu8( b ); - dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000; - } - idx >>= 2; - } - } - } - else - { - for( int i=0; i<4; i++ ) - { - const auto tbl = g_table[tcw[i/2]]; - const auto cr = br[i/2]; - const auto cg = bg[i/2]; - const auto cb = bb[i/2]; - - for( int j=0; j<4; j++ ) - { - const auto mod = tbl[idx & 0x3]; - const auto r = cr + mod; - const auto g = cg + mod; - const auto b = cb + mod; - if( ( ( r | g | b ) & ~0xFF ) == 0 ) - { - dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | 0xFF000000; - } - else - { - const auto rc = clampu8( r ); - const auto gc = clampu8( g ); - const auto bc = clampu8( b ); - dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | 0xFF000000; - } - idx >>= 2; - } - } - } -} - -static etcpak_force_inline void DecodeRGBAPart( uint64_t d, uint64_t alpha, uint32_t* dst, uint32_t w ) -{ - d = ConvertByteOrder( d ); - alpha = _bswap64( alpha ); - - uint32_t br[2], bg[2], bb[2]; - - if( d & 0x2 ) - { - int32_t dr, dg, db; - - uint32_t r0 = ( d & 0xF8000000 ) >> 27; - uint32_t g0 = ( d & 0x00F80000 ) >> 19; - uint32_t b0 = ( d & 0x0000F800 ) >> 11; - - dr = ( int32_t(d) << 5 ) >> 29; - dg = ( int32_t(d) << 13 ) >> 29; - db = ( int32_t(d) << 21 ) >> 29; - - int32_t r1 = int32_t(r0) + dr; - int32_t g1 = int32_t(g0) + dg; - int32_t b1 = int32_t(b0) + db; - - // T mode - if ( (r1 < 0) || (r1 > 31) ) - { - DecodeTAlpha( d, alpha, dst, w ); - return; - } - - // H mode - if ( (g1 < 0) || (g1 > 31) ) - { - DecodeHAlpha( d, alpha, dst, w ); - return; - } - - // P mode - if ( (b1 < 0) || (b1 > 31) ) - { - DecodePlanarAlpha( d, alpha, dst, w ); - return; - } - - br[0] = ( r0 << 3 ) | ( r0 >> 2 ); - br[1] = ( r1 << 3 ) | ( r1 >> 2 ); - bg[0] = ( g0 << 3 ) | ( g0 >> 2 ); - bg[1] = ( g1 << 3 ) | ( g1 >> 2 ); - bb[0] = ( b0 << 3 ) | ( b0 >> 2 ); - bb[1] = ( b1 << 3 ) | ( b1 >> 2 ); - } - else - { - br[0] = ( ( d & 0xF0000000 ) >> 24 ) | ( ( d & 0xF0000000 ) >> 28 ); - br[1] = ( ( d & 0x0F000000 ) >> 20 ) | ( ( d & 0x0F000000 ) >> 24 ); - bg[0] = ( ( d & 0x00F00000 ) >> 16 ) | ( ( d & 0x00F00000 ) >> 20 ); - bg[1] = ( ( d & 0x000F0000 ) >> 12 ) | ( ( d & 0x000F0000 ) >> 16 ); - bb[0] = ( ( d & 0x0000F000 ) >> 8 ) | ( ( d & 0x0000F000 ) >> 12 ); - bb[1] = ( ( d & 0x00000F00 ) >> 4 ) | ( ( d & 0x00000F00 ) >> 8 ); - } - - unsigned int tcw[2]; - tcw[0] = ( d & 0xE0 ) >> 5; - tcw[1] = ( d & 0x1C ) >> 2; - - uint32_t b1 = ( d >> 32 ) & 0xFFFF; - uint32_t b2 = ( d >> 48 ); - - b1 = ( b1 | ( b1 << 8 ) ) & 0x00FF00FF; - b1 = ( b1 | ( b1 << 4 ) ) & 0x0F0F0F0F; - b1 = ( b1 | ( b1 << 2 ) ) & 0x33333333; - b1 = ( b1 | ( b1 << 1 ) ) & 0x55555555; - - b2 = ( b2 | ( b2 << 8 ) ) & 0x00FF00FF; - b2 = ( b2 | ( b2 << 4 ) ) & 0x0F0F0F0F; - b2 = ( b2 | ( b2 << 2 ) ) & 0x33333333; - b2 = ( b2 | ( b2 << 1 ) ) & 0x55555555; - - uint32_t idx = b1 | ( b2 << 1 ); - - const int32_t base = alpha >> 56; - const int32_t mul = ( alpha >> 52 ) & 0xF; - const auto atbl = g_alpha[( alpha >> 48 ) & 0xF]; - - if( d & 0x1 ) - { - for( int i=0; i<4; i++ ) - { - for( int j=0; j<4; j++ ) - { - const auto mod = g_table[tcw[j/2]][idx & 0x3]; - const auto r = br[j/2] + mod; - const auto g = bg[j/2] + mod; - const auto b = bb[j/2] + mod; - const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - if( ( ( r | g | b ) & ~0xFF ) == 0 ) - { - dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 ); - } - else - { - const auto rc = clampu8( r ); - const auto gc = clampu8( g ); - const auto bc = clampu8( b ); - dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 ); - } - idx >>= 2; - } - } - } - else - { - for( int i=0; i<4; i++ ) - { - const auto tbl = g_table[tcw[i/2]]; - const auto cr = br[i/2]; - const auto cg = bg[i/2]; - const auto cb = bb[i/2]; - - for( int j=0; j<4; j++ ) - { - const auto mod = tbl[idx & 0x3]; - const auto r = cr + mod; - const auto g = cg + mod; - const auto b = cb + mod; - const auto amod = atbl[(alpha >> ( 45 - j*3 - i*12 )) & 0x7]; - const uint32_t a = clampu8( base + amod * mul ); - if( ( ( r | g | b ) & ~0xFF ) == 0 ) - { - dst[j*w+i] = r | ( g << 8 ) | ( b << 16 ) | ( a << 24 ); - } - else - { - const auto rc = clampu8( r ); - const auto gc = clampu8( g ); - const auto bc = clampu8( b ); - dst[j*w+i] = rc | ( gc << 8 ) | ( bc << 16 ) | ( a << 24 ); - } - idx >>= 2; - } - } - } -} - -BitmapPtr BlockData::DecodeRGB() -{ - auto ret = std::make_shared<Bitmap>( m_size ); - - const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset ); - uint32_t* dst = ret->Data(); - - for( int y=0; y<m_size.y/4; y++ ) - { - for( int x=0; x<m_size.x/4; x++ ) - { - uint64_t d = *src++; - DecodeRGBPart( d, dst, m_size.x ); - dst += 4; - } - dst += m_size.x*3; - } - - return ret; -} - -BitmapPtr BlockData::DecodeRGBA() -{ - auto ret = std::make_shared<Bitmap>( m_size ); - - const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset ); - uint32_t* dst = ret->Data(); - - for( int y=0; y<m_size.y/4; y++ ) - { - for( int x=0; x<m_size.x/4; x++ ) - { - uint64_t a = *src++; - uint64_t d = *src++; - DecodeRGBAPart( d, a, dst, m_size.x ); - dst += 4; - } - dst += m_size.x*3; - } - - return ret; -} - -static etcpak_force_inline void DecodeDxt1Part( uint64_t d, uint32_t* dst, uint32_t w ) -{ - uint8_t* in = (uint8_t*)&d; - uint16_t c0, c1; - uint32_t idx; - memcpy( &c0, in, 2 ); - memcpy( &c1, in+2, 2 ); - memcpy( &idx, in+4, 4 ); - - uint8_t r0 = ( ( c0 & 0xF800 ) >> 8 ) | ( ( c0 & 0xF800 ) >> 13 ); - uint8_t g0 = ( ( c0 & 0x07E0 ) >> 3 ) | ( ( c0 & 0x07E0 ) >> 9 ); - uint8_t b0 = ( ( c0 & 0x001F ) << 3 ) | ( ( c0 & 0x001F ) >> 2 ); - - uint8_t r1 = ( ( c1 & 0xF800 ) >> 8 ) | ( ( c1 & 0xF800 ) >> 13 ); - uint8_t g1 = ( ( c1 & 0x07E0 ) >> 3 ) | ( ( c1 & 0x07E0 ) >> 9 ); - uint8_t b1 = ( ( c1 & 0x001F ) << 3 ) | ( ( c1 & 0x001F ) >> 2 ); - - uint32_t dict[4]; - - dict[0] = 0xFF000000 | ( b0 << 16 ) | ( g0 << 8 ) | r0; - dict[1] = 0xFF000000 | ( b1 << 16 ) | ( g1 << 8 ) | r1; - - uint32_t r, g, b; - if( c0 > c1 ) - { - r = (2*r0+r1)/3; - g = (2*g0+g1)/3; - b = (2*b0+b1)/3; - dict[2] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r; - r = (2*r1+r0)/3; - g = (2*g1+g0)/3; - b = (2*b1+b0)/3; - dict[3] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r; - } - else - { - r = (int(r0)+r1)/2; - g = (int(g0)+g1)/2; - b = (int(b0)+b1)/2; - dict[2] = 0xFF000000 | ( b << 16 ) | ( g << 8 ) | r; - dict[3] = 0xFF000000; - } - - memcpy( dst+0, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+1, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+2, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+3, dict + (idx & 0x3), 4 ); - idx >>= 2; - dst += w; - - memcpy( dst+0, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+1, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+2, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+3, dict + (idx & 0x3), 4 ); - idx >>= 2; - dst += w; - - memcpy( dst+0, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+1, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+2, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+3, dict + (idx & 0x3), 4 ); - idx >>= 2; - dst += w; - - memcpy( dst+0, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+1, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+2, dict + (idx & 0x3), 4 ); - idx >>= 2; - memcpy( dst+3, dict + (idx & 0x3), 4 ); -} - -static etcpak_force_inline void DecodeDxt5Part( uint64_t a, uint64_t d, uint32_t* dst, uint32_t w ) -{ - uint8_t* ain = (uint8_t*)&a; - uint8_t a0, a1; - uint64_t aidx = 0; - memcpy( &a0, ain, 1 ); - memcpy( &a1, ain+1, 1 ); - memcpy( &aidx, ain+2, 6 ); - - uint8_t* in = (uint8_t*)&d; - uint16_t c0, c1; - uint32_t idx; - memcpy( &c0, in, 2 ); - memcpy( &c1, in+2, 2 ); - memcpy( &idx, in+4, 4 ); - - uint32_t adict[8]; - adict[0] = a0 << 24; - adict[1] = a1 << 24; - if( a0 > a1 ) - { - adict[2] = ( (6*a0+1*a1)/7 ) << 24; - adict[3] = ( (5*a0+2*a1)/7 ) << 24; - adict[4] = ( (4*a0+3*a1)/7 ) << 24; - adict[5] = ( (3*a0+4*a1)/7 ) << 24; - adict[6] = ( (2*a0+5*a1)/7 ) << 24; - adict[7] = ( (1*a0+6*a1)/7 ) << 24; - } - else - { - adict[2] = ( (4*a0+1*a1)/5 ) << 24; - adict[3] = ( (3*a0+2*a1)/5 ) << 24; - adict[4] = ( (2*a0+3*a1)/5 ) << 24; - adict[5] = ( (1*a0+4*a1)/5 ) << 24; - adict[6] = 0; - adict[7] = 0xFF000000; - } - - uint8_t r0 = ( ( c0 & 0xF800 ) >> 8 ) | ( ( c0 & 0xF800 ) >> 13 ); - uint8_t g0 = ( ( c0 & 0x07E0 ) >> 3 ) | ( ( c0 & 0x07E0 ) >> 9 ); - uint8_t b0 = ( ( c0 & 0x001F ) << 3 ) | ( ( c0 & 0x001F ) >> 2 ); - - uint8_t r1 = ( ( c1 & 0xF800 ) >> 8 ) | ( ( c1 & 0xF800 ) >> 13 ); - uint8_t g1 = ( ( c1 & 0x07E0 ) >> 3 ) | ( ( c1 & 0x07E0 ) >> 9 ); - uint8_t b1 = ( ( c1 & 0x001F ) << 3 ) | ( ( c1 & 0x001F ) >> 2 ); - - uint32_t dict[4]; - - dict[0] = ( b0 << 16 ) | ( g0 << 8 ) | r0; - dict[1] = ( b1 << 16 ) | ( g1 << 8 ) | r1; - - uint32_t r, g, b; - if( c0 > c1 ) - { - r = (2*r0+r1)/3; - g = (2*g0+g1)/3; - b = (2*b0+b1)/3; - dict[2] = ( b << 16 ) | ( g << 8 ) | r; - r = (2*r1+r0)/3; - g = (2*g1+g0)/3; - b = (2*b1+b0)/3; - dict[3] = ( b << 16 ) | ( g << 8 ) | r; - } - else - { - r = (int(r0)+r1)/2; - g = (int(g0)+g1)/2; - b = (int(b0)+b1)/2; - dict[2] = ( b << 16 ) | ( g << 8 ) | r; - dict[3] = 0; - } - - dst[0] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[1] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[2] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[3] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst += w; - - dst[0] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[1] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[2] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[3] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst += w; - - dst[0] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[1] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[2] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[3] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst += w; - - dst[0] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[1] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[2] = dict[idx & 0x3] | adict[aidx & 0x7]; - idx >>= 2; - aidx >>= 3; - dst[3] = dict[idx & 0x3] | adict[aidx & 0x7]; -} - -BitmapPtr BlockData::DecodeDxt1() -{ - auto ret = std::make_shared<Bitmap>( m_size ); - - const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset ); - uint32_t* dst = ret->Data(); - - for( int y=0; y<m_size.y/4; y++ ) - { - for( int x=0; x<m_size.x/4; x++ ) - { - uint64_t d = *src++; - DecodeDxt1Part( d, dst, m_size.x ); - dst += 4; - } - dst += m_size.x*3; - } - - return ret; -} - -BitmapPtr BlockData::DecodeDxt5() -{ - auto ret = std::make_shared<Bitmap>( m_size ); - - const uint64_t* src = (const uint64_t*)( m_data + m_dataOffset ); - uint32_t* dst = ret->Data(); - - for( int y=0; y<m_size.y/4; y++ ) - { - for( int x=0; x<m_size.x/4; x++ ) - { - uint64_t a = *src++; - uint64_t d = *src++; - DecodeDxt5Part( a, d, dst, m_size.x ); - dst += 4; - } - dst += m_size.x*3; - } - - return ret; -} diff --git a/thirdparty/etcpak/BlockData.hpp b/thirdparty/etcpak/BlockData.hpp deleted file mode 100644 index 209e35b4e6..0000000000 --- a/thirdparty/etcpak/BlockData.hpp +++ /dev/null @@ -1,56 +0,0 @@ -#ifndef __BLOCKDATA_HPP__ -#define __BLOCKDATA_HPP__ - -#include <condition_variable> -#include <future> -#include <memory> -#include <mutex> -#include <stdint.h> -#include <stdio.h> -#include <vector> - -#include "Bitmap.hpp" -#include "ForceInline.hpp" -#include "Vector.hpp" - -class BlockData -{ -public: - enum Type - { - Etc1, - Etc2_RGB, - Etc2_RGBA, - Dxt1, - Dxt5 - }; - - BlockData( const char* fn ); - BlockData( const char* fn, const v2i& size, bool mipmap, Type type ); - BlockData( const v2i& size, bool mipmap, Type type ); - ~BlockData(); - - BitmapPtr Decode(); - - void Process( const uint32_t* src, uint32_t blocks, size_t offset, size_t width, Channels type, bool dither ); - void ProcessRGBA( const uint32_t* src, uint32_t blocks, size_t offset, size_t width ); - - const v2i& Size() const { return m_size; } - -private: - etcpak_no_inline BitmapPtr DecodeRGB(); - etcpak_no_inline BitmapPtr DecodeRGBA(); - etcpak_no_inline BitmapPtr DecodeDxt1(); - etcpak_no_inline BitmapPtr DecodeDxt5(); - - uint8_t* m_data; - v2i m_size; - size_t m_dataOffset; - FILE* m_file; - size_t m_maplen; - Type m_type; -}; - -typedef std::shared_ptr<BlockData> BlockDataPtr; - -#endif diff --git a/thirdparty/etcpak/ColorSpace.cpp b/thirdparty/etcpak/ColorSpace.cpp deleted file mode 100644 index 0411541066..0000000000 --- a/thirdparty/etcpak/ColorSpace.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include <math.h> -#include <stdint.h> - -#include "Math.hpp" -#include "ColorSpace.hpp" - -namespace Color -{ - - static const XYZ white( v3b( 255, 255, 255 ) ); - static const v3f rwhite( 1.f / white.x, 1.f / white.y, 1.f / white.z ); - - - XYZ::XYZ( float _x, float _y, float _z ) - : x( _x ) - , y( _y ) - , z( _z ) - { - } - - XYZ::XYZ( const v3b& rgb ) - { - const float r = rgb.x / 255.f; - const float g = rgb.y / 255.f; - const float b = rgb.z / 255.f; - - const float rl = sRGB2linear( r ); - const float gl = sRGB2linear( g ); - const float bl = sRGB2linear( b ); - - x = 0.4124f * rl + 0.3576f * gl + 0.1805f * bl; - y = 0.2126f * rl + 0.7152f * gl + 0.0722f * bl; - z = 0.0193f * rl + 0.1192f * gl + 0.9505f * bl; - } - - static float revlab( float t ) - { - const float p1 = 6.f/29.f; - const float p2 = 4.f/29.f; - - if( t > p1 ) - { - return t*t*t; - } - else - { - return 3 * sq( p1 ) * ( t - p2 ); - } - } - - XYZ::XYZ( const Lab& lab ) - { - y = white.y * revlab( 1.f/116.f * ( lab.L + 16 ) ); - x = white.x * revlab( 1.f/116.f * ( lab.L + 16 ) + 1.f/500.f * lab.a ); - z = white.z * revlab( 1.f/116.f * ( lab.L + 16 ) - 1.f/200.f * lab.b ); - } - - v3i XYZ::RGB() const - { - const float rl = 3.2406f * x - 1.5372f * y - 0.4986f * z; - const float gl = -0.9689f * x + 1.8758f * y + 0.0415f * z; - const float bl = 0.0557f * x - 0.2040f * y + 1.0570f * z; - - const float r = linear2sRGB( rl ); - const float g = linear2sRGB( gl ); - const float b = linear2sRGB( bl ); - - return v3i( clampu8( int32_t( r * 255 ) ), clampu8( int32_t( g * 255 ) ), clampu8( int32_t( b * 255 ) ) ); - } - - - Lab::Lab() - : L( 0 ) - , a( 0 ) - , b( 0 ) - { - } - - Lab::Lab( float L, float a, float b ) - : L( L ) - , a( a ) - , b( b ) - { - } - - static float labfunc( float t ) - { - const float p1 = (6.f/29.f)*(6.f/29.f)*(6.f/29.f); - const float p2 = (1.f/3.f)*(29.f/6.f)*(29.f/6.f); - const float p3 = (4.f/29.f); - - if( t > p1 ) - { - return pow( t, 1.f/3.f ); - } - else - { - return p2 * t + p3; - } - } - - Lab::Lab( const XYZ& xyz ) - { - L = 116 * labfunc( xyz.y * rwhite.y ) - 16; - a = 500 * ( labfunc( xyz.x * rwhite.x ) - labfunc( xyz.y * rwhite.y ) ); - b = 200 * ( labfunc( xyz.y * rwhite.y ) - labfunc( xyz.z * rwhite.z ) ); - } - - Lab::Lab( const v3b& rgb ) - { - new(this) Lab( XYZ( rgb ) ); - } - -} diff --git a/thirdparty/etcpak/ColorSpace.hpp b/thirdparty/etcpak/ColorSpace.hpp deleted file mode 100644 index c9d0a9cf3f..0000000000 --- a/thirdparty/etcpak/ColorSpace.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef __DARKRL__COLORSPACE_HPP__ -#define __DARKRL__COLORSPACE_HPP__ - -#include "Vector.hpp" - -namespace Color -{ - - class Lab; - - class XYZ - { - public: - XYZ( float x, float y, float z ); - XYZ( const v3b& rgb ); - XYZ( const Lab& lab ); - - v3i RGB() const; - - float x, y, z; - }; - - class Lab - { - public: - Lab(); - Lab( float L, float a, float b ); - Lab( const XYZ& xyz ); - Lab( const v3b& rgb ); - - float L, a, b; - }; - -} - -#endif diff --git a/thirdparty/etcpak/DataProvider.cpp b/thirdparty/etcpak/DataProvider.cpp deleted file mode 100644 index 6bd4b105ed..0000000000 --- a/thirdparty/etcpak/DataProvider.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include <assert.h> -#include <utility> - -#include "BitmapDownsampled.hpp" -#include "DataProvider.hpp" -#include "MipMap.hpp" - -DataProvider::DataProvider( const char* fn, bool mipmap, bool bgr ) - : m_offset( 0 ) - , m_mipmap( mipmap ) - , m_done( false ) - , m_lines( 32 ) -{ - m_bmp.emplace_back( new Bitmap( fn, m_lines, bgr ) ); - m_current = m_bmp[0].get(); -} - -DataProvider::~DataProvider() -{ -} - -unsigned int DataProvider::NumberOfParts() const -{ - unsigned int parts = ( ( m_bmp[0]->Size().y / 4 ) + m_lines - 1 ) / m_lines; - - if( m_mipmap ) - { - v2i current = m_bmp[0]->Size(); - int levels = NumberOfMipLevels( current ); - unsigned int lines = m_lines; - for( int i=1; i<levels; i++ ) - { - assert( current.x != 1 || current.y != 1 ); - current.x = std::max( 1, current.x / 2 ); - current.y = std::max( 1, current.y / 2 ); - lines *= 2; - parts += ( ( std::max( 4, current.y ) / 4 ) + lines - 1 ) / lines; - } - assert( current.x == 1 && current.y == 1 ); - } - - return parts; -} - -DataPart DataProvider::NextPart() -{ - assert( !m_done ); - - unsigned int lines = m_lines; - bool done; - - const auto ptr = m_current->NextBlock( lines, done ); - DataPart ret = { - ptr, - std::max<unsigned int>( 4, m_current->Size().x ), - lines, - m_offset - }; - - m_offset += m_current->Size().x / 4 * lines; - - if( done ) - { - if( m_mipmap && ( m_current->Size().x != 1 || m_current->Size().y != 1 ) ) - { - m_lines *= 2; - m_bmp.emplace_back( new BitmapDownsampled( *m_current, m_lines ) ); - m_current = m_bmp[m_bmp.size()-1].get(); - } - else - { - m_done = true; - } - } - - return ret; -} diff --git a/thirdparty/etcpak/DataProvider.hpp b/thirdparty/etcpak/DataProvider.hpp deleted file mode 100644 index e773801ed6..0000000000 --- a/thirdparty/etcpak/DataProvider.hpp +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __DATAPROVIDER_HPP__ -#define __DATAPROVIDER_HPP__ - -#include <memory> -#include <stdint.h> -#include <vector> - -#include "Bitmap.hpp" - -struct DataPart -{ - const uint32_t* src; - unsigned int width; - unsigned int lines; - unsigned int offset; -}; - -class DataProvider -{ -public: - DataProvider( const char* fn, bool mipmap, bool bgr ); - ~DataProvider(); - - unsigned int NumberOfParts() const; - - DataPart NextPart(); - - bool Alpha() const { return m_bmp[0]->Alpha(); } - const v2i& Size() const { return m_bmp[0]->Size(); } - const Bitmap& ImageData() const { return *m_bmp[0]; } - -private: - std::vector<std::unique_ptr<Bitmap>> m_bmp; - Bitmap* m_current; - unsigned int m_offset; - unsigned int m_lines; - bool m_mipmap; - bool m_done; -}; - -#endif diff --git a/thirdparty/etcpak/Debug.cpp b/thirdparty/etcpak/Debug.cpp deleted file mode 100644 index 72dc4e0526..0000000000 --- a/thirdparty/etcpak/Debug.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include <algorithm> -#include <vector> -#include "Debug.hpp" - -static std::vector<DebugLog::Callback*> s_callbacks; - -void DebugLog::Message( const char* msg ) -{ - for( auto it = s_callbacks.begin(); it != s_callbacks.end(); ++it ) - { - (*it)->OnDebugMessage( msg ); - } -} - -void DebugLog::AddCallback( Callback* c ) -{ - const auto it = std::find( s_callbacks.begin(), s_callbacks.end(), c ); - if( it == s_callbacks.end() ) - { - s_callbacks.push_back( c ); - } -} - -void DebugLog::RemoveCallback( Callback* c ) -{ - const auto it = std::find( s_callbacks.begin(), s_callbacks.end(), c ); - if( it != s_callbacks.end() ) - { - s_callbacks.erase( it ); - } -} diff --git a/thirdparty/etcpak/Debug.hpp b/thirdparty/etcpak/Debug.hpp deleted file mode 100644 index 524eaa7baf..0000000000 --- a/thirdparty/etcpak/Debug.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef __DARKRL__DEBUG_HPP__ -#define __DARKRL__DEBUG_HPP__ - -#ifdef DEBUG -# include <sstream> -# define DBGPRINT(msg) { std::stringstream __buf; __buf << msg; DebugLog::Message( __buf.str().c_str() ); } -#else -# define DBGPRINT(msg) ((void)0) -#endif - -class DebugLog -{ -public: - struct Callback - { - virtual void OnDebugMessage( const char* msg ) = 0; - }; - - static void Message( const char* msg ); - static void AddCallback( Callback* c ); - static void RemoveCallback( Callback* c ); - -private: - DebugLog() {} -}; - -#endif diff --git a/thirdparty/etcpak/Error.cpp b/thirdparty/etcpak/Error.cpp deleted file mode 100644 index 014ecdab66..0000000000 --- a/thirdparty/etcpak/Error.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include <stdint.h> - -#include "Error.hpp" -#include "Math.hpp" - -float CalcMSE3( const Bitmap& bmp, const Bitmap& out ) -{ - float err = 0; - - const uint32_t* p1 = bmp.Data(); - const uint32_t* p2 = out.Data(); - size_t cnt = bmp.Size().x * bmp.Size().y; - - for( size_t i=0; i<cnt; i++ ) - { - uint32_t c1 = *p1++; - uint32_t c2 = *p2++; - - err += sq( ( c1 & 0x000000FF ) - ( c2 & 0x000000FF ) ); - err += sq( ( ( c1 & 0x0000FF00 ) >> 8 ) - ( ( c2 & 0x0000FF00 ) >> 8 ) ); - err += sq( ( ( c1 & 0x00FF0000 ) >> 16 ) - ( ( c2 & 0x00FF0000 ) >> 16 ) ); - } - - err /= cnt * 3; - - return err; -} - -float CalcMSE1( const Bitmap& bmp, const Bitmap& out ) -{ - float err = 0; - - const uint32_t* p1 = bmp.Data(); - const uint32_t* p2 = out.Data(); - size_t cnt = bmp.Size().x * bmp.Size().y; - - for( size_t i=0; i<cnt; i++ ) - { - uint32_t c1 = *p1++; - uint32_t c2 = *p2++; - - err += sq( ( c1 >> 24 ) - ( c2 & 0xFF ) ); - } - - err /= cnt; - - return err; -} diff --git a/thirdparty/etcpak/Error.hpp b/thirdparty/etcpak/Error.hpp deleted file mode 100644 index 9817754b74..0000000000 --- a/thirdparty/etcpak/Error.hpp +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ERROR_HPP__ -#define __ERROR_HPP__ - -#include "Bitmap.hpp" - -float CalcMSE3( const Bitmap& bmp, const Bitmap& out ); -float CalcMSE1( const Bitmap& bmp, const Bitmap& out ); - -#endif diff --git a/thirdparty/etcpak/MipMap.hpp b/thirdparty/etcpak/MipMap.hpp deleted file mode 100644 index d3b4bc9e7c..0000000000 --- a/thirdparty/etcpak/MipMap.hpp +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __MIPMAP_HPP__ -#define __MIPMAP_HPP__ - -#include "Vector.hpp" - -inline int NumberOfMipLevels( const v2i& size ) -{ - return (int)floor( log2( std::max( size.x, size.y ) ) ) + 1; -} - -#endif diff --git a/thirdparty/etcpak/Semaphore.hpp b/thirdparty/etcpak/Semaphore.hpp deleted file mode 100644 index 9e42dbb9e0..0000000000 --- a/thirdparty/etcpak/Semaphore.hpp +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef __DARKRL__SEMAPHORE_HPP__ -#define __DARKRL__SEMAPHORE_HPP__ - -#include <condition_variable> -#include <mutex> - -class Semaphore -{ -public: - Semaphore( int count ) : m_count( count ) {} - - void lock() - { - std::unique_lock<std::mutex> lock( m_mutex ); - m_cv.wait( lock, [this](){ return m_count != 0; } ); - m_count--; - } - - void unlock() - { - std::lock_guard<std::mutex> lock( m_mutex ); - m_count++; - m_cv.notify_one(); - } - - bool try_lock() - { - std::lock_guard<std::mutex> lock( m_mutex ); - if( m_count == 0 ) - { - return false; - } - else - { - m_count--; - return true; - } - } - -private: - std::mutex m_mutex; - std::condition_variable m_cv; - unsigned int m_count; -}; - -#endif diff --git a/thirdparty/etcpak/System.cpp b/thirdparty/etcpak/System.cpp deleted file mode 100644 index 041f2676e8..0000000000 --- a/thirdparty/etcpak/System.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include <algorithm> -#ifdef _WIN32 -# include <windows.h> -#else -# include <unistd.h> -#endif - -#include "System.hpp" - -unsigned int System::CPUCores() -{ - static unsigned int cores = 0; - if( cores == 0 ) - { - int tmp; -#ifdef _WIN32 - SYSTEM_INFO info; - GetSystemInfo( &info ); - tmp = (int)info.dwNumberOfProcessors; -#else -# ifndef _SC_NPROCESSORS_ONLN -# ifdef _SC_NPROC_ONLN -# define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN -# elif defined _SC_CRAY_NCPU -# define _SC_NPROCESSORS_ONLN _SC_CRAY_NCPU -# endif -# endif - tmp = (int)(long)sysconf( _SC_NPROCESSORS_ONLN ); -#endif - cores = (unsigned int)std::max( tmp, 1 ); - } - return cores; -} - -void System::SetThreadName( std::thread& thread, const char* name ) -{ -#ifdef _MSC_VER - const DWORD MS_VC_EXCEPTION=0x406D1388; - -# pragma pack( push, 8 ) - struct THREADNAME_INFO - { - DWORD dwType; - LPCSTR szName; - DWORD dwThreadID; - DWORD dwFlags; - }; -# pragma pack(pop) - - DWORD ThreadId = GetThreadId( static_cast<HANDLE>( thread.native_handle() ) ); - THREADNAME_INFO info; - info.dwType = 0x1000; - info.szName = name; - info.dwThreadID = ThreadId; - info.dwFlags = 0; - - __try - { - RaiseException( MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(ULONG_PTR), (ULONG_PTR*)&info ); - } - __except(EXCEPTION_EXECUTE_HANDLER) - { - } -#endif -} diff --git a/thirdparty/etcpak/System.hpp b/thirdparty/etcpak/System.hpp deleted file mode 100644 index 1a09bb15e1..0000000000 --- a/thirdparty/etcpak/System.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __DARKRL__SYSTEM_HPP__ -#define __DARKRL__SYSTEM_HPP__ - -#include <thread> - -class System -{ -public: - System() = delete; - - static unsigned int CPUCores(); - static void SetThreadName( std::thread& thread, const char* name ); -}; - -#endif diff --git a/thirdparty/etcpak/TaskDispatch.cpp b/thirdparty/etcpak/TaskDispatch.cpp deleted file mode 100644 index b1ba17953b..0000000000 --- a/thirdparty/etcpak/TaskDispatch.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include <assert.h> -#include <stdio.h> -#ifndef _MSC_VER -#include <pthread.h> -#endif - -#include "Debug.hpp" -#include "System.hpp" -#include "TaskDispatch.hpp" - -static TaskDispatch* s_instance = nullptr; - -TaskDispatch::TaskDispatch( size_t workers ) - : m_exit( false ) - , m_jobs( 0 ) -{ - assert( !s_instance ); - s_instance = this; - - assert( workers >= 1 ); - workers--; - - m_workers.reserve( workers ); - for( size_t i=0; i<workers; i++ ) - { - char tmp[16]; - sprintf( tmp, "Worker %zu", i ); -#ifdef _MSC_VER - auto worker = std::thread( [this]{ Worker(); } ); - System::SetThreadName( worker, tmp ); -#else // Using pthread. - auto worker = std::thread( [this, tmp]{ -#ifdef __APPLE__ - pthread_setname_np( tmp ); -#else // Linux or MinGW. - pthread_setname_np( pthread_self(), tmp ); -#endif - Worker(); - } ); -#endif - m_workers.emplace_back( std::move( worker ) ); - } - - DBGPRINT( "Task dispatcher with " << m_workers.size() + 1 << " workers" ); -} - -TaskDispatch::~TaskDispatch() -{ - m_exit = true; - m_queueLock.lock(); - m_cvWork.notify_all(); - m_queueLock.unlock(); - - for( auto& worker : m_workers ) - { - worker.join(); - } - - assert( s_instance ); - s_instance = nullptr; -} - -void TaskDispatch::Queue( const std::function<void(void)>& f ) -{ - std::unique_lock<std::mutex> lock( s_instance->m_queueLock ); - s_instance->m_queue.emplace_back( f ); - const auto size = s_instance->m_queue.size(); - lock.unlock(); - if( size > 1 ) - { - s_instance->m_cvWork.notify_one(); - } -} - -void TaskDispatch::Queue( std::function<void(void)>&& f ) -{ - std::unique_lock<std::mutex> lock( s_instance->m_queueLock ); - s_instance->m_queue.emplace_back( std::move( f ) ); - const auto size = s_instance->m_queue.size(); - lock.unlock(); - if( size > 1 ) - { - s_instance->m_cvWork.notify_one(); - } -} - -void TaskDispatch::Sync() -{ - std::unique_lock<std::mutex> lock( s_instance->m_queueLock ); - while( !s_instance->m_queue.empty() ) - { - auto f = s_instance->m_queue.back(); - s_instance->m_queue.pop_back(); - lock.unlock(); - f(); - lock.lock(); - } - s_instance->m_cvJobs.wait( lock, []{ return s_instance->m_jobs == 0; } ); -} - -void TaskDispatch::Worker() -{ - for(;;) - { - std::unique_lock<std::mutex> lock( m_queueLock ); - m_cvWork.wait( lock, [this]{ return !m_queue.empty() || m_exit; } ); - if( m_exit ) return; - auto f = m_queue.back(); - m_queue.pop_back(); - m_jobs++; - lock.unlock(); - f(); - lock.lock(); - m_jobs--; - bool notify = m_jobs == 0 && m_queue.empty(); - lock.unlock(); - if( notify ) - { - m_cvJobs.notify_all(); - } - } -} diff --git a/thirdparty/etcpak/TaskDispatch.hpp b/thirdparty/etcpak/TaskDispatch.hpp deleted file mode 100644 index b513de4c0c..0000000000 --- a/thirdparty/etcpak/TaskDispatch.hpp +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __DARKRL__TASKDISPATCH_HPP__ -#define __DARKRL__TASKDISPATCH_HPP__ - -#include <atomic> -#include <condition_variable> -#include <functional> -#include <mutex> -#include <thread> -#include <vector> - -class TaskDispatch -{ -public: - TaskDispatch( size_t workers ); - ~TaskDispatch(); - - static void Queue( const std::function<void(void)>& f ); - static void Queue( std::function<void(void)>&& f ); - - static void Sync(); - -private: - void Worker(); - - std::vector<std::function<void(void)>> m_queue; - std::mutex m_queueLock; - std::condition_variable m_cvWork, m_cvJobs; - std::atomic<bool> m_exit; - size_t m_jobs; - - std::vector<std::thread> m_workers; -}; - -#endif diff --git a/thirdparty/etcpak/Timing.cpp b/thirdparty/etcpak/Timing.cpp deleted file mode 100644 index 2af851f9a9..0000000000 --- a/thirdparty/etcpak/Timing.cpp +++ /dev/null @@ -1,8 +0,0 @@ -#include <chrono> - -#include "Timing.hpp" - -uint64_t GetTime() -{ - return std::chrono::time_point_cast<std::chrono::microseconds>( std::chrono::high_resolution_clock::now() ).time_since_epoch().count(); -} diff --git a/thirdparty/etcpak/Timing.hpp b/thirdparty/etcpak/Timing.hpp deleted file mode 100644 index 3767e20f24..0000000000 --- a/thirdparty/etcpak/Timing.hpp +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __DARKRL__TIMING_HPP__ -#define __DARKRL__TIMING_HPP__ - -#include <stdint.h> - -uint64_t GetTime(); - -#endif diff --git a/thirdparty/etcpak/lz4/lz4.c b/thirdparty/etcpak/lz4/lz4.c deleted file mode 100644 index 08cf6b5cd7..0000000000 --- a/thirdparty/etcpak/lz4/lz4.c +++ /dev/null @@ -1,1516 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - - -/************************************** -* Tuning parameters -**************************************/ -/* - * HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). - */ -#define HEAPMODE 0 - -/* - * ACCELERATION_DEFAULT : - * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 - */ -#define ACCELERATION_DEFAULT 1 - - -/************************************** -* CPU Feature Detection -**************************************/ -/* - * LZ4_FORCE_SW_BITCOUNT - * Define this parameter if your target system or compiler does not support hardware bit count - */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ4_FORCE_SW_BITCOUNT -#endif - - -/************************************** -* Includes -**************************************/ -#include "lz4.h" - - -/************************************** -* Compiler Options -**************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include <intrin.h> -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ -#else -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# if defined(__GNUC__) || defined(__clang__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif /* _MSC_VER */ - -/* LZ4_GCC_VERSION is defined into lz4.h */ -#if (LZ4_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -/************************************** -* Memory routines -**************************************/ -#include <stdlib.h> /* malloc, calloc, free */ -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include <string.h> /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** -* Basic Types -**************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include <stdint.h> - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - - -/************************************** -* Reading and writing into memory -**************************************/ -#define STEPSIZE sizeof(size_t) - -static unsigned LZ4_64bits(void) { return sizeof(void*)==8; } - -static unsigned LZ4_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - - -static U16 LZ4_read16(const void* memPtr) -{ - U16 val16; - memcpy(&val16, memPtr, 2); - return val16; -} - -static U16 LZ4_readLE16(const void* memPtr) -{ - if (LZ4_isLittleEndian()) - { - return LZ4_read16(memPtr); - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] + (p[1]<<8)); - } -} - -static void LZ4_writeLE16(void* memPtr, U16 value) -{ - if (LZ4_isLittleEndian()) - { - memcpy(memPtr, &value, 2); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -static U32 LZ4_read32(const void* memPtr) -{ - U32 val32; - memcpy(&val32, memPtr, 4); - return val32; -} - -static U64 LZ4_read64(const void* memPtr) -{ - U64 val64; - memcpy(&val64, memPtr, 8); - return val64; -} - -static size_t LZ4_read_ARCH(const void* p) -{ - if (LZ4_64bits()) - return (size_t)LZ4_read64(p); - else - return (size_t)LZ4_read32(p); -} - - -static void LZ4_copy4(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 4); } - -static void LZ4_copy8(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 8); } - -/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ -static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* e = (BYTE*)dstEnd; - do { LZ4_copy8(d,s); d+=8; s+=8; } while (d<e); -} - - -/************************************** -* Common Constants -**************************************/ -#define MINMATCH 4 - -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (COPYLENGTH+MINMATCH) -static const int LZ4_minLength = (MFLIMIT+1); - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<<ML_BITS)-1) -#define RUN_BITS (8-ML_BITS) -#define RUN_MASK ((1U<<RUN_BITS)-1) - - -/************************************** -* Common Utils -**************************************/ -#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/************************************** -* Common functions -**************************************/ -static unsigned LZ4_NbCommonBytes (register size_t val) -{ - if (LZ4_isLittleEndian()) - { - if (LZ4_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } - else /* Big Endian CPU */ - { - if (LZ4_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll((U64)val) >> 3); -# else - unsigned r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ4_GCC_VERSION >= 304)) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } - } -} - -static unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while (likely(pIn<pInLimit-(STEPSIZE-1))) - { - size_t diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); - if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; } - pIn += LZ4_NbCommonBytes(diff); - return (unsigned)(pIn - pStart); - } - - if (LZ4_64bits()) if ((pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; - return (unsigned)(pIn - pStart); -} - - -#ifndef LZ4_COMMONDEFS_ONLY -/************************************** -* Local Constants -**************************************/ -#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) -#define HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ - -static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1)); -static const U32 LZ4_skipTrigger = 6; /* Increase this value ==> compression run slower on incompressible data */ - - -/************************************** -* Local Structures and types -**************************************/ -typedef struct { - U32 hashTable[HASH_SIZE_U32]; - U32 currentOffset; - U32 initCheck; - const BYTE* dictionary; - BYTE* bufferStart; /* obsolete, used for slideInputBuffer */ - U32 dictSize; -} LZ4_stream_t_internal; - -typedef enum { notLimited = 0, limitedOutput = 1 } limitedOutput_directive; -typedef enum { byPtr, byU32, byU16 } tableType_t; - -typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; - - -/************************************** -* Local Utils -**************************************/ -int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } -int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -int LZ4_sizeofState() { return LZ4_STREAMSIZE; } - - - -/******************************** -* Compression functions -********************************/ - -static U32 LZ4_hashSequence(U32 sequence, tableType_t const tableType) -{ - if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); -} - -static const U64 prime5bytes = 889523592379ULL; -static U32 LZ4_hashSequence64(size_t sequence, tableType_t const tableType) -{ - const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; - const U32 hashMask = (1<<hashLog) - 1; - return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask; -} - -static U32 LZ4_hashSequenceT(size_t sequence, tableType_t const tableType) -{ - if (LZ4_64bits()) - return LZ4_hashSequence64(sequence, tableType); - return LZ4_hashSequence((U32)sequence, tableType); -} - -static U32 LZ4_hashPosition(const void* p, tableType_t tableType) { return LZ4_hashSequenceT(LZ4_read_ARCH(p), tableType); } - -static void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) -{ - switch (tableType) - { - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } - } -} - -static void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -static const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } - if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } - { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ -} - -static const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - -FORCE_INLINE int LZ4_compress_generic( - void* const ctx, - const char* const source, - char* const dest, - const int inputSize, - const int maxOutputSize, - const limitedOutput_directive outputLimited, - const tableType_t tableType, - const dict_directive dict, - const dictIssue_directive dictIssue, - const U32 acceleration) -{ - LZ4_stream_t_internal* const dictPtr = (LZ4_stream_t_internal*)ctx; - - const BYTE* ip = (const BYTE*) source; - const BYTE* base; - const BYTE* lowLimit; - const BYTE* const lowRefLimit = ip - dictPtr->dictSize; - const BYTE* const dictionary = dictPtr->dictionary; - const BYTE* const dictEnd = dictionary + dictPtr->dictSize; - const size_t dictDelta = dictEnd - (const BYTE*)source; - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dest; - BYTE* const olimit = op + maxOutputSize; - - U32 forwardH; - size_t refDelta=0; - - /* Init conditions */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - switch(dict) - { - case noDict: - default: - base = (const BYTE*)source; - lowLimit = (const BYTE*)source; - break; - case withPrefix64k: - base = (const BYTE*)source - dictPtr->currentOffset; - lowLimit = (const BYTE*)source - dictPtr->dictSize; - break; - case usingExtDict: - base = (const BYTE*)source - dictPtr->currentOffset; - lowLimit = (const BYTE*)source; - break; - } - if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ - if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ - - /* First Byte */ - LZ4_putPosition(ip, ctx, tableType, base); - ip++; forwardH = LZ4_hashPosition(ip, tableType); - - /* Main Loop */ - for ( ; ; ) - { - const BYTE* match; - BYTE* token; - { - const BYTE* forwardIp = ip; - unsigned step = 1; - unsigned searchMatchNb = acceleration << LZ4_skipTrigger; - - /* Find a match */ - do { - U32 h = forwardH; - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimit)) goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx, tableType, base); - if (dict==usingExtDict) - { - if (match<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else - { - refDelta = 0; - lowLimit = (const BYTE*)source; - } - } - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0) - || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match+refDelta) != LZ4_read32(ip)) ); - } - - /* Catch up */ - while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; } - - { - /* Encode Literal length */ - unsigned litLength = (unsigned)(ip - anchor); - token = op++; - if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) - return 0; /* Check output limit */ - if (litLength>=RUN_MASK) - { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<<ML_BITS); - for(; len >= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength<<ML_BITS); - - /* Copy Literals */ - LZ4_wildCopy(op, anchor, op+litLength); - op+=litLength; - } - -_next_match: - /* Encode Offset */ - LZ4_writeLE16(op, (U16)(ip-match)); op+=2; - - /* Encode MatchLength */ - { - unsigned matchLength; - - if ((dict==usingExtDict) && (lowLimit==dictionary)) - { - const BYTE* limit; - match += refDelta; - limit = ip + (dictEnd-match); - if (limit > matchlimit) limit = matchlimit; - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); - ip += MINMATCH + matchLength; - if (ip==limit) - { - unsigned more = LZ4_count(ip, (const BYTE*)source, matchlimit); - matchLength += more; - ip += more; - } - } - else - { - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); - ip += MINMATCH + matchLength; - } - - if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit))) - return 0; /* Check output limit */ - if (matchLength>=ML_MASK) - { - *token += ML_MASK; - matchLength -= ML_MASK; - for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; } - if (matchLength >= 255) { matchLength-=255; *op++ = 255; } - *op++ = (BYTE)matchLength; - } - else *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of chunk */ - if (ip > mflimit) break; - - /* Fill table */ - LZ4_putPosition(ip-2, ctx, tableType, base); - - /* Test next position */ - match = LZ4_getPosition(ip, ctx, tableType, base); - if (dict==usingExtDict) - { - if (match<(const BYTE*)source) - { - refDelta = dictDelta; - lowLimit = dictionary; - } - else - { - refDelta = 0; - lowLimit = (const BYTE*)source; - } - } - LZ4_putPosition(ip, ctx, tableType, base); - if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1) - && (match+MAX_DISTANCE>=ip) - && (LZ4_read32(match+refDelta)==LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - const size_t lastRun = (size_t)(iend - anchor); - if ((outputLimited) && ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) - return 0; /* Check output limit */ - if (lastRun >= RUN_MASK) - { - size_t accumulator = lastRun - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } - else - { - *op++ = (BYTE)(lastRun<<ML_BITS); - } - memcpy(op, anchor, lastRun); - op += lastRun; - } - - /* End */ - return (int) (((char*)op)-dest); -} - - -int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_resetStream((LZ4_stream_t*)state); - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; - - if (maxOutputSize >= LZ4_compressBound(inputSize)) - { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); - } - else - { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); - } -} - - -int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ -#if (HEAPMODE) - void* ctxPtr = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ -#else - LZ4_stream_t ctx; - void* ctxPtr = &ctx; -#endif - - int result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); - -#if (HEAPMODE) - FREEMEM(ctxPtr); -#endif - return result; -} - - -int LZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize) -{ - return LZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1); -} - - -/* hidden debug function */ -/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ -int LZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t ctx; - - LZ4_resetStream(&ctx); - - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - else - return LZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); -} - - -/******************************** -* destSize variant -********************************/ - -static int LZ4_compress_destSize_generic( - void* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - const int targetDstSize, - const tableType_t tableType) -{ - const BYTE* ip = (const BYTE*) src; - const BYTE* base = (const BYTE*) src; - const BYTE* lowLimit = (const BYTE*) src; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dst; - BYTE* const oend = op + targetDstSize; - BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */; - BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */); - BYTE* const oMaxSeq = oMaxLit - 1 /* token */; - - U32 forwardH; - - - /* Init conditions */ - if (targetDstSize < 1) return 0; /* Impossible to store anything */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */ - if ((tableType == byU16) && (*srcSizePtr>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */ - if (*srcSizePtr<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ - - /* First Byte */ - *srcSizePtr = 0; - LZ4_putPosition(ip, ctx, tableType, base); - ip++; forwardH = LZ4_hashPosition(ip, tableType); - - /* Main Loop */ - for ( ; ; ) - { - const BYTE* match; - BYTE* token; - { - const BYTE* forwardIp = ip; - unsigned step = 1; - unsigned searchMatchNb = 1 << LZ4_skipTrigger; - - /* Find a match */ - do { - U32 h = forwardH; - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_skipTrigger); - - if (unlikely(forwardIp > mflimit)) - goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx, tableType, base); - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match) != LZ4_read32(ip)) ); - } - - /* Catch up */ - while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } - - { - /* Encode Literal length */ - unsigned litLength = (unsigned)(ip - anchor); - token = op++; - if (op + ((litLength+240)/255) + litLength > oMaxLit) - { - /* Not enough space for a last match */ - op--; - goto _last_literals; - } - if (litLength>=RUN_MASK) - { - unsigned len = litLength - RUN_MASK; - *token=(RUN_MASK<<ML_BITS); - for(; len >= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength<<ML_BITS); - - /* Copy Literals */ - LZ4_wildCopy(op, anchor, op+litLength); - op += litLength; - } - -_next_match: - /* Encode Offset */ - LZ4_writeLE16(op, (U16)(ip-match)); op+=2; - - /* Encode MatchLength */ - { - size_t matchLength; - - matchLength = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); - - if (op + ((matchLength+240)/255) > oMaxMatch) - { - /* Match description too long : reduce it */ - matchLength = (15-1) + (oMaxMatch-op) * 255; - } - //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH); - ip += MINMATCH + matchLength; - - if (matchLength>=ML_MASK) - { - *token += ML_MASK; - matchLength -= ML_MASK; - while (matchLength >= 255) { matchLength-=255; *op++ = 255; } - *op++ = (BYTE)matchLength; - } - else *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of block */ - if (ip > mflimit) break; - if (op > oMaxSeq) break; - - /* Fill table */ - LZ4_putPosition(ip-2, ctx, tableType, base); - - /* Test next position */ - match = LZ4_getPosition(ip, ctx, tableType, base); - LZ4_putPosition(ip, ctx, tableType, base); - if ( (match+MAX_DISTANCE>=ip) - && (LZ4_read32(match)==LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - size_t lastRunSize = (size_t)(iend - anchor); - if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend) - { - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (oend-op) - 1; - lastRunSize -= (lastRunSize+240)/255; - } - ip = anchor + lastRunSize; - - if (lastRunSize >= RUN_MASK) - { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } - else - { - *op++ = (BYTE)(lastRunSize<<ML_BITS); - } - memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } - - /* End */ - *srcSizePtr = (int) (((const char*)ip)-src); - return (int) (((char*)op)-dst); -} - - -static int LZ4_compress_destSize_extState (void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) -{ - LZ4_resetStream((LZ4_stream_t*)state); - - if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) /* compression success is guaranteed */ - { - return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); - } - else - { - if (*srcSizePtr < LZ4_64Klimit) - return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16); - else - return LZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ4_64bits() ? byU32 : byPtr); - } -} - - -int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) -{ -#if (HEAPMODE) - void* ctx = ALLOCATOR(1, sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ -#else - LZ4_stream_t ctxBody; - void* ctx = &ctxBody; -#endif - - int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); - -#if (HEAPMODE) - FREEMEM(ctx); -#endif - return result; -} - - - -/******************************** -* Streaming functions -********************************/ - -LZ4_stream_t* LZ4_createStream(void) -{ - LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(8, LZ4_STREAMSIZE_U64); - LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ - LZ4_resetStream(lz4s); - return lz4s; -} - -void LZ4_resetStream (LZ4_stream_t* LZ4_stream) -{ - MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); -} - -int LZ4_freeStream (LZ4_stream_t* LZ4_stream) -{ - FREEMEM(LZ4_stream); - return (0); -} - - -#define HASH_UNIT sizeof(size_t) -int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) -{ - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* p = (const BYTE*)dictionary; - const BYTE* const dictEnd = p + dictSize; - const BYTE* base; - - if ((dict->initCheck) || (dict->currentOffset > 1 GB)) /* Uninitialized structure, or reuse overflow */ - LZ4_resetStream(LZ4_dict); - - if (dictSize < (int)HASH_UNIT) - { - dict->dictionary = NULL; - dict->dictSize = 0; - return 0; - } - - if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; - dict->currentOffset += 64 KB; - base = p - dict->currentOffset; - dict->dictionary = p; - dict->dictSize = (U32)(dictEnd - p); - dict->currentOffset += dict->dictSize; - - while (p <= dictEnd-HASH_UNIT) - { - LZ4_putPosition(p, dict->hashTable, byU32, base); - p+=3; - } - - return dict->dictSize; -} - - -static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) -{ - if ((LZ4_dict->currentOffset > 0x80000000) || - ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */ - { - /* rescale hash table */ - U32 delta = LZ4_dict->currentOffset - 64 KB; - const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; - int i; - for (i=0; i<HASH_SIZE_U32; i++) - { - if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0; - else LZ4_dict->hashTable[i] -= delta; - } - LZ4_dict->currentOffset = 64 KB; - if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; - LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; - } -} - - -int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_stream; - const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; - - const BYTE* smallest = (const BYTE*) source; - if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */ - if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd; - LZ4_renormDictT(streamPtr, smallest); - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; - - /* Check overlapping input/dictionary space */ - { - const BYTE* sourceEnd = (const BYTE*) source + inputSize; - if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd)) - { - streamPtr->dictSize = (U32)(dictEnd - sourceEnd); - if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; - if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; - streamPtr->dictionary = dictEnd - streamPtr->dictSize; - } - } - - /* prefix mode : source data follows dictionary */ - if (dictEnd == (const BYTE*)source) - { - int result; - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration); - else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration); - streamPtr->dictSize += (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } - - /* external dictionary mode */ - { - int result; - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration); - else - result = LZ4_compress_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration); - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } -} - - -/* Hidden debug function, to force external dictionary mode */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) -{ - LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict; - int result; - const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize; - - const BYTE* smallest = dictEnd; - if (smallest > (const BYTE*) source) smallest = (const BYTE*) source; - LZ4_renormDictT((LZ4_stream_t_internal*)LZ4_dict, smallest); - - result = LZ4_compress_generic(LZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); - - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - - return result; -} - - -int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) -{ - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; - - if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ - if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; - - memmove(safeBuffer, previousDictEnd - dictSize, dictSize); - - dict->dictionary = (const BYTE*)safeBuffer; - dict->dictSize = (U32)dictSize; - - return dictSize; -} - - - -/******************************* -* Decompression functions -*******************************/ -/* - * This generic decompression function cover all use cases. - * It shall be instantiated several times, using different sets of directives - * Note that it is essential this generic function is really inlined, - * in order to remove useless branches during compilation optimization. - */ -FORCE_INLINE int LZ4_decompress_generic( - const char* const source, - char* const dest, - int inputSize, - int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */ - - int endOnInput, /* endOnOutputSize, endOnInputSize */ - int partialDecoding, /* full, partial */ - int targetOutputSize, /* only used if partialDecoding==partial */ - int dict, /* noDict, withPrefix64k, usingExtDict */ - const BYTE* const lowPrefix, /* == dest if dict == noDict */ - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note : = 0 if noDict */ - ) -{ - /* Local Variables */ - const BYTE* ip = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + outputSize; - BYTE* cpy; - BYTE* oexit = op + targetOutputSize; - const BYTE* const lowLimit = lowPrefix - dictSize; - - const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; - const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; - const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; - - const int safeDecode = (endOnInput==endOnInputSize); - const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); - - - /* Special cases */ - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ - if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ - if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); - - - /* Main Loop */ - while (1) - { - unsigned token; - size_t length; - const BYTE* match; - - /* get literal length */ - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - unsigned s; - do - { - s = *ip++; - length += s; - } - while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255)); - if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* overflow detection */ - if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* overflow detection */ - } - - /* copy literals */ - cpy = op+length; - if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-COPYLENGTH))) - { - if (partialDecoding) - { - if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */ - if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */ - } - else - { - if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ - } - memcpy(op, ip, length); - ip += length; - op += length; - break; /* Necessarily EOF, due to parsing restrictions */ - } - LZ4_wildCopy(op, ip, cpy); - ip += length; op = cpy; - - /* get offset */ - match = cpy - LZ4_readLE16(ip); ip+=2; - if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */ - - /* get matchlength */ - length = token & ML_MASK; - if (length == ML_MASK) - { - unsigned s; - do - { - if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error; - s = *ip++; - length += s; - } while (s==255); - if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */ - } - length += MINMATCH; - - /* check external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) - { - if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */ - - if (length <= (size_t)(lowPrefix-match)) - { - /* match can be copied as a single segment from external dictionary */ - match = dictEnd - (lowPrefix-match); - memmove(op, match, length); op += length; - } - else - { - /* match encompass external dictionary and current segment */ - size_t copySize = (size_t)(lowPrefix-match); - memcpy(op, dictEnd - copySize, copySize); - op += copySize; - copySize = length - copySize; - if (copySize > (size_t)(op-lowPrefix)) /* overlap within current segment */ - { - BYTE* const endOfMatch = op + copySize; - const BYTE* copyFrom = lowPrefix; - while (op < endOfMatch) *op++ = *copyFrom++; - } - else - { - memcpy(op, lowPrefix, copySize); - op += copySize; - } - } - continue; - } - - /* copy repeated sequence */ - cpy = op + length; - if (unlikely((op-match)<8)) - { - const size_t dec64 = dec64table[op-match]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[op-match]; - LZ4_copy4(op+4, match); - op += 8; match -= dec64; - } else { LZ4_copy8(op, match); op+=8; match+=8; } - - if (unlikely(cpy>oend-12)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */ - if (op < oend-8) - { - LZ4_wildCopy(op, match, oend-8); - match += (oend-8) - op; - op = oend-8; - } - while (op<cpy) *op++ = *match++; - } - else - LZ4_wildCopy(op, match, cpy); - op=cpy; /* correction */ - } - - /* end of decoding */ - if (endOnInput) - return (int) (((char*)op)-dest); /* Nb of output bytes decoded */ - else - return (int) (((const char*)ip)-source); /* Nb of input bytes read */ - - /* Overflow error detected */ -_output_error: - return (int) (-(((const char*)ip)-source))-1; -} - - -int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0); -} - -int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0); -} - -int LZ4_decompress_fast(const char* source, char* dest, int originalSize) -{ - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB); -} - - -/* streaming decompression functions */ - -typedef struct -{ - const BYTE* externalDict; - size_t extDictSize; - const BYTE* prefixEnd; - size_t prefixSize; -} LZ4_streamDecode_t_internal; - -/* - * If you prefer dynamic allocation methods, - * LZ4_createStreamDecode() - * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. - */ -LZ4_streamDecode_t* LZ4_createStreamDecode(void) -{ - LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(1, sizeof(LZ4_streamDecode_t)); - return lz4s; -} - -int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) -{ - FREEMEM(LZ4_stream); - return 0; -} - -/* - * LZ4_setStreamDecode - * Use this function to instruct where to find the dictionary - * This function is not necessary if previous data is still available where it was decoded. - * Loading a size of 0 is allowed (same effect as no dictionary). - * Return : 1 if OK, 0 if error - */ -int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - lz4sd->prefixSize = (size_t) dictSize; - lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; - lz4sd->externalDict = NULL; - lz4sd->extDictSize = 0; - return 1; -} - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks must still be available at the memory position where they were decoded. - If it's not possible, save the relevant part of decoded data into a safe buffer, - and indicate where it stands using LZ4_setStreamDecode() -*/ -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - int result; - - if (lz4sd->prefixEnd == (BYTE*)dest) - { - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; - } - else - { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } - - return result; -} - -int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) -{ - LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; - int result; - - if (lz4sd->prefixEnd == (BYTE*)dest) - { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += originalSize; - } - else - { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } - - return result; -} - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as "_continue" ones, - the dictionary must be explicitly provided within parameters -*/ - -FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize) -{ - if (dictSize==0) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0); - if (dictStart+dictSize == dest) - { - if (dictSize >= (int)(64 KB - 1)) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0); - } - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - -int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize); -} - -int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize); -} - -/* debug function */ -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - - -/*************************************************** -* Obsolete Functions -***************************************************/ -/* obsolete compression functions */ -int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_default(source, dest, inputSize, maxOutputSize); } -int LZ4_compress(const char* source, char* dest, int inputSize) { return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize)); } -int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); } -int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); } -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, maxDstSize, 1); } -int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); } - -/* -These function names are deprecated and should no longer be used. -They are only provided here for compatibility with older user programs. -- LZ4_uncompress is totally equivalent to LZ4_decompress_fast -- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe -*/ -int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } - - -/* Obsolete Streaming functions */ - -int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } - -static void LZ4_init(LZ4_stream_t_internal* lz4ds, BYTE* base) -{ - MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE); - lz4ds->bufferStart = base; -} - -int LZ4_resetStreamState(void* state, char* inputBuffer) -{ - if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */ - LZ4_init((LZ4_stream_t_internal*)state, (BYTE*)inputBuffer); - return 0; -} - -void* LZ4_create (char* inputBuffer) -{ - void* lz4ds = ALLOCATOR(8, LZ4_STREAMSIZE_U64); - LZ4_init ((LZ4_stream_t_internal*)lz4ds, (BYTE*)inputBuffer); - return lz4ds; -} - -char* LZ4_slideInputBuffer (void* LZ4_Data) -{ - LZ4_stream_t_internal* ctx = (LZ4_stream_t_internal*)LZ4_Data; - int dictSize = LZ4_saveDict((LZ4_stream_t*)LZ4_Data, (char*)ctx->bufferStart, 64 KB); - return (char*)(ctx->bufferStart + dictSize); -} - -/* Obsolete streaming decompression functions */ - -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) -{ - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -#endif /* LZ4_COMMONDEFS_ONLY */ - diff --git a/thirdparty/etcpak/lz4/lz4.h b/thirdparty/etcpak/lz4/lz4.h deleted file mode 100644 index 3e74002256..0000000000 --- a/thirdparty/etcpak/lz4/lz4.h +++ /dev/null @@ -1,360 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Header File - Copyright (C) 2011-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - -/* - * lz4.h provides block compression functions, and gives full buffer control to programmer. - * If you need to generate inter-operable compressed data (respecting LZ4 frame specification), - * and can let the library handle its own memory, please use lz4frame.h instead. -*/ - -/************************************** -* Version -**************************************/ -#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ4_VERSION_MINOR 7 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */ -#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) -int LZ4_versionNumber (void); - -/************************************** -* Tuning parameter -**************************************/ -/* - * LZ4_MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) - * Increasing memory usage improves compression ratio - * Reduced memory usage can improve speed, due to cache effect - * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache - */ -#define LZ4_MEMORY_USAGE 14 - - -/************************************** -* Simple Functions -**************************************/ - -int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize); -int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize); - -/* -LZ4_compress_default() : - Compresses 'sourceSize' bytes from buffer 'source' - into already allocated 'dest' buffer of size 'maxDestSize'. - Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize). - It also runs faster, so it's a recommended setting. - If the function cannot compress 'source' into a more limited 'dest' budget, - compression stops *immediately*, and the function result is zero. - As a consequence, 'dest' content is not valid. - This function never writes outside 'dest' buffer, nor read outside 'source' buffer. - sourceSize : Max supported value is LZ4_MAX_INPUT_VALUE - maxDestSize : full or partial size of buffer 'dest' (which must be already allocated) - return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize) - or 0 if compression fails - -LZ4_decompress_safe() : - compressedSize : is the precise full size of the compressed block. - maxDecompressedSize : is the size of destination buffer, which must be already allocated. - return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize) - If destination buffer is not large enough, decoding will stop and output an error code (<0). - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function is protected against buffer overflow exploits, including malicious data packets. - It never writes outside output buffer, nor reads outside input buffer. -*/ - - -/************************************** -* Advanced Functions -**************************************/ -#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ -#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) - -/* -LZ4_compressBound() : - Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) - This function is primarily useful for memory allocation purposes (destination buffer size). - Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example). - Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize) - inputSize : max supported value is LZ4_MAX_INPUT_SIZE - return : maximum output size in a "worst case" scenario - or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) -*/ -int LZ4_compressBound(int inputSize); - -/* -LZ4_compress_fast() : - Same as LZ4_compress_default(), but allows to select an "acceleration" factor. - The larger the acceleration value, the faster the algorithm, but also the lesser the compression. - It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. - An acceleration value of "1" is the same as regular LZ4_compress_default() - Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1. -*/ -int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration); - - -/* -LZ4_compress_fast_extState() : - Same compression function, just using an externally allocated memory space to store compression state. - Use LZ4_sizeofState() to know how much memory must be allocated, - and allocate it on 8-bytes boundaries (using malloc() typically). - Then, provide it as 'void* state' to compression function. -*/ -int LZ4_sizeofState(void); -int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration); - - -/* -LZ4_compress_destSize() : - Reverse the logic, by compressing as much data as possible from 'source' buffer - into already allocated buffer 'dest' of size 'targetDestSize'. - This function either compresses the entire 'source' content into 'dest' if it's large enough, - or fill 'dest' buffer completely with as much data as possible from 'source'. - *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'. - New value is necessarily <= old value. - return : Nb bytes written into 'dest' (necessarily <= targetDestSize) - or 0 if compression fails -*/ -int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize); - - -/* -LZ4_decompress_fast() : - originalSize : is the original and therefore uncompressed size - return : the number of bytes read from the source buffer (in other words, the compressed size) - If the source stream is detected malformed, the function will stop decoding and return a negative result. - Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes. - note : This function fully respect memory boundaries for properly formed compressed data. - It is a bit faster than LZ4_decompress_safe(). - However, it does not provide any protection against intentionally modified data stream (malicious input). - Use this function in trusted environment only (data to decode comes from a trusted source). -*/ -int LZ4_decompress_fast (const char* source, char* dest, int originalSize); - -/* -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'compressedSize' at position 'source' - into destination buffer 'dest' of size 'maxDecompressedSize'. - The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, - reducing decompression time. - return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize) - Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. - Always control how many bytes were decoded. - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets -*/ -int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize); - - -/*********************************************** -* Streaming Compression Functions -***********************************************/ -#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4) -#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(long long)) -/* - * LZ4_stream_t - * information structure to track an LZ4 stream. - * important : init this structure content before first use ! - * note : only allocated directly the structure if you are statically linking LZ4 - * If you are using liblz4 as a DLL, please use below construction methods instead. - */ -typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t; - -/* - * LZ4_resetStream - * Use this function to init an allocated LZ4_stream_t structure - */ -void LZ4_resetStream (LZ4_stream_t* streamPtr); - -/* - * LZ4_createStream will allocate and initialize an LZ4_stream_t structure - * LZ4_freeStream releases its memory. - * In the context of a DLL (liblz4), please use these methods rather than the static struct. - * They are more future proof, in case of a change of LZ4_stream_t size. - */ -LZ4_stream_t* LZ4_createStream(void); -int LZ4_freeStream (LZ4_stream_t* streamPtr); - -/* - * LZ4_loadDict - * Use this function to load a static dictionary into LZ4_stream. - * Any previous data will be forgotten, only 'dictionary' will remain in memory. - * Loading a size of 0 is allowed. - * Return : dictionary size, in bytes (necessarily <= 64 KB) - */ -int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); - -/* - * LZ4_compress_fast_continue - * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio. - * Important : Previous data blocks are assumed to still be present and unmodified ! - * 'dst' buffer must be already allocated. - * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. - * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero. - */ -int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration); - -/* - * LZ4_saveDict - * If previously compressed data block is not guaranteed to remain available at its memory location - * save it into a safer place (char* safeBuffer) - * Note : you don't need to call LZ4_loadDict() afterwards, - * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue() - * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error - */ -int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize); - - -/************************************************ -* Streaming Decompression Functions -************************************************/ - -#define LZ4_STREAMDECODESIZE_U64 4 -#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long)) -typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t; -/* - * LZ4_streamDecode_t - * information structure to track an LZ4 stream. - * init this structure content using LZ4_setStreamDecode or memset() before first use ! - * - * In the context of a DLL (liblz4) please prefer usage of construction methods below. - * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future. - * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure - * LZ4_freeStreamDecode releases its memory. - */ -LZ4_streamDecode_t* LZ4_createStreamDecode(void); -int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); - -/* - * LZ4_setStreamDecode - * Use this function to instruct where to find the dictionary. - * Setting a size of 0 is allowed (same effect as reset). - * Return : 1 if OK, 0 if error - */ -int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB) - In the case of a ring buffers, decoding buffer must be either : - - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions) - In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB). - - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including small ones ( < 64 KB). - - _At least_ 64 KB + 8 bytes + maxBlockSize. - In which case, encoding and decoding buffers do not need to be synchronized, - and encoding ring buffer can have any size, including larger than decoding buffer. - Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer, - and indicate where it is saved using LZ4_setStreamDecode() -*/ -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize); -int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize); - - -/* -Advanced decoding functions : -*_usingDict() : - These decoding functions work the same as - a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue() - They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure. -*/ -int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize); -int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize); - - - -/************************************** -* Obsolete Functions -**************************************/ -/* Deprecate Warnings */ -/* Should these warnings messages be a problem, - it is generally possible to disable them, - with -Wno-deprecated-declarations for gcc - or _CRT_SECURE_NO_WARNINGS in Visual for example. - You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */ -#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK -# define LZ4_DEPRECATE_WARNING_DEFBLOCK -# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -# if (LZ4_GCC_VERSION >= 405) || defined(__clang__) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif (LZ4_GCC_VERSION >= 301) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) -# elif defined(_MSC_VER) -# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) -# else -# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler") -# define LZ4_DEPRECATED(message) -# endif -#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */ - -/* Obsolete compression functions */ -/* These functions are planned to start generate warnings by r131 approximately */ -int LZ4_compress (const char* source, char* dest, int sourceSize); -int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize); -int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); - -/* Obsolete decompression functions */ -/* These function names are completely deprecated and must no longer be used. - They are only provided here for compatibility with older programs. - - LZ4_uncompress is the same as LZ4_decompress_fast - - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe - These function prototypes are now disabled; uncomment them only if you really need them. - It is highly recommended to stop using these prototypes and migrate to maintained ones */ -/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */ -/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */ - -/* Obsolete streaming functions; use new streaming interface whenever possible */ -LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer); -LZ4_DEPRECATED("use LZ4_createStream() instead") int LZ4_sizeofStreamState(void); -LZ4_DEPRECATED("use LZ4_resetStream() instead") int LZ4_resetStreamState(void* state, char* inputBuffer); -LZ4_DEPRECATED("use LZ4_saveDict() instead") char* LZ4_slideInputBuffer (void* state); - -/* Obsolete streaming decoding functions */ -LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); -LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); - - -#if defined (__cplusplus) -} -#endif diff --git a/thirdparty/etcpak/mmap.cpp b/thirdparty/etcpak/mmap.cpp deleted file mode 100644 index c2460ee9e4..0000000000 --- a/thirdparty/etcpak/mmap.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include "mmap.hpp" - -#ifdef _WIN32 -# include <io.h> -# include <windows.h> - -void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset ) -{ - HANDLE hnd; - void* map = nullptr; - - switch( prot ) - { - case PROT_READ: - if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READONLY, 0, DWORD( length ), nullptr ) ) - { - map = MapViewOfFile( hnd, FILE_MAP_READ, 0, 0, length ); - CloseHandle( hnd ); - } - break; - case PROT_WRITE: - if( hnd = CreateFileMapping( HANDLE( _get_osfhandle( fd ) ), nullptr, PAGE_READWRITE, 0, DWORD( length ), nullptr ) ) - { - map = MapViewOfFile( hnd, FILE_MAP_WRITE, 0, 0, length ); - CloseHandle( hnd ); - } - break; - } - - return map ? (char*)map + offset : (void*)-1; -} - -int munmap( void* addr, size_t length ) -{ - return UnmapViewOfFile( addr ) != 0 ? 0 : -1; -} - -#endif diff --git a/thirdparty/etcpak/mmap.hpp b/thirdparty/etcpak/mmap.hpp deleted file mode 100644 index e4cfe7759c..0000000000 --- a/thirdparty/etcpak/mmap.hpp +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __MMAP_HPP__ -#define __MMAP_HPP__ - -#ifndef _WIN32 -# include <sys/mman.h> -#else -# include <string.h> -# include <sys/types.h> - -# define PROT_READ 1 -# define PROT_WRITE 2 -# define MAP_SHARED 0 - -void* mmap( void* addr, size_t length, int prot, int flags, int fd, off_t offset ); -int munmap( void* addr, size_t length ); - -#endif - -#endif diff --git a/thirdparty/etcpak/patches/libpng-unbundle.patch b/thirdparty/etcpak/patches/libpng-unbundle.patch deleted file mode 100644 index e3c07412c6..0000000000 --- a/thirdparty/etcpak/patches/libpng-unbundle.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff --git a/thirdparty/etcpak/Bitmap.cpp b/thirdparty/etcpak/Bitmap.cpp -index 6aa36f5caa..ef318318ac 100644 ---- a/thirdparty/etcpak/Bitmap.cpp -+++ b/thirdparty/etcpak/Bitmap.cpp -@@ -3,7 +3,7 @@ - #include <string.h> - #include <assert.h> - --#include "libpng/png.h" -+#include <png.h> - #include "lz4/lz4.h" - - #include "Bitmap.hpp" diff --git a/thirdparty/etcpak/patches/llvm-c++11-narrowing-errors.patch b/thirdparty/etcpak/patches/llvm-c++11-narrowing-errors.patch deleted file mode 100644 index ab0d1e63a2..0000000000 --- a/thirdparty/etcpak/patches/llvm-c++11-narrowing-errors.patch +++ /dev/null @@ -1,64 +0,0 @@ -diff --git a/thirdparty/etcpak/BlockData.cpp b/thirdparty/etcpak/BlockData.cpp -index bd738085f3..395b55246b 100644 ---- a/thirdparty/etcpak/BlockData.cpp -+++ b/thirdparty/etcpak/BlockData.cpp -@@ -334,10 +334,10 @@ static etcpak_force_inline void DecodeT( uint64_t block, uint32_t* dst, uint32_t - const auto c3b = clampu8( cb1 - table59T58H[codeword] ); - - const uint32_t col_tab[4] = { -- cr0 | ( cg0 << 8 ) | ( cb0 << 16 ) | 0xFF000000, -- c2r | ( c2g << 8 ) | ( c2b << 16 ) | 0xFF000000, -- cr1 | ( cg1 << 8 ) | ( cb1 << 16 ) | 0xFF000000, -- c3r | ( c3g << 8 ) | ( c3b << 16 ) | 0xFF000000 -+ uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 ) | 0xFF000000), -+ uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 ) | 0xFF000000), -+ uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 ) | 0xFF000000), -+ uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 ) | 0xFF000000) - }; - - const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF; -@@ -389,10 +389,10 @@ static etcpak_force_inline void DecodeTAlpha( uint64_t block, uint64_t alpha, ui - const auto c3b = clampu8( cb1 - table59T58H[codeword] ); - - const uint32_t col_tab[4] = { -- cr0 | ( cg0 << 8 ) | ( cb0 << 16 ), -- c2r | ( c2g << 8 ) | ( c2b << 16 ), -- cr1 | ( cg1 << 8 ) | ( cb1 << 16 ), -- c3r | ( c3g << 8 ) | ( c3b << 16 ) -+ uint32_t(cr0 | ( cg0 << 8 ) | ( cb0 << 16 )), -+ uint32_t(c2r | ( c2g << 8 ) | ( c2b << 16 )), -+ uint32_t(cr1 | ( cg1 << 8 ) | ( cb1 << 16 )), -+ uint32_t(c3r | ( c3g << 8 ) | ( c3b << 16 )) - }; - - const uint32_t indexes = ( block >> 32 ) & 0xFFFFFFFF; -@@ -436,10 +436,10 @@ static etcpak_force_inline void DecodeH( uint64_t block, uint32_t* dst, uint32_t - const auto codeword = codeword_hi | codeword_lo; - - const uint32_t col_tab[] = { -- clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 ), -- clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 ), -- clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 ), -- clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 ) -+ uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )), -+ uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )), -+ uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )), -+ uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 )) - }; - - for( uint8_t j = 0; j < 4; j++ ) -@@ -483,10 +483,10 @@ static etcpak_force_inline void DecodeHAlpha( uint64_t block, uint64_t alpha, ui - const auto tbl = g_alpha[(alpha >> 48) & 0xF]; - - const uint32_t col_tab[] = { -- clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 ), -- clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 ), -- clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 ), -- clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 ) -+ uint32_t(clampu8( r0 + table59T58H[codeword] ) | ( clampu8( g0 + table59T58H[codeword] ) << 8 ) | ( clampu8( b0 + table59T58H[codeword] ) << 16 )), -+ uint32_t(clampu8( r0 - table59T58H[codeword] ) | ( clampu8( g0 - table59T58H[codeword] ) << 8 ) | ( clampu8( b0 - table59T58H[codeword] ) << 16 )), -+ uint32_t(clampu8( r1 + table59T58H[codeword] ) | ( clampu8( g1 + table59T58H[codeword] ) << 8 ) | ( clampu8( b1 + table59T58H[codeword] ) << 16 )), -+ uint32_t(clampu8( r1 - table59T58H[codeword] ) | ( clampu8( g1 - table59T58H[codeword] ) << 8 ) | ( clampu8( b1 - table59T58H[codeword] ) << 16 )) - }; - - for( uint8_t j = 0; j < 4; j++ ) diff --git a/thirdparty/etcpak/patches/pthread-setname.patch b/thirdparty/etcpak/patches/pthread-setname.patch deleted file mode 100644 index e2b009a1b3..0000000000 --- a/thirdparty/etcpak/patches/pthread-setname.patch +++ /dev/null @@ -1,66 +0,0 @@ -diff --git a/thirdparty/etcpak/System.cpp b/thirdparty/etcpak/System.cpp -index 1383d0ecd0..041f2676e8 100644 ---- a/thirdparty/etcpak/System.cpp -+++ b/thirdparty/etcpak/System.cpp -@@ -2,7 +2,6 @@ - #ifdef _WIN32 - # include <windows.h> - #else --# include <pthread.h> - # include <unistd.h> - #endif - -@@ -35,7 +34,7 @@ unsigned int System::CPUCores() - - void System::SetThreadName( std::thread& thread, const char* name ) - { --#ifdef _WIN32 -+#ifdef _MSC_VER - const DWORD MS_VC_EXCEPTION=0x406D1388; - - # pragma pack( push, 8 ) -@@ -62,7 +61,5 @@ void System::SetThreadName( std::thread& thread, const char* name ) - __except(EXCEPTION_EXECUTE_HANDLER) - { - } --#elif !defined(__APPLE__) -- pthread_setname_np( thread.native_handle(), name ); - #endif - } -diff --git a/thirdparty/etcpak/TaskDispatch.cpp b/thirdparty/etcpak/TaskDispatch.cpp -index 7287da4de2..b1ba17953b 100644 ---- a/thirdparty/etcpak/TaskDispatch.cpp -+++ b/thirdparty/etcpak/TaskDispatch.cpp -@@ -1,5 +1,8 @@ - #include <assert.h> - #include <stdio.h> -+#ifndef _MSC_VER -+#include <pthread.h> -+#endif - - #include "Debug.hpp" - #include "System.hpp" -@@ -22,15 +25,19 @@ TaskDispatch::TaskDispatch( size_t workers ) - { - char tmp[16]; - sprintf( tmp, "Worker %zu", i ); --#ifdef __APPLE__ -+#ifdef _MSC_VER -+ auto worker = std::thread( [this]{ Worker(); } ); -+ System::SetThreadName( worker, tmp ); -+#else // Using pthread. - auto worker = std::thread( [this, tmp]{ -+#ifdef __APPLE__ - pthread_setname_np( tmp ); -+#else // Linux or MinGW. -+ pthread_setname_np( pthread_self(), tmp ); -+#endif - Worker(); - } ); --#else -- auto worker = std::thread( [this]{ Worker(); } ); - #endif -- System::SetThreadName( worker, tmp ); - m_workers.emplace_back( std::move( worker ) ); - } - diff --git a/thirdparty/etcpak/patches/windows-mingw-bswap.patch b/thirdparty/etcpak/patches/windows-mingw-bswap.patch deleted file mode 100644 index c09192f573..0000000000 --- a/thirdparty/etcpak/patches/windows-mingw-bswap.patch +++ /dev/null @@ -1,50 +0,0 @@ -diff --git a/thirdparty/etcpak/BlockData.cpp b/thirdparty/etcpak/BlockData.cpp -index a2cd032c5b..bd738085f3 100644 ---- a/thirdparty/etcpak/BlockData.cpp -+++ b/thirdparty/etcpak/BlockData.cpp -@@ -15,7 +15,7 @@ - # include <arm_neon.h> - #endif - --#ifdef __SSE4_1__ -+#if defined __SSE4_1__ || defined __AVX2__ || defined _MSC_VER - # ifdef _MSC_VER - # include <intrin.h> - # include <Windows.h> -@@ -24,12 +24,6 @@ - # else - # include <x86intrin.h> - # endif --#else --# ifndef _MSC_VER --# include <byteswap.h> --# define _bswap(x) bswap_32(x) --# define _bswap64(x) bswap_64(x) --# endif - #endif - - #ifndef _bswap -diff --git a/thirdparty/etcpak/ProcessRGB.cpp b/thirdparty/etcpak/ProcessRGB.cpp -index 220d5c55e2..9dc5a78b67 100644 ---- a/thirdparty/etcpak/ProcessRGB.cpp -+++ b/thirdparty/etcpak/ProcessRGB.cpp -@@ -1,5 +1,6 @@ - #include <array> - #include <string.h> -+#include <limits> - - #ifdef __ARM_NEON - # include <arm_neon.h> -@@ -21,12 +22,6 @@ - # else - # include <x86intrin.h> - # endif --#else --# ifndef _MSC_VER --# include <byteswap.h> --# define _bswap(x) bswap_32(x) --# define _bswap64(x) bswap_64(x) --# endif - #endif - - #ifndef _bswap diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md index 4fcd766d22..3c52415f62 100644 --- a/thirdparty/meshoptimizer/LICENSE.md +++ b/thirdparty/meshoptimizer/LICENSE.md @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2016-2020 Arseny Kapoulkine +Copyright (c) 2016-2021 Arseny Kapoulkine Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp index f7d88c5136..f8aad7b49c 100644 --- a/thirdparty/meshoptimizer/clusterizer.cpp +++ b/thirdparty/meshoptimizer/clusterizer.cpp @@ -2,6 +2,7 @@ #include "meshoptimizer.h" #include <assert.h> +#include <float.h> #include <math.h> #include <string.h> @@ -12,6 +13,68 @@ namespace meshopt { +// This must be <= 255 since index 0xff is used internally to indice a vertex that doesn't belong to a meshlet +const size_t kMeshletMaxVertices = 255; + +// A reasonable limit is around 2*max_vertices or less +const size_t kMeshletMaxTriangles = 512; + +struct TriangleAdjacency2 +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildTriangleAdjacency(TriangleAdjacency2& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill triangle counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill triangle data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = unsigned(i); + adjacency.data[adjacency.offsets[b]++] = unsigned(i); + adjacency.data[adjacency.offsets[c]++] = unsigned(i); + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + static void computeBoundingSphere(float result[4], const float points[][3], size_t count) { assert(count > 0); @@ -82,13 +145,310 @@ static void computeBoundingSphere(float result[4], const float points[][3], size result[3] = radius; } +struct Cone +{ + float px, py, pz; + float nx, ny, nz; +}; + +static float getMeshletScore(float distance2, float spread, float cone_weight, float expected_radius) +{ + float cone = 1.f - spread * cone_weight; + float cone_clamped = cone < 1e-3f ? 1e-3f : cone; + + return (1 + sqrtf(distance2) / expected_radius * (1 - cone_weight)) * cone_clamped; +} + +static Cone getMeshletCone(const Cone& acc, unsigned int triangle_count) +{ + Cone result = acc; + + float center_scale = triangle_count == 0 ? 0.f : 1.f / float(triangle_count); + + result.px *= center_scale; + result.py *= center_scale; + result.pz *= center_scale; + + float axis_length = result.nx * result.nx + result.ny * result.ny + result.nz * result.nz; + float axis_scale = axis_length == 0.f ? 0.f : 1.f / sqrtf(axis_length); + + result.nx *= axis_scale; + result.ny *= axis_scale; + result.nz *= axis_scale; + + return result; +} + +static float computeTriangleCones(Cone* triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + (void)vertex_count; + + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + size_t face_count = index_count / 3; + + float mesh_area = 0; + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* p0 = vertex_positions + vertex_stride_float * a; + const float* p1 = vertex_positions + vertex_stride_float * b; + const float* p2 = vertex_positions + vertex_stride_float * c; + + float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; + + float normalx = p10[1] * p20[2] - p10[2] * p20[1]; + float normaly = p10[2] * p20[0] - p10[0] * p20[2]; + float normalz = p10[0] * p20[1] - p10[1] * p20[0]; + + float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); + float invarea = (area == 0.f) ? 0.f : 1.f / area; + + triangles[i].px = (p0[0] + p1[0] + p2[0]) / 3.f; + triangles[i].py = (p0[1] + p1[1] + p2[1]) / 3.f; + triangles[i].pz = (p0[2] + p1[2] + p2[2]) / 3.f; + + triangles[i].nx = normalx * invarea; + triangles[i].ny = normaly * invarea; + triangles[i].nz = normalz * invarea; + + mesh_area += area; + } + + return mesh_area; +} + +static void finishMeshlet(meshopt_Meshlet& meshlet, unsigned char* meshlet_triangles) +{ + size_t offset = meshlet.triangle_offset + meshlet.triangle_count * 3; + + // fill 4b padding with 0 + while (offset & 3) + meshlet_triangles[offset++] = 0; +} + +static bool appendMeshlet(meshopt_Meshlet& meshlet, unsigned int a, unsigned int b, unsigned int c, unsigned char* used, meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, size_t meshlet_offset, size_t max_vertices, size_t max_triangles) +{ + unsigned char& av = used[a]; + unsigned char& bv = used[b]; + unsigned char& cv = used[c]; + + bool result = false; + + unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + + if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + { + meshlets[meshlet_offset] = meshlet; + + for (size_t j = 0; j < meshlet.vertex_count; ++j) + used[meshlet_vertices[meshlet.vertex_offset + j]] = 0xff; + + finishMeshlet(meshlet, meshlet_triangles); + + meshlet.vertex_offset += meshlet.vertex_count; + meshlet.triangle_offset += (meshlet.triangle_count * 3 + 3) & ~3; // 4b padding + meshlet.vertex_count = 0; + meshlet.triangle_count = 0; + + result = true; + } + + if (av == 0xff) + { + av = (unsigned char)meshlet.vertex_count; + meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = a; + } + + if (bv == 0xff) + { + bv = (unsigned char)meshlet.vertex_count; + meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = b; + } + + if (cv == 0xff) + { + cv = (unsigned char)meshlet.vertex_count; + meshlet_vertices[meshlet.vertex_offset + meshlet.vertex_count++] = c; + } + + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 0] = av; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 1] = bv; + meshlet_triangles[meshlet.triangle_offset + meshlet.triangle_count * 3 + 2] = cv; + meshlet.triangle_count++; + + return result; +} + +struct KDNode +{ + union + { + float split; + unsigned int index; + }; + + // leaves: axis = 3, children = number of extra points after this one (0 if 'index' is the only point) + // branches: axis != 3, left subtree = skip 1, right subtree = skip 1+children + unsigned int axis : 2; + unsigned int children : 30; +}; + +static size_t kdtreePartition(unsigned int* indices, size_t count, const float* points, size_t stride, unsigned int axis, float pivot) +{ + size_t m = 0; + + // invariant: elements in range [0, m) are < pivot, elements in range [m, i) are >= pivot + for (size_t i = 0; i < count; ++i) + { + float v = points[indices[i] * stride + axis]; + + // swap(m, i) unconditionally + unsigned int t = indices[m]; + indices[m] = indices[i]; + indices[i] = t; + + // when v >= pivot, we swap i with m without advancing it, preserving invariants + m += v < pivot; + } + + return m; +} + +static size_t kdtreeBuildLeaf(size_t offset, KDNode* nodes, size_t node_count, unsigned int* indices, size_t count) +{ + assert(offset + count <= node_count); + (void)node_count; + + KDNode& result = nodes[offset]; + + result.index = indices[0]; + result.axis = 3; + result.children = unsigned(count - 1); + + // all remaining points are stored in nodes immediately following the leaf + for (size_t i = 1; i < count; ++i) + { + KDNode& tail = nodes[offset + i]; + + tail.index = indices[i]; + tail.axis = 3; + tail.children = ~0u >> 2; // bogus value to prevent misuse + } + + return offset + count; +} + +static size_t kdtreeBuild(size_t offset, KDNode* nodes, size_t node_count, const float* points, size_t stride, unsigned int* indices, size_t count, size_t leaf_size) +{ + assert(count > 0); + assert(offset < node_count); + + if (count <= leaf_size) + return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); + + float mean[3] = {}; + float vars[3] = {}; + float runc = 1, runs = 1; + + // gather statistics on the points in the subtree using Welford's algorithm + for (size_t i = 0; i < count; ++i, runc += 1.f, runs = 1.f / runc) + { + const float* point = points + indices[i] * stride; + + for (int k = 0; k < 3; ++k) + { + float delta = point[k] - mean[k]; + mean[k] += delta * runs; + vars[k] += delta * (point[k] - mean[k]); + } + } + + // split axis is one where the variance is largest + unsigned int axis = vars[0] >= vars[1] && vars[0] >= vars[2] ? 0 : vars[1] >= vars[2] ? 1 + : 2; + + float split = mean[axis]; + size_t middle = kdtreePartition(indices, count, points, stride, axis, split); + + // when the partition is degenerate simply consolidate the points into a single node + if (middle <= leaf_size / 2 || middle >= count - leaf_size / 2) + return kdtreeBuildLeaf(offset, nodes, node_count, indices, count); + + KDNode& result = nodes[offset]; + + result.split = split; + result.axis = axis; + + // left subtree is right after our node + size_t next_offset = kdtreeBuild(offset + 1, nodes, node_count, points, stride, indices, middle, leaf_size); + + // distance to the right subtree is represented explicitly + result.children = unsigned(next_offset - offset - 1); + + return kdtreeBuild(next_offset, nodes, node_count, points, stride, indices + middle, count - middle, leaf_size); +} + +static void kdtreeNearest(KDNode* nodes, unsigned int root, const float* points, size_t stride, const unsigned char* emitted_flags, const float* position, unsigned int& result, float& limit) +{ + const KDNode& node = nodes[root]; + + if (node.axis == 3) + { + // leaf + for (unsigned int i = 0; i <= node.children; ++i) + { + unsigned int index = nodes[root + i].index; + + if (emitted_flags[index]) + continue; + + const float* point = points + index * stride; + + float distance2 = + (point[0] - position[0]) * (point[0] - position[0]) + + (point[1] - position[1]) * (point[1] - position[1]) + + (point[2] - position[2]) * (point[2] - position[2]); + float distance = sqrtf(distance2); + + if (distance < limit) + { + result = index; + limit = distance; + } + } + } + else + { + // branch; we order recursion to process the node that search position is in first + float delta = position[node.axis] - node.split; + unsigned int first = (delta <= 0) ? 0 : node.children; + unsigned int second = first ^ node.children; + + kdtreeNearest(nodes, root + 1 + first, points, stride, emitted_flags, position, result, limit); + + // only process the other node if it can have a match based on closest distance so far + if (fabsf(delta) <= limit) + kdtreeNearest(nodes, root + 1 + second, points, stride, emitted_flags, position, result, limit); + } +} + } // namespace meshopt size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) { + using namespace meshopt; + assert(index_count % 3 == 0); - assert(max_vertices >= 3); - assert(max_triangles >= 1); + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); + assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + + (void)kMeshletMaxVertices; + (void)kMeshletMaxTriangles; // meshlet construction is limited by max vertices and max triangles per meshlet // the worst case is that the input is an unindexed stream since this equally stresses both limits @@ -100,77 +460,226 @@ size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_ return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles; } -size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) { + using namespace meshopt; + assert(index_count % 3 == 0); - assert(max_vertices >= 3); - assert(max_triangles >= 1); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); + assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned meshopt_Allocator allocator; - meshopt_Meshlet meshlet; - memset(&meshlet, 0, sizeof(meshlet)); + TriangleAdjacency2 adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + size_t face_count = index_count / 3; + + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + // for each triangle, precompute centroid & normal to use for scoring + Cone* triangles = allocator.allocate<Cone>(face_count); + float mesh_area = computeTriangleCones(triangles, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride); + + // assuming each meshlet is a square patch, expected radius is sqrt(expected area) + float triangle_area_avg = face_count == 0 ? 0.f : mesh_area / float(face_count) * 0.5f; + float meshlet_expected_radius = sqrtf(triangle_area_avg * max_triangles) * 0.5f; + + // build a kd-tree for nearest neighbor lookup + unsigned int* kdindices = allocator.allocate<unsigned int>(face_count); + for (size_t i = 0; i < face_count; ++i) + kdindices[i] = unsigned(i); - assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0])); - assert(max_triangles <= sizeof(meshlet.indices) / 3); + KDNode* nodes = allocator.allocate<KDNode>(face_count * 2); + kdtreeBuild(0, nodes, face_count * 2, &triangles[0].px, sizeof(Cone) / sizeof(float), kdindices, face_count, /* leaf_size= */ 8); // index of the vertex in the meshlet, 0xff if the vertex isn't used unsigned char* used = allocator.allocate<unsigned char>(vertex_count); memset(used, -1, vertex_count); - size_t offset = 0; + meshopt_Meshlet meshlet = {}; + size_t meshlet_offset = 0; - for (size_t i = 0; i < index_count; i += 3) - { - unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; - assert(a < vertex_count && b < vertex_count && c < vertex_count); + Cone meshlet_cone_acc = {}; - unsigned char& av = used[a]; - unsigned char& bv = used[b]; - unsigned char& cv = used[c]; + for (;;) + { + unsigned int best_triangle = ~0u; + unsigned int best_extra = 5; + float best_score = FLT_MAX; - unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + Cone meshlet_cone = getMeshletCone(meshlet_cone_acc, meshlet.triangle_count); - if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + for (size_t i = 0; i < meshlet.vertex_count; ++i) { - destination[offset++] = meshlet; + unsigned int index = meshlet_vertices[meshlet.vertex_offset + i]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t j = 0; j < neighbours_size; ++j) + { + unsigned int triangle = neighbours[j]; + assert(!emitted_flags[triangle]); + + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + unsigned int extra = (used[a] == 0xff) + (used[b] == 0xff) + (used[c] == 0xff); + + // triangles that don't add new vertices to meshlets are max. priority + if (extra != 0) + { + // artificially increase the priority of dangling triangles as they're expensive to add to new meshlets + if (live_triangles[a] == 1 || live_triangles[b] == 1 || live_triangles[c] == 1) + extra = 0; + + extra++; + } + + // since topology-based priority is always more important than the score, we can skip scoring in some cases + if (extra > best_extra) + continue; + + const Cone& tri_cone = triangles[triangle]; + + float distance2 = + (tri_cone.px - meshlet_cone.px) * (tri_cone.px - meshlet_cone.px) + + (tri_cone.py - meshlet_cone.py) * (tri_cone.py - meshlet_cone.py) + + (tri_cone.pz - meshlet_cone.pz) * (tri_cone.pz - meshlet_cone.pz); - for (size_t j = 0; j < meshlet.vertex_count; ++j) - used[meshlet.vertices[j]] = 0xff; + float spread = tri_cone.nx * meshlet_cone.nx + tri_cone.ny * meshlet_cone.ny + tri_cone.nz * meshlet_cone.nz; - memset(&meshlet, 0, sizeof(meshlet)); + float score = getMeshletScore(distance2, spread, cone_weight, meshlet_expected_radius); + + // note that topology-based priority is always more important than the score + // this helps maintain reasonable effectiveness of meshlet data and reduces scoring cost + if (extra < best_extra || score < best_score) + { + best_triangle = triangle; + best_extra = extra; + best_score = score; + } + } } - if (av == 0xff) + if (best_triangle == ~0u) { - av = meshlet.vertex_count; - meshlet.vertices[meshlet.vertex_count++] = a; + float position[3] = {meshlet_cone.px, meshlet_cone.py, meshlet_cone.pz}; + unsigned int index = ~0u; + float limit = FLT_MAX; + + kdtreeNearest(nodes, 0, &triangles[0].px, sizeof(Cone) / sizeof(float), emitted_flags, position, index, limit); + + best_triangle = index; } - if (bv == 0xff) + if (best_triangle == ~0u) + break; + + unsigned int a = indices[best_triangle * 3 + 0], b = indices[best_triangle * 3 + 1], c = indices[best_triangle * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + // add meshlet to the output; when the current meshlet is full we reset the accumulated bounds + if (appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles)) { - bv = meshlet.vertex_count; - meshlet.vertices[meshlet.vertex_count++] = b; + meshlet_offset++; + memset(&meshlet_cone_acc, 0, sizeof(meshlet_cone_acc)); } - if (cv == 0xff) + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // remove emitted triangle from adjacency data + // this makes sure that we spend less time traversing these lists on subsequent iterations + for (size_t k = 0; k < 3; ++k) { - cv = meshlet.vertex_count; - meshlet.vertices[meshlet.vertex_count++] = c; + unsigned int index = indices[best_triangle * 3 + k]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t i = 0; i < neighbours_size; ++i) + { + unsigned int tri = neighbours[i]; + + if (tri == best_triangle) + { + neighbours[i] = neighbours[neighbours_size - 1]; + adjacency.counts[index]--; + break; + } + } } - meshlet.indices[meshlet.triangle_count][0] = av; - meshlet.indices[meshlet.triangle_count][1] = bv; - meshlet.indices[meshlet.triangle_count][2] = cv; - meshlet.triangle_count++; + // update aggregated meshlet cone data for scoring subsequent triangles + meshlet_cone_acc.px += triangles[best_triangle].px; + meshlet_cone_acc.py += triangles[best_triangle].py; + meshlet_cone_acc.pz += triangles[best_triangle].pz; + meshlet_cone_acc.nx += triangles[best_triangle].nx; + meshlet_cone_acc.ny += triangles[best_triangle].ny; + meshlet_cone_acc.nz += triangles[best_triangle].nz; + + emitted_flags[best_triangle] = 1; + } + + if (meshlet.triangle_count) + { + finishMeshlet(meshlet, meshlet_triangles); + + meshlets[meshlet_offset++] = meshlet; + } + + assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + return meshlet_offset; +} + +size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + + assert(max_vertices >= 3 && max_vertices <= kMeshletMaxVertices); + assert(max_triangles >= 1 && max_triangles <= kMeshletMaxTriangles); + assert(max_triangles % 4 == 0); // ensures the caller will compute output space properly as index data is 4b aligned + + meshopt_Allocator allocator; + + // index of the vertex in the meshlet, 0xff if the vertex isn't used + unsigned char* used = allocator.allocate<unsigned char>(vertex_count); + memset(used, -1, vertex_count); + + meshopt_Meshlet meshlet = {}; + size_t meshlet_offset = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + // appends triangle to the meshlet and writes previous meshlet to the output if full + meshlet_offset += appendMeshlet(meshlet, a, b, c, used, meshlets, meshlet_vertices, meshlet_triangles, meshlet_offset, max_vertices, max_triangles); } if (meshlet.triangle_count) - destination[offset++] = meshlet; + { + finishMeshlet(meshlet, meshlet_triangles); - assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + meshlets[meshlet_offset++] = meshlet; + } - return offset; + assert(meshlet_offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + return meshlet_offset; } meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) @@ -178,18 +687,17 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t using namespace meshopt; assert(index_count % 3 == 0); + assert(index_count / 3 <= kMeshletMaxTriangles); assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); - assert(index_count / 3 <= 256); - (void)vertex_count; size_t vertex_stride_float = vertex_positions_stride / sizeof(float); // compute triangle normals and gather triangle corners - float normals[256][3]; - float corners[256][3][3]; + float normals[kMeshletMaxTriangles][3]; + float corners[kMeshletMaxTriangles][3][3]; size_t triangles = 0; for (size_t i = 0; i < index_count; i += 3) @@ -327,25 +835,23 @@ meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t return bounds; } -meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) { + using namespace meshopt; + + assert(triangle_count <= kMeshletMaxTriangles); assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); assert(vertex_positions_stride % sizeof(float) == 0); - unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])]; + unsigned int indices[kMeshletMaxTriangles * 3]; - for (size_t i = 0; i < meshlet->triangle_count; ++i) + for (size_t i = 0; i < triangle_count * 3; ++i) { - unsigned int a = meshlet->vertices[meshlet->indices[i][0]]; - unsigned int b = meshlet->vertices[meshlet->indices[i][1]]; - unsigned int c = meshlet->vertices[meshlet->indices[i][2]]; - - assert(a < vertex_count && b < vertex_count && c < vertex_count); + unsigned int index = meshlet_vertices[meshlet_triangles[i]]; + assert(index < vertex_count); - indices[i * 3 + 0] = a; - indices[i * 3 + 1] = b; - indices[i * 3 + 2] = c; + indices[i] = index; } - return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); + return meshopt_computeClusterBounds(indices, triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); } diff --git a/thirdparty/meshoptimizer/indexgenerator.cpp b/thirdparty/meshoptimizer/indexgenerator.cpp index aa4a30efa4..f60db0dc4f 100644 --- a/thirdparty/meshoptimizer/indexgenerator.cpp +++ b/thirdparty/meshoptimizer/indexgenerator.cpp @@ -4,6 +4,8 @@ #include <assert.h> #include <string.h> +// This work is based on: +// John McDonald, Mark Kilgard. Crack-Free Point-Normal Triangles using Adjacent Edge Normals. 2010 namespace meshopt { @@ -83,10 +85,49 @@ struct VertexStreamHasher } }; +struct EdgeHasher +{ + const unsigned int* remap; + + size_t hash(unsigned long long edge) const + { + unsigned int e0 = unsigned(edge >> 32); + unsigned int e1 = unsigned(edge); + + unsigned int h1 = remap[e0]; + unsigned int h2 = remap[e1]; + + const unsigned int m = 0x5bd1e995; + + // MurmurHash64B finalizer + h1 ^= h2 >> 18; + h1 *= m; + h2 ^= h1 >> 22; + h2 *= m; + h1 ^= h2 >> 17; + h1 *= m; + h2 ^= h1 >> 19; + h2 *= m; + + return h2; + } + + bool equal(unsigned long long lhs, unsigned long long rhs) const + { + unsigned int l0 = unsigned(lhs >> 32); + unsigned int l1 = unsigned(lhs); + + unsigned int r0 = unsigned(rhs >> 32); + unsigned int r1 = unsigned(rhs); + + return remap[l0] == remap[r0] && remap[l1] == remap[r1]; + } +}; + static size_t hashBuckets(size_t count) { size_t buckets = 1; - while (buckets < count) + while (buckets < count + count / 4) buckets *= 2; return buckets; @@ -119,6 +160,26 @@ static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, c return 0; } +static void buildPositionRemap(unsigned int* remap, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +{ + VertexHasher vertex_hasher = {reinterpret_cast<const unsigned char*>(vertex_positions), 3 * sizeof(float), vertex_positions_stride}; + + size_t vertex_table_size = hashBuckets(vertex_count); + unsigned int* vertex_table = allocator.allocate<unsigned int>(vertex_table_size); + memset(vertex_table, -1, vertex_table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int index = unsigned(i); + unsigned int* entry = hashLookup(vertex_table, vertex_table_size, vertex_hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } +} + } // namespace meshopt size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) @@ -345,3 +406,146 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns destination[i] = remap[index]; } } + +void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + static const int next[4] = {1, 2, 0, 1}; + + // build position remap: for each vertex, which other (canonical) vertex does it map to? + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator); + + // build edge set; this stores all triangle edges but we can look these up by any other wedge + EdgeHasher edge_hasher = {remap}; + + size_t edge_table_size = hashBuckets(index_count); + unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size); + unsigned int* edge_vertex_table = allocator.allocate<unsigned int>(edge_table_size); + + memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); + memset(edge_vertex_table, -1, edge_table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; i += 3) + { + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + unsigned int i2 = indices[i + next[e + 1]]; + assert(i0 < vertex_count && i1 < vertex_count && i2 < vertex_count); + + unsigned long long edge = ((unsigned long long)i0 << 32) | i1; + unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + if (*entry == ~0ull) + { + *entry = edge; + + // store vertex opposite to the edge + edge_vertex_table[entry - edge_table] = i2; + } + } + } + + // build resulting index buffer: 6 indices for each input triangle + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int patch[6]; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + assert(i0 < vertex_count && i1 < vertex_count); + + // note: this refers to the opposite edge! + unsigned long long edge = ((unsigned long long)i1 << 32) | i0; + unsigned long long* oppe = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + patch[e * 2 + 0] = i0; + patch[e * 2 + 1] = (*oppe == ~0ull) ? i0 : edge_vertex_table[oppe - edge_table]; + } + + memcpy(destination + i * 2, patch, sizeof(patch)); + } +} + +void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + static const int next[3] = {1, 2, 0}; + + // build position remap: for each vertex, which other (canonical) vertex does it map to? + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + buildPositionRemap(remap, vertex_positions, vertex_count, vertex_positions_stride, allocator); + + // build edge set; this stores all triangle edges but we can look these up by any other wedge + EdgeHasher edge_hasher = {remap}; + + size_t edge_table_size = hashBuckets(index_count); + unsigned long long* edge_table = allocator.allocate<unsigned long long>(edge_table_size); + memset(edge_table, -1, edge_table_size * sizeof(unsigned long long)); + + for (size_t i = 0; i < index_count; i += 3) + { + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + assert(i0 < vertex_count && i1 < vertex_count); + + unsigned long long edge = ((unsigned long long)i0 << 32) | i1; + unsigned long long* entry = hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + if (*entry == ~0ull) + *entry = edge; + } + } + + // build resulting index buffer: 12 indices for each input triangle + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int patch[12]; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + assert(i0 < vertex_count && i1 < vertex_count); + + // note: this refers to the opposite edge! + unsigned long long edge = ((unsigned long long)i1 << 32) | i0; + unsigned long long oppe = *hashLookup(edge_table, edge_table_size, edge_hasher, edge, ~0ull); + + // use the same edge if opposite edge doesn't exist (border) + oppe = (oppe == ~0ull) ? edge : oppe; + + // triangle index (0, 1, 2) + patch[e] = i0; + + // opposite edge (3, 4; 5, 6; 7, 8) + patch[3 + e * 2 + 0] = unsigned(oppe); + patch[3 + e * 2 + 1] = unsigned(oppe >> 32); + + // dominant vertex (9, 10, 11) + patch[9 + e] = remap[i0]; + } + + memcpy(destination + i * 4, patch, sizeof(patch)); + } +} diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h index 1714000384..fe8d349731 100644 --- a/thirdparty/meshoptimizer/meshoptimizer.h +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -1,7 +1,7 @@ /** - * meshoptimizer - version 0.15 + * meshoptimizer - version 0.16 * - * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Copyright (C) 2016-2021, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) * Report bugs and download new versions at https://github.com/zeux/meshoptimizer * * This library is distributed under the MIT License. See notice at the end of this file. @@ -12,7 +12,7 @@ #include <stddef.h> /* Version macro; major * 1000 + minor * 10 + patch */ -#define MESHOPTIMIZER_VERSION 150 /* 0.15 */ +#define MESHOPTIMIZER_VERSION 160 /* 0.16 */ /* If no API is defined, assume default */ #ifndef MESHOPTIMIZER_API @@ -98,6 +98,35 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); /** + * Generate index buffer that can be used as a geometry shader input with triangle adjacency topology + * Each triangle is converted into a 6-vertex patch with the following layout: + * - 0, 2, 4: original triangle vertices + * - 1, 3, 5: vertices adjacent to edges 02, 24 and 40 + * The resulting patch can be rendered with geometry shaders using e.g. VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY. + * This can be used to implement algorithms like silhouette detection/expansion and other forms of GS-driven rendering. + * + * destination must contain enough space for the resulting index buffer (index_count*2 elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Generate index buffer that can be used for PN-AEN tessellation with crack-free displacement + * Each triangle is converted into a 12-vertex patch with the following layout: + * - 0, 1, 2: original triangle vertices + * - 3, 4: opposing edge for edge 0, 1 + * - 5, 6: opposing edge for edge 1, 2 + * - 7, 8: opposing edge for edge 2, 0 + * - 9, 10, 11: dominant vertices for corners 0, 1, 2 + * The resulting patch can be rendered with hardware tessellation using PN-AEN and displacement mapping. + * See "Tessellation on Any Budget" (John McDonald, GDC 2011) for implementation details. + * + * destination must contain enough space for the resulting index buffer (index_count*4 elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_generateTessellationIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** * Vertex transform cache optimizer * Reorders indices to reduce the number of GPU vertex shader invocations * If index buffer contains multiple ranges for multiple draw calls, this functions needs to be called on each range individually. @@ -373,22 +402,31 @@ MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetc struct meshopt_Meshlet { - unsigned int vertices[64]; - unsigned char indices[126][3]; - unsigned char triangle_count; - unsigned char vertex_count; + /* offsets within meshlet_vertices and meshlet_triangles arrays with meshlet data */ + unsigned int vertex_offset; + unsigned int triangle_offset; + + /* number of vertices and triangles used in the meshlet; data is stored in consecutive range defined by offset and count */ + unsigned int vertex_count; + unsigned int triangle_count; }; /** * Experimental: Meshlet builder * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. - * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * When using buildMeshlets, vertex positions need to be provided to minimize the size of the resulting clusters. + * When using buildMeshletsScan, for maximum efficiency the index buffer being converted has to be optimized for vertex cache first. * - * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound - * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126) + * meshlets must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound + * meshlet_vertices must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_vertices + * meshlet_triangles must contain enough space for all meshlets, worst case size is equal to max_meshlets * max_triangles * 3 + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * max_vertices and max_triangles must not exceed implementation limits (max_vertices <= 255 - not 256!, max_triangles <= 512) + * cone_weight should be set to 0 when cone culling is not used, and a value between 0 and 1 otherwise to balance between cluster size and cone culling efficiency */ -MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsScan(struct meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); struct meshopt_Bounds @@ -426,10 +464,10 @@ struct meshopt_Bounds * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. * * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer - * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size) + * index_count/3 should be less than or equal to 512 (the function assumes clusters of limited size) */ MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); -MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); /** * Experimental: Spatial sorter @@ -513,6 +551,10 @@ inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, template <typename T> inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); template <typename T> +inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> +inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); template <typename T> inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); @@ -547,7 +589,9 @@ inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size template <typename T> inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); template <typename T> -inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight); +template <typename T> +inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); template <typename T> inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); template <typename T> @@ -762,6 +806,24 @@ inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indi } template <typename T> +inline void meshopt_generateAdjacencyIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count * 2); + + meshopt_generateAdjacencyIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> +inline void meshopt_generateTessellationIndexBuffer(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count * 4); + + meshopt_generateTessellationIndexBuffer(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) { meshopt_IndexAdapter<T> in(0, indices, index_count); @@ -908,11 +970,19 @@ inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices } template <typename T> -inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t max_vertices, size_t max_triangles, float cone_weight) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_buildMeshlets(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, max_vertices, max_triangles, cone_weight); +} + +template <typename T> +inline size_t meshopt_buildMeshletsScan(meshopt_Meshlet* meshlets, unsigned int* meshlet_vertices, unsigned char* meshlet_triangles, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) { meshopt_IndexAdapter<T> in(0, indices, index_count); - return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles); + return meshopt_buildMeshletsScan(meshlets, meshlet_vertices, meshlet_triangles, in.data, index_count, vertex_count, max_vertices, max_triangles); } template <typename T> @@ -934,7 +1004,7 @@ inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_ #endif /** - * Copyright (c) 2016-2020 Arseny Kapoulkine + * Copyright (c) 2016-2021 Arseny Kapoulkine * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp index 942db14461..b2cb589462 100644 --- a/thirdparty/meshoptimizer/simplifier.cpp +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -131,7 +131,7 @@ struct PositionHasher static size_t hashBuckets2(size_t count) { size_t buckets = 1; - while (buckets < count) + while (buckets < count + count / 4) buckets *= 2; return buckets; diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp index 2cbfaac367..5f3ec204ab 100644 --- a/thirdparty/meshoptimizer/vertexcodec.cpp +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -710,18 +710,12 @@ static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) SIMD_TARGET static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) { - v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3); - - uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull; - uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull; + // magic constant found using z3 SMT assuming mask has 8 groups of 0xff or 0x00 + const uint64_t magic = 0x000103070f1f3f80ull; // TODO: This can use v8x16_bitmask in the future - uint64_t mask_2 = mask_1a | mask_1b; - uint64_t mask_4 = mask_2 | (mask_2 >> 16); - uint64_t mask_8 = mask_4 | (mask_4 >> 8); - - mask0 = uint8_t(mask_8); - mask1 = uint8_t(mask_8 >> 32); + mask0 = uint8_t((wasm_i64x2_extract_lane(mask, 0) * magic) >> 56); + mask1 = uint8_t((wasm_i64x2_extract_lane(mask, 1) * magic) >> 56); } SIMD_TARGET |