diff options
Diffstat (limited to 'thirdparty/zstd/common')
-rw-r--r-- | thirdparty/zstd/common/bitstream.h | 48 | ||||
-rw-r--r-- | thirdparty/zstd/common/compiler.h | 49 | ||||
-rw-r--r-- | thirdparty/zstd/common/cpu.h | 5 | ||||
-rw-r--r-- | thirdparty/zstd/common/debug.c | 44 | ||||
-rw-r--r-- | thirdparty/zstd/common/debug.h | 134 | ||||
-rw-r--r-- | thirdparty/zstd/common/entropy_common.c | 17 | ||||
-rw-r--r-- | thirdparty/zstd/common/error_private.c | 6 | ||||
-rw-r--r-- | thirdparty/zstd/common/fse.h | 90 | ||||
-rw-r--r-- | thirdparty/zstd/common/fse_decompress.c | 2 | ||||
-rw-r--r-- | thirdparty/zstd/common/huf.h | 85 | ||||
-rw-r--r-- | thirdparty/zstd/common/mem.h | 22 | ||||
-rw-r--r-- | thirdparty/zstd/common/pool.c | 123 | ||||
-rw-r--r-- | thirdparty/zstd/common/pool.h | 48 | ||||
-rw-r--r-- | thirdparty/zstd/common/xxhash.c | 1 | ||||
-rw-r--r-- | thirdparty/zstd/common/zstd_common.c | 9 | ||||
-rw-r--r-- | thirdparty/zstd/common/zstd_errors.h | 1 | ||||
-rw-r--r-- | thirdparty/zstd/common/zstd_internal.h | 58 |
17 files changed, 524 insertions, 218 deletions
diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h index f7f389fe0f..d955bd677b 100644 --- a/thirdparty/zstd/common/bitstream.h +++ b/thirdparty/zstd/common/bitstream.h @@ -1,8 +1,7 @@ /* ****************************************************************** bitstream Part of FSE library - header file (to include) - Copyright (C) 2013-2017, Yann Collet. + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -49,21 +48,10 @@ extern "C" { * Dependencies ******************************************/ #include "mem.h" /* unaligned access routines */ +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ -/*-************************************* -* Debug -***************************************/ -#if defined(BIT_DEBUG) && (BIT_DEBUG>=1) -# include <assert.h> -#else -# ifndef assert -# define assert(condition) ((void)0) -# endif -#endif - - /*========================================= * Target specific =========================================*/ @@ -83,8 +71,7 @@ extern "C" { * A critical property of these streams is that they encode and decode in **reverse** direction. * So the first bit sequence you add will be the last to be read, like a LIFO stack. */ -typedef struct -{ +typedef struct { size_t bitContainer; unsigned bitPos; char* startPtr; @@ -118,8 +105,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); /*-******************************************** * bitStream decoding API (read backward) **********************************************/ -typedef struct -{ +typedef struct { size_t bitContainer; unsigned bitsConsumed; const char* ptr; @@ -236,7 +222,8 @@ MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, } /*! BIT_addBitsFast() : - * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { @@ -352,17 +339,10 @@ MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { -#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008 /* experimental */ -# if defined(__x86_64__) - if (sizeof(bitContainer)==8) - return _bextr_u64(bitContainer, start, nbBits); - else -# endif - return _bextr_u32(bitContainer, start, nbBits); -#else + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ assert(nbBits < BIT_MASK_SIZE); - return (bitContainer >> start) & BIT_mask[nbBits]; -#endif + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) @@ -379,9 +359,13 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * @return : value extracted */ MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { -#if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); #else + /* this code path is slower on my os-x laptop */ U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); #endif @@ -405,7 +389,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) * Read (consume) next n bits from local register and update. * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); @@ -414,7 +398,7 @@ MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) /*! BIT_readBitsFast() : * unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBitsFast(bitD, nbBits); assert(nbBits >= 1); diff --git a/thirdparty/zstd/common/compiler.h b/thirdparty/zstd/common/compiler.h index e90a3bcde3..7f561282ca 100644 --- a/thirdparty/zstd/common/compiler.h +++ b/thirdparty/zstd/common/compiler.h @@ -15,6 +15,8 @@ * Compiler specifics *********************************************************/ /* force inlining */ + +#if !defined(ZSTD_NO_INLINE) #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # define INLINE_KEYWORD inline #else @@ -29,6 +31,13 @@ # define FORCE_INLINE_ATTR #endif +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + /** * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant * parameters. They must be inlined for the compiler to elimininate the constant @@ -77,9 +86,9 @@ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. */ #ifndef DYNAMIC_BMI2 - #if (defined(__clang__) && __has_attribute(__target__)) \ + #if ((defined(__clang__) && __has_attribute(__target__)) \ || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ && (defined(__x86_64__) || defined(_M_X86)) \ && !defined(__BMI2__) # define DYNAMIC_BMI2 1 @@ -88,15 +97,35 @@ #endif #endif -/* prefetch */ -#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) -#elif defined(__GNUC__) -# define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ #else -# define PREFETCH(ptr) /* disabled */ -#endif +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ diff --git a/thirdparty/zstd/common/cpu.h b/thirdparty/zstd/common/cpu.h index a109520a33..5f0923fc92 100644 --- a/thirdparty/zstd/common/cpu.h +++ b/thirdparty/zstd/common/cpu.h @@ -72,14 +72,13 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { "cpuid\n\t" "popl %%ebx\n\t" : "=a"(f1a), "=c"(f1c), "=d"(f1d) - : "a"(1) - :); + : "a"(1)); } if (n >= 7) { __asm__( "pushl %%ebx\n\t" "cpuid\n\t" - "movl %%ebx, %%eax\n\r" + "movl %%ebx, %%eax\n\t" "popl %%ebx" : "=a"(f7b), "=c"(f7c) : "a"(7), "c"(0) diff --git a/thirdparty/zstd/common/debug.c b/thirdparty/zstd/common/debug.c new file mode 100644 index 0000000000..3ebdd1cb15 --- /dev/null +++ b/thirdparty/zstd/common/debug.c @@ -0,0 +1,44 @@ +/* ****************************************************************** + debug + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +int g_debuglevel = DEBUGLEVEL; diff --git a/thirdparty/zstd/common/debug.h b/thirdparty/zstd/common/debug.h new file mode 100644 index 0000000000..b4fc89d497 --- /dev/null +++ b/thirdparty/zstd/common/debug.h @@ -0,0 +1,134 @@ +/* ****************************************************************** + debug + Part of FSE library + Copyright (C) 2013-present, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* DEBUGFILE can be defined externally, + * typically through compiler command line. + * note : currently useless. + * Value must be stderr or stdout */ +#ifndef DEBUGFILE +# define DEBUGFILE stderr +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# include <assert.h> +#else +# ifndef assert /* assert may be already defined, due to prior #include <assert.h> */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# include <stdio.h> +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ diff --git a/thirdparty/zstd/common/entropy_common.c b/thirdparty/zstd/common/entropy_common.c index b37a082fee..b12944e1de 100644 --- a/thirdparty/zstd/common/entropy_common.c +++ b/thirdparty/zstd/common/entropy_common.c @@ -72,7 +72,21 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t unsigned charnum = 0; int previous0 = 0; - if (hbSize < 4) return ERROR(srcSize_wrong); + if (hbSize < 4) { + /* This function only works when hbSize >= 4 */ + char buffer[4]; + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 4); + + /* init */ + memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); @@ -105,6 +119,7 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); while (charnum < n0) normalizedCounter[charnum++] = 0; if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; bitStream = MEM_readLE32(ip) >> bitCount; diff --git a/thirdparty/zstd/common/error_private.c b/thirdparty/zstd/common/error_private.c index d004ee636c..7c1bb67a23 100644 --- a/thirdparty/zstd/common/error_private.c +++ b/thirdparty/zstd/common/error_private.c @@ -14,6 +14,10 @@ const char* ERR_getErrorString(ERR_enum code) { +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else static const char* const notErrorCode = "Unspecified error code"; switch( code ) { @@ -39,10 +43,12 @@ const char* ERR_getErrorString(ERR_enum code) case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; case PREFIX(maxCode): default: return notErrorCode; } +#endif } diff --git a/thirdparty/zstd/common/fse.h b/thirdparty/zstd/common/fse.h index 6a1d272be5..f72c519b25 100644 --- a/thirdparty/zstd/common/fse.h +++ b/thirdparty/zstd/common/fse.h @@ -72,6 +72,7 @@ extern "C" { #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + /*-**************************************** * FSE simple functions ******************************************/ @@ -129,7 +130,7 @@ FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, ******************************************/ /*! FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] +1. count symbol occurrence from source[] into table count[] (see hist.h) 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) 3. save normalized counters to memory buffer using writeNCount() 4. build encoding table 'CTable' from normalized counters @@ -147,15 +148,6 @@ or to save and provide normalized distribution using external method. /* *** COMPRESSION *** */ -/*! FSE_count(): - Provides the precise count of each byte within a table 'count'. - 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - *maxSymbolValuePtr will be updated if detected smaller than initial value. - @return : the count of the most frequent symbol (which is not identified). - if return == srcSize, there is only one symbol. - Can also return an error code, which can be tested with FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - /*! FSE_optimalTableLog(): dynamically downsize 'tableLog' when conditions are met. It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. @@ -167,7 +159,8 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue); +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. @@ -178,8 +171,9 @@ FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tab Compactly save 'normalizedCounter' into 'buffer'. @return : size of the compressed table, or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); /*! Constructor and Destructor of FSE_CTable. Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ @@ -250,7 +244,9 @@ If there is an error, the function will return an ErrorCode (which can be tested @return : size read from 'rBuffer', or an errorCode, which can be tested using FSE_isError(). maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ @@ -325,33 +321,8 @@ If there is an error, the function will return an error code, which can be teste /* ***************************************** -* FSE advanced API -*******************************************/ -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned - */ -size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, unsigned* workSpace); - -/** FSE_countFast() : - * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr - */ -size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` must be a table of minimum `1024` unsigned - */ -size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace); - -/*! FSE_count_simple() : - * Same as FSE_countFast(), but does not use any additional memory (not even on stack). - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). -*/ -size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); - - + * FSE advanced API + ***************************************** */ unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); /**< same as FSE_optimalTableLog(), which used `minus==2` */ @@ -541,7 +512,7 @@ MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) const U32 tableLog = MEM_read16(ptr); statePtr->value = (ptrdiff_t)1<<tableLog; statePtr->stateTable = u16ptr+2; - statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1)); + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); statePtr->stateLog = tableLog; } @@ -560,7 +531,7 @@ MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U3 } } -MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol) +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) { FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; const U16* const stateTable = (const U16*)(statePtr->stateTable); @@ -576,6 +547,39 @@ MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePt } +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + /* ====== Decompression ====== */ typedef struct { diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c index 4c66c3b774..72bbead5be 100644 --- a/thirdparty/zstd/common/fse_decompress.c +++ b/thirdparty/zstd/common/fse_decompress.c @@ -49,7 +49,7 @@ * Error Management ****************************************************************/ #define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ /* check and forward error code */ #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h index b4645b4e51..6b572c448d 100644 --- a/thirdparty/zstd/common/huf.h +++ b/thirdparty/zstd/common/huf.h @@ -1,7 +1,7 @@ /* ****************************************************************** - Huffman coder, part of New Generation Entropy library - header file - Copyright (C) 2013-2016, Yann Collet. + huff0 huffman codec, + part of Finite State Entropy library + Copyright (C) 2013-present, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -163,25 +163,29 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } -#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } /* **************************************** * Advanced decompression functions ******************************************/ -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif /* **************************************** @@ -208,7 +212,7 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ } HUF_repeat; /** HUF_compress4X_repeat() : * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. @@ -227,7 +231,9 @@ size_t HUF_compress4X_repeat(void* dst, size_t dstSize, */ #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); /*! HUF_readStats() : * Read compact Huffman tree, saved by HUF_writeCTable(). @@ -242,10 +248,15 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); /* * HUF_decompress() does the following: - * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics * 2. build Huffman table from save, using HUF_readDTableX?() * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() */ @@ -253,13 +264,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void /** HUF_selectDecoder() : * Tells which decoder is likely to decode faster, * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . * Assumption : 0 < dstSize <= 128 KB */ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); /** * The minimum workspace size for the `workSpace` used in - * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp(). + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). * * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. @@ -270,14 +281,22 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif /* ====================== */ @@ -298,25 +317,37 @@ size_t HUF_compress1X_repeat(void* dst, size_t dstSize, void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); -size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif /* BMI2 variants. * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. */ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); diff --git a/thirdparty/zstd/common/mem.h b/thirdparty/zstd/common/mem.h index 47d2300177..5da248756f 100644 --- a/thirdparty/zstd/common/mem.h +++ b/thirdparty/zstd/common/mem.h @@ -39,6 +39,10 @@ extern "C" { # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ #endif +#ifndef __has_builtin +# define __has_builtin(x) 0 /* compat. with non-clang compilers */ +#endif + /* code only tested on 32 and 64 bits systems */ #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } @@ -57,11 +61,23 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size typedef uint64_t U64; typedef int64_t S64; #else +# include <limits.h> +#if CHAR_BIT != 8 +# error "this implementation requires char to be exactly 8-bit type" +#endif typedef unsigned char BYTE; +#if USHRT_MAX != 65535 +# error "this implementation requires short to be exactly 16-bit type" +#endif typedef unsigned short U16; typedef signed short S16; +#if UINT_MAX != 4294967295 +# error "this implementation requires int to be exactly 32-bit type" +#endif typedef unsigned int U32; typedef signed int S32; +/* note : there are no limits defined for long long type in C90. + * limits exist in C99, however, in such case, <stdint.h> is preferred */ typedef unsigned long long U64; typedef signed long long S64; #endif @@ -186,7 +202,8 @@ MEM_STATIC U32 MEM_swap32(U32 in) { #if defined(_MSC_VER) /* Visual Studio */ return _byteswap_ulong(in); -#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) return __builtin_bswap32(in); #else return ((in << 24) & 0xff000000 ) | @@ -200,7 +217,8 @@ MEM_STATIC U64 MEM_swap64(U64 in) { #if defined(_MSC_VER) /* Visual Studio */ return _byteswap_uint64(in); -#elif defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403) +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) return __builtin_bswap64(in); #else return ((in << 56) & 0xff00000000000000ULL) | diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c index 773488b072..7a82945432 100644 --- a/thirdparty/zstd/common/pool.c +++ b/thirdparty/zstd/common/pool.c @@ -10,9 +10,10 @@ /* ====== Dependencies ======= */ -#include <stddef.h> /* size_t */ -#include "pool.h" +#include <stddef.h> /* size_t */ +#include "debug.h" /* assert */ #include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */ +#include "pool.h" /* ====== Compiler specifics ====== */ #if defined(_MSC_VER) @@ -33,8 +34,9 @@ typedef struct POOL_job_s { struct POOL_ctx_s { ZSTD_customMem customMem; /* Keep track of the threads */ - ZSTD_pthread_t *threads; - size_t numThreads; + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; /* The queue is a circular buffer */ POOL_job *queue; @@ -58,10 +60,10 @@ struct POOL_ctx_s { }; /* POOL_thread() : - Work thread for the thread pool. - Waits for jobs and executes them. - @returns : NULL on failure else non-null. -*/ + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. + */ static void* POOL_thread(void* opaque) { POOL_ctx* const ctx = (POOL_ctx*)opaque; if (!ctx) { return NULL; } @@ -69,50 +71,55 @@ static void* POOL_thread(void* opaque) { /* Lock the mutex and wait for a non-empty queue or until shutdown */ ZSTD_pthread_mutex_lock(&ctx->queueMutex); - while (ctx->queueEmpty && !ctx->shutdown) { + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); } - /* empty => shutting down: so stop */ - if (ctx->queueEmpty) { - ZSTD_pthread_mutex_unlock(&ctx->queueMutex); - return opaque; - } /* Pop a job off the queue */ { POOL_job const job = ctx->queue[ctx->queueHead]; ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; ctx->numThreadsBusy++; ctx->queueEmpty = ctx->queueHead == ctx->queueTail; /* Unlock the mutex, signal a pusher, and run the job */ - ZSTD_pthread_mutex_unlock(&ctx->queueMutex); ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); job.function(job.opaque); /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; if (ctx->queueSize == 1) { - ZSTD_pthread_mutex_lock(&ctx->queueMutex); - ctx->numThreadsBusy--; - ZSTD_pthread_mutex_unlock(&ctx->queueMutex); ZSTD_pthread_cond_signal(&ctx->queuePushCond); - } } + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } } /* for (;;) */ - /* Unreachable */ + assert(0); /* Unreachable */ } POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); } -POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { POOL_ctx* ctx; - /* Check the parameters */ + /* Check parameters */ if (!numThreads) { return NULL; } /* Allocate the context and zero initialize */ ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem); if (!ctx) { return NULL; } /* Initialize the job queue. - * It needs one extra space since one space is wasted to differentiate empty - * and full queues. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. */ ctx->queueSize = queueSize + 1; ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem); @@ -126,7 +133,7 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM ctx->shutdown = 0; /* Allocate space for the thread handles */ ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem); - ctx->numThreads = 0; + ctx->threadCapacity = 0; ctx->customMem = customMem; /* Check for errors */ if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } @@ -134,11 +141,12 @@ POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customM { size_t i; for (i = 0; i < numThreads; ++i) { if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { - ctx->numThreads = i; + ctx->threadCapacity = i; POOL_free(ctx); return NULL; } } - ctx->numThreads = numThreads; + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; } return ctx; } @@ -156,8 +164,8 @@ static void POOL_join(POOL_ctx* ctx) { ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); /* Join all of the threads */ { size_t i; - for (i = 0; i < ctx->numThreads; ++i) { - ZSTD_pthread_join(ctx->threads[i], NULL); + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ } } } @@ -172,24 +180,68 @@ void POOL_free(POOL_ctx *ctx) { ZSTD_free(ctx, ctx->customMem); } + + size_t POOL_sizeof(POOL_ctx *ctx) { if (ctx==NULL) return 0; /* supports sizeof NULL */ return sizeof(*ctx) + ctx->queueSize * sizeof(POOL_job) - + ctx->numThreads * sizeof(ZSTD_pthread_t); + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_free(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; } /** * Returns 1 if the queue is full and 0 otherwise. * - * If the queueSize is 1 (the pool was created with an intended queueSize of 0), - * then a queue is empty if there is a thread free and no job is waiting. + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. */ static int isQueueFull(POOL_ctx const* ctx) { if (ctx->queueSize > 1) { return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); } else { - return ctx->numThreadsBusy == ctx->numThreads || + return (ctx->numThreadsBusy == ctx->threadLimit) || !ctx->queueEmpty; } } @@ -263,6 +315,11 @@ void POOL_free(POOL_ctx* ctx) { (void)ctx; } +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { (void)ctx; function(opaque); diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h index a57e9b4fab..458d37f13c 100644 --- a/thirdparty/zstd/common/pool.h +++ b/thirdparty/zstd/common/pool.h @@ -30,40 +30,50 @@ typedef struct POOL_ctx_s POOL_ctx; */ POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); -POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem); +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); /*! POOL_free() : - Free a thread pool returned by POOL_create(). -*/ + * Free a thread pool returned by POOL_create(). + */ void POOL_free(POOL_ctx* ctx); +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + /*! POOL_sizeof() : - return memory usage of pool returned by POOL_create(). -*/ + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ size_t POOL_sizeof(POOL_ctx* ctx); /*! POOL_function : - The function type that can be added to a thread pool. -*/ + * The function type that can be added to a thread pool. + */ typedef void (*POOL_function)(void*); -/*! POOL_add_function : - The function type for a generic thread pool add function. -*/ -typedef void (*POOL_add_function)(void*, POOL_function, void*); /*! POOL_add() : - Add the job `function(opaque)` to the thread pool. `ctx` must be valid. - Possibly blocks until there is room in the queue. - Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed. -*/ + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); /*! POOL_tryAdd() : - Add the job `function(opaque)` to the thread pool if a worker is available. - return immediately otherwise. - @return : 1 if successful, 0 if not. -*/ + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); diff --git a/thirdparty/zstd/common/xxhash.c b/thirdparty/zstd/common/xxhash.c index 9d9c0e963c..532b816192 100644 --- a/thirdparty/zstd/common/xxhash.c +++ b/thirdparty/zstd/common/xxhash.c @@ -98,6 +98,7 @@ /* Modify the local functions below should you wish to use some other memory routines */ /* for malloc(), free() */ #include <stdlib.h> +#include <stddef.h> /* size_t */ static void* XXH_malloc(size_t s) { return malloc(s); } static void XXH_free (void* p) { free(p); } /* for memcpy() */ diff --git a/thirdparty/zstd/common/zstd_common.c b/thirdparty/zstd/common/zstd_common.c index bccc948892..667f4a27fc 100644 --- a/thirdparty/zstd/common/zstd_common.c +++ b/thirdparty/zstd/common/zstd_common.c @@ -30,8 +30,10 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } /*-**************************************** * ZSTD Error Management ******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ /*! ZSTD_isError() : - * tells if a return value is an error code */ + * tells if a return value is an error code + * symbol is required for external callers */ unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } /*! ZSTD_getErrorName() : @@ -46,11 +48,6 @@ ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } * provides error code string from enum */ const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } -/*! g_debuglog_enable : - * turn on/off debug traces (global switch) */ -#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2) -int g_debuglog_enable = 1; -#endif /*=************************************************************** diff --git a/thirdparty/zstd/common/zstd_errors.h b/thirdparty/zstd/common/zstd_errors.h index 57533f2869..92a3433896 100644 --- a/thirdparty/zstd/common/zstd_errors.h +++ b/thirdparty/zstd/common/zstd_errors.h @@ -72,6 +72,7 @@ typedef enum { ZSTD_error_workSpace_tooSmall= 66, ZSTD_error_dstSize_tooSmall = 70, ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h index 65c08a8257..edeb74b9c3 100644 --- a/thirdparty/zstd/common/zstd_internal.h +++ b/thirdparty/zstd/common/zstd_internal.h @@ -21,6 +21,7 @@ ***************************************/ #include "compiler.h" #include "mem.h" +#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ #include "error_private.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" @@ -38,43 +39,11 @@ extern "C" { #endif - -/*-************************************* -* Debug -***************************************/ -#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1) -# include <assert.h> -#else -# ifndef assert -# define assert(condition) ((void)0) -# endif -#endif - -#define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; } - -#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2) -# include <stdio.h> -extern int g_debuglog_enable; -/* recommended values for ZSTD_DEBUG display levels : - * 1 : no display, enables assert() only - * 2 : reserved for currently active debug path - * 3 : events once per object lifetime (CCtx, CDict, etc.) - * 4 : events once per frame - * 5 : events once per block - * 6 : events once per sequence (*very* verbose) */ -# define RAWLOG(l, ...) { \ - if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \ - fprintf(stderr, __VA_ARGS__); \ - } } -# define DEBUGLOG(l, ...) { \ - if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } -#else -# define RAWLOG(l, ...) {} /* disabled */ -# define DEBUGLOG(l, ...) {} /* disabled */ -#endif +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError /*-************************************* @@ -109,12 +78,10 @@ static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; #define BIT0 1 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 -#define ZSTD_WINDOWLOG_DEFAULTMAX 27 /* Default maximum allowed window log */ static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -#define ZSTD_FRAMEIDSIZE 4 -static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */ +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; @@ -227,6 +194,8 @@ typedef struct { BYTE* llCode; BYTE* mlCode; BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ U32 longLengthPos; } seqStore_t; @@ -275,7 +244,7 @@ typedef struct { blockType_e blockType; U32 lastBlock; U32 origSize; -} blockProperties_t; +} blockProperties_t; /* declared here for decompress and fullbench */ /*! ZSTD_getcBlockSize() : * Provides the size of compressed block from block header `src` */ @@ -283,6 +252,13 @@ typedef struct { size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr); +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + #if defined (__cplusplus) } #endif |