diff options
Diffstat (limited to 'thirdparty/zstd/decompress')
-rw-r--r-- | thirdparty/zstd/decompress/huf_decompress.c | 502 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_ddict.c | 16 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_ddict.h | 2 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress.c | 205 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress_block.c | 184 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress_block.h | 7 | ||||
-rw-r--r-- | thirdparty/zstd/decompress/zstd_decompress_internal.h | 21 |
7 files changed, 598 insertions, 339 deletions
diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c index 68293a1309..1418206718 100644 --- a/thirdparty/zstd/decompress/huf_decompress.c +++ b/thirdparty/zstd/decompress/huf_decompress.c @@ -15,7 +15,7 @@ /* ************************************************************** * Dependencies ****************************************************************/ -#include <string.h> /* memcpy, memset */ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ #include "../common/compiler.h" #include "../common/bitstream.h" /* BIT_* */ #include "../common/fse.h" /* to compress headers */ @@ -103,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) { DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); + ZSTD_memcpy(&dtd, table, sizeof(dtd)); return dtd; } @@ -115,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) /*-***************************/ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) { + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ U32 tableLog = 0; U32 nbSymbols = 0; size_t iSize; void* const dtPtr = DTable + 1; HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); if (HUF_isError(iSize)) return iSize; /* Table header */ @@ -145,52 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ dtd.tableType = 0; dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); } - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; n<tableLog+1; n++) { - U32 const current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } } + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } - /* fill DTable */ - { U32 n; - size_t const nEnd = nbSymbols; - for (n=0; n<nEnd; n++) { - size_t const w = huffWeight[n]; - size_t const length = (1 << w) >> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. */ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. + */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; w<tableLog+1; ++w) { + int const symbolCount = wksp->rankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); + int s; + int u; + switch (length) { + case 1: + for (s=0; s<symbolCount; ++s) { + HUF_DEltX1 D; + D.byte = wksp->symbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; s<symbolCount; ++s) { + HUF_DEltX1 D; + D.byte = wksp->symbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; s<symbolCount; ++s) { + U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; s<symbolCount; ++s) { + U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; s<symbolCount; ++s) { + U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } return iSize; } -size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX1_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - FORCE_INLINE_TEMPLATE BYTE HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) { @@ -389,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, } -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} - size_t HUF_decompress4X1_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, @@ -419,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; @@ -436,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, } -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - #endif /* HUF_FORCE_DECOMPRESS_X2 */ @@ -474,7 +534,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co U32 rankVal[HUF_TABLELOG_MAX + 1]; /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); /* fill skipped values */ if (minWeight>1) { @@ -516,7 +576,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, const U32 minBits = nbBitsBaseline - maxWeight; U32 s; - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal)); /* fill DTable */ for (s=0; s<sortedListSize; s++) { @@ -581,11 +641,11 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); + ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); if (HUF_isError(iSize)) return iSize; @@ -599,9 +659,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, /* Get start index of each weight */ { U32 w, nextRankStart = 0; for (w=1; w<maxW+1; w++) { - U32 current = nextRankStart; + U32 curr = nextRankStart; nextRankStart += rankStats[w]; - rankStart[w] = current; + rankStart[w] = curr; } rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ sizeOfSort = nextRankStart; @@ -624,9 +684,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, U32 nextRankVal = 0; U32 w; for (w=1; w<maxW+1; w++) { - U32 current = nextRankVal; + U32 curr = nextRankVal; nextRankVal += rankStats[w] << (w+rescale); - rankVal0[w] = current; + rankVal0[w] = curr; } } { U32 const minBits = tableLog+1 - maxW; U32 consumed; @@ -644,23 +704,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, dtd.tableLog = (BYTE)maxTableLog; dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); return iSize; } -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - FORCE_INLINE_TEMPLATE U32 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); + ZSTD_memcpy(op, dt+val, 2); BIT_skipBits(DStream, dt[val].nbBits); return dt[val].length; } @@ -669,7 +722,7 @@ FORCE_INLINE_TEMPLATE U32 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) { size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); + ZSTD_memcpy(op, dt+val, 1); if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); else { if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { @@ -890,20 +943,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, } -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - size_t HUF_decompress4X2_usingDTable( void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, @@ -937,20 +976,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, } -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - #endif /* HUF_FORCE_DECOMPRESS_X1 */ @@ -1051,67 +1076,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) } -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; -#endif - - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif - } -} - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } -} - -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - - size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, @@ -1145,8 +1109,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, /* validation checks */ if (dstSize == 0) return ERROR(dstSize_tooSmall); if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); #if defined(HUF_FORCE_DECOMPRESS_X1) @@ -1168,14 +1132,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, } } -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { @@ -1199,7 +1155,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS { const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); if (HUF_isError(hSize)) return hSize; if (hSize >= cSrcSize) return ERROR(srcSize_wrong); ip += hSize; cSrcSize -= hSize; @@ -1246,3 +1202,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds #endif } } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif diff --git a/thirdparty/zstd/decompress/zstd_ddict.c b/thirdparty/zstd/decompress/zstd_ddict.c index c8cb8ecc95..f5cc23b387 100644 --- a/thirdparty/zstd/decompress/zstd_ddict.c +++ b/thirdparty/zstd/decompress/zstd_ddict.c @@ -14,7 +14,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include <string.h> /* memcpy, memmove, memset */ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY @@ -127,11 +127,11 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, ddict->dictContent = dict; if (!dict) dictSize = 0; } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); ddict->dictBuffer = internalBuffer; ddict->dictContent = internalBuffer; if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); + ZSTD_memcpy(internalBuffer, dict, dictSize); } ddict->dictSize = dictSize; ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ @@ -147,9 +147,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); if (ddict == NULL) return NULL; ddict->cMem = customMem; { size_t const initResult = ZSTD_initDDict_internal(ddict, @@ -198,7 +198,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict( if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ if (sBufferSize < neededSpace) return NULL; if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ dict = ddict+1; } if (ZSTD_isError( ZSTD_initDDict_internal(ddict, @@ -213,8 +213,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) { if (ddict==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); return 0; } } diff --git a/thirdparty/zstd/decompress/zstd_ddict.h b/thirdparty/zstd/decompress/zstd_ddict.h index af307efd3d..8906a71c94 100644 --- a/thirdparty/zstd/decompress/zstd_ddict.h +++ b/thirdparty/zstd/decompress/zstd_ddict.h @@ -15,7 +15,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include <stddef.h> /* size_t */ +#include "../common/zstd_deps.h" /* size_t */ #include "../zstd.h" /* ZSTD_DDict, and several public functions */ diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c index be5c7cfc33..21f846bc77 100644 --- a/thirdparty/zstd/decompress/zstd_decompress.c +++ b/thirdparty/zstd/decompress/zstd_decompress.c @@ -55,7 +55,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include <string.h> /* memcpy, memmove, memset */ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY @@ -94,11 +94,18 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) return startingInputLength; } +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; +} + static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { - dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->ddict = NULL; dctx->ddictLocal = NULL; dctx->dictEnd = NULL; @@ -113,7 +120,8 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->noForwardProgress = 0; dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); - dctx->outBufferMode = ZSTD_obm_buffered; + ZSTD_DCtx_resetParameters(dctx); + dctx->validateChecksum = 1; #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dctx->dictContentEndForFuzzing = NULL; #endif @@ -134,9 +142,9 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); if (!dctx) return NULL; dctx->customMem = customMem; ZSTD_initDCtx_internal(dctx); @@ -164,13 +172,13 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); { ZSTD_customMem const cMem = dctx->customMem; ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); + ZSTD_customFree(dctx->inBuff, cMem); dctx->inBuff = NULL; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (dctx->legacyContext) ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); #endif - ZSTD_free(dctx, cMem); + ZSTD_customFree(dctx, cMem); return 0; } } @@ -179,7 +187,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ } @@ -246,7 +254,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s const BYTE* ip = (const BYTE*)src; size_t const minInputSize = ZSTD_startingInputLength(format); - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ if (srcSize < minInputSize) return minInputSize; RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); @@ -256,7 +264,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s /* skippable frame */ if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); zfhPtr->frameType = ZSTD_skippableFrame; return 0; @@ -446,7 +454,8 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), dictionary_wrong, ""); #endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); return 0; } @@ -461,7 +470,7 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) { ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) @@ -516,7 +525,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize ip += 4; } - frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) ? zfh.frameContentSize : nbBlocks * zfh.blockSizeMax; @@ -579,12 +588,12 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); - memcpy(dst, src, srcSize); + ZSTD_memcpy(dst, src, srcSize); return srcSize; } @@ -592,12 +601,12 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, BYTE b, size_t regenSize) { + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (regenSize == 0) return 0; RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); - memset(dst, b, regenSize); + ZSTD_memset(dst, b, regenSize); return regenSize; } @@ -647,13 +656,13 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); break; case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); break; case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); break; case bt_reserved : default: @@ -661,7 +670,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); if (decodedSize != 0) op += decodedSize; @@ -676,11 +685,13 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, corruption_detected, ""); } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } ip += 4; remainingSrcSize -= 4; } @@ -688,7 +699,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; - return op-ostart; + return (size_t)(op-ostart); } static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, @@ -721,7 +732,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); if (ZSTD_isError(decodedSize)) return decodedSize; - assert(decodedSize <=- dstCapacity); + assert(decodedSize <= dstCapacity); dst = (BYTE*)dst + decodedSize; dstCapacity -= decodedSize; @@ -761,15 +772,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) && (moreThan1Frame==1), srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. But this is _much_ less likely than a srcSize field " - "error."); + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. " + "But this is _much_ less likely than a srcSize field error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); if (res != 0) @@ -781,7 +790,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); - return (BYTE*)dst - (BYTE*)dststart; + return (size_t)((BYTE*)dst - (BYTE*)dststart); } size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, @@ -899,21 +908,21 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c if (dctx->format == ZSTD_f_zstd1) { /* allows header */ assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ dctx->stage = ZSTDds_decodeSkippableHeader; return 0; } } dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = dctx->headerSize - srcSize; dctx->stage = ZSTDds_decodeFrameHeader; return 0; case ZSTDds_decodeFrameHeader: assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; @@ -977,7 +986,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; /* Stay on the same stage until we are finished streaming the block. */ @@ -1007,10 +1016,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_checkChecksum: assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -1019,7 +1031,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; return 0; @@ -1075,7 +1087,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, workspace, workspaceSize); #else size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, + dictPtr, (size_t)(dictEnd - dictPtr), workspace, workspaceSize); #endif RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); @@ -1084,40 +1096,46 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, - offcodeLog); + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, - matchlengthLog); + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, - litlengthLog); + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += litlengthHeaderSize; } @@ -1131,7 +1149,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, entropy->rep[i] = rep; } } - return dictPtr - (const BYTE*)dict; + return (size_t)(dictPtr - (const BYTE*)dict); } static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) @@ -1170,7 +1188,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->dictID = 0; dctx->bType = bt_reserved; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; dctx->MLTptr = dctx->entropy.MLTable; dctx->OFTptr = dctx->entropy.OFTable; @@ -1394,7 +1412,7 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) @@ -1411,8 +1429,12 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); return bounds; case ZSTD_d_stableOutBuffer: - bounds.lowerBound = (int)ZSTD_obm_buffered; - bounds.upperBound = (int)ZSTD_obm_stable; + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; return bounds; default:; } @@ -1436,6 +1458,26 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ } +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); @@ -1451,7 +1493,11 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value return 0; case ZSTD_d_stableOutBuffer: CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); - dctx->outBufferMode = (ZSTD_outBufferMode_e)value; + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; return 0; default:; } @@ -1469,8 +1515,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) || (reset == ZSTD_reset_session_and_parameters) ) { RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + ZSTD_DCtx_resetParameters(dctx); } return 0; } @@ -1524,7 +1569,7 @@ static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const ne { if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) zds->oversizedDuration++; - else + else zds->oversizedDuration = 0; } @@ -1538,7 +1583,7 @@ static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* { ZSTD_outBuffer const expect = zds->expectedOutBuffer; /* No requirement when ZSTD_obm_stable is not enabled. */ - if (zds->outBufferMode != ZSTD_obm_stable) + if (zds->outBufferMode != ZSTD_bm_stable) return 0; /* Any buffer is allowed in zdss_init, this must be the same for every other call until * the context is reset. @@ -1548,7 +1593,7 @@ static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* /* The buffer must match our expectation exactly. */ if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) return 0; - RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!"); + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); } /* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() @@ -1560,7 +1605,7 @@ static size_t ZSTD_decompressContinueStream( ZSTD_DStream* zds, char** op, char* oend, void const* src, size_t srcSize) { int const isSkipFrame = ZSTD_isSkipFrame(zds); - if (zds->outBufferMode == ZSTD_obm_buffered) { + if (zds->outBufferMode == ZSTD_bm_buffered) { size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; size_t const decodedSize = ZSTD_decompressContinue(zds, zds->outBuff + zds->outStart, dstSize, src, srcSize); @@ -1573,14 +1618,14 @@ static size_t ZSTD_decompressContinueStream( } } else { /* Write directly into the output buffer */ - size_t const dstSize = isSkipFrame ? 0 : oend - *op; + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); FORWARD_IF_ERROR(decodedSize, ""); *op += decodedSize; /* Flushing is not needed. */ zds->streamStage = zdss_read; assert(*op <= oend); - assert(zds->outBufferMode == ZSTD_obm_stable); + assert(zds->outBufferMode == ZSTD_bm_stable); } return 0; } @@ -1663,14 +1708,14 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB assert(iend >= ip); if (toLoad > remainingInput) { /* not enough input to load full header */ if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); zds->lhSize += remainingInput; } input->pos = input->size; return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ } assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; break; } } @@ -1678,10 +1723,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); if (cSize <= (size_t)(iend-istart)) { /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") ip = istart + cSize; @@ -1693,7 +1738,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } /* Check output buffer is large enough for ZSTD_odm_stable. */ - if (zds->outBufferMode == ZSTD_obm_stable + if (zds->outBufferMode == ZSTD_bm_stable && zds->fParams.frameType != ZSTD_skippableFrame && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { @@ -1723,7 +1768,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) : 0; @@ -1731,7 +1776,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); - + if (tooSmall || tooLarge) { size_t const bufferSize = neededInBuffSize + neededOutBuffSize; DEBUGLOG(4, "inBuff : from %u to %u", @@ -1745,10 +1790,10 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), memory_allocation, ""); } else { - ZSTD_free(zds->inBuff, zds->customMem); + ZSTD_customFree(zds->inBuff, zds->customMem); zds->inBuffSize = 0; zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); } zds->inBuffSize = neededInBuffSize; @@ -1760,7 +1805,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB case zdss_read: DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); if (neededInSize==0) { /* end of frame */ zds->streamStage = zdss_init; @@ -1790,7 +1835,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, corruption_detected, "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); } ip += loadedSize; zds->inPos += loadedSize; @@ -1804,7 +1849,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); op += flushedSize; zds->outStart += flushedSize; if (flushedSize == toFlushSize) { /* flush completed */ diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.c b/thirdparty/zstd/decompress/zstd_decompress_block.c index ad3b3d8dbd..19cbdc5c16 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.c +++ b/thirdparty/zstd/decompress/zstd_decompress_block.c @@ -14,7 +14,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include <string.h> /* memcpy, memmove, memset */ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ #include "../common/compiler.h" /* prefetch */ #include "../common/cpu.h" /* bmi2 */ #include "../common/mem.h" /* low level memory routines */ @@ -44,7 +44,7 @@ /*_******************************************************* * Memory operations **********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } /*-************************************************************* @@ -166,7 +166,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, dctx->litSize = litSize; dctx->litEntropy = 1; if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return litCSize + lhSize; } @@ -191,10 +191,10 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); - memcpy(dctx->litBuffer, istart+lhSize, litSize); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return lhSize+litSize; } /* direct reference into compressed stream */ @@ -223,7 +223,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, break; } RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; @@ -236,7 +236,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* Default FSE distribution tables. * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions * They were generated programmatically with following method : * - start from default distributions, present in /lib/common/zstd_internal.h * - generate tables normally, using ZSTD_buildFSETable() @@ -364,23 +364,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB * generate FSE decoding table for one symbol (ll, ml or off) * cannot fail if input is valid => * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) + unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); - + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; /* Init, lay down lowprob symbols */ { ZSTD_seqSymbol_header DTableH; DTableH.tableLog = tableLog; @@ -396,16 +399,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, assert(normalizedCounter[s]>=0); symbolNext[s] = (U16)normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s<maxSV1; ++s, sv += add) { + int i; + int const n = normalizedCounter[s]; + MEM_write64(spread + pos, sv); + for (i = 8; i < n; i += 8) { + MEM_write64(spread + pos + i, sv); + } + pos += n; + } + } + /* Now we spread those positions across the table. + * The benefit of doing it in two stages is that we avoid the the + * variable size inner loop, which caused lots of branch misses. + * Now we can run through all the positions without any branch misses. + * We unroll the loop twice, since that is what emperically worked best. + */ + { + size_t position = 0; + size_t s; + size_t const unroll = 2; + assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ + for (s = 0; s < (size_t)tableSize; s += unroll) { + size_t u; + for (u = 0; u < unroll; ++u) { + size_t const uPosition = (position + (u * step)) & tableMask; + tableDecode[uPosition].baseValue = spread[s + u]; + } + position = (position + (unroll * step)) & tableMask; + } + assert(position == 0); + } + } else { + U32 const tableMask = tableSize-1; U32 const step = FSE_TABLESTEP(tableSize); U32 s, position = 0; for (s=0; s<maxSV1; s++) { int i; - for (i=0; i<normalizedCounter[s]; i++) { + int const n = normalizedCounter[s]; + for (i=0; i<n; i++) { tableDecode[position].baseValue = s; position = (position + step) & tableMask; while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ @@ -414,7 +470,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } /* Build Decoding table */ - { U32 u; + { + U32 u; for (u=0; u<tableSize; u++) { U32 const symbol = tableDecode[u].baseValue; U32 const nextState = symbolNext[symbol]++; @@ -423,7 +480,46 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, assert(nbAdditionalBits[symbol] < 255); tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol]; tableDecode[u].baseValue = baseValue[symbol]; - } } + } + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); +} +#endif + +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); + return; + } +#endif + (void)bmi2; + ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue, + baseValue, nbAdditionalBits, tableLog, wksp, wkspSize); } @@ -435,7 +531,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb const void* src, size_t srcSize, const U32* baseValue, const U32* nbAdditionalBits, const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable, - int ddictIsCold, int nbSeq) + int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize, + int bmi2) { switch(type) { @@ -467,7 +564,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); *DTablePtr = DTableSpace; return headerSize; } @@ -499,7 +596,8 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; } else { RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); nbSeq = ((nbSeq-0x80)<<8) + *ip++; @@ -520,7 +618,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -530,7 +630,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -540,7 +642,9 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } @@ -686,12 +790,12 @@ size_t ZSTD_execSequenceEnd(BYTE* op, RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -752,12 +856,12 @@ size_t ZSTD_execSequence(BYTE* op, RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -948,7 +1052,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c } #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) { size_t const windowSize = dctx->fParams.windowSize; /* No dictionary used. */ @@ -969,6 +1073,7 @@ MEM_STATIC void ZSTD_assertValidSequence( seq_t const seq, BYTE const* prefixStart, BYTE const* virtualStart) { +#if DEBUGLEVEL >= 1 size_t const windowSize = dctx->fParams.windowSize; size_t const sequenceSize = seq.litLength + seq.matchLength; BYTE const* const oLitEnd = op + seq.litLength; @@ -986,6 +1091,9 @@ MEM_STATIC void ZSTD_assertValidSequence( /* Offset must be within our window. */ assert(seq.offset <= windowSize); } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif } #endif @@ -1080,14 +1188,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, #endif DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. - * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. + * Instead break and check for an error at the end of the loop. */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } if (UNLIKELY(!--nbSeq)) break; } @@ -1104,7 +1212,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, { size_t const lastLLSize = litEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { - memcpy(op, litPtr, lastLLSize); + ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; } } @@ -1209,7 +1317,7 @@ ZSTD_decompressSequencesLong_body( { size_t const lastLLSize = litEnd - litPtr; RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); if (op != NULL) { - memcpy(op, litPtr, lastLLSize); + ZSTD_memcpy(op, litPtr, lastLLSize); op += lastLLSize; } } diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.h b/thirdparty/zstd/decompress/zstd_decompress_block.h index bf39b7350c..b5715c168e 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.h +++ b/thirdparty/zstd/decompress/zstd_decompress_block.h @@ -15,7 +15,7 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include <stddef.h> /* size_t */ +#include "../common/zstd_deps.h" /* size_t */ #include "../zstd.h" /* DCtx, and some public functions */ #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */ #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ @@ -48,12 +48,15 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * this function must be called with valid parameters only * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. * Internal use only. */ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); #endif /* ZSTD_DEC_BLOCK_H */ diff --git a/thirdparty/zstd/decompress/zstd_decompress_internal.h b/thirdparty/zstd/decompress/zstd_decompress_internal.h index 9ad96c5548..f80b471e99 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_internal.h +++ b/thirdparty/zstd/decompress/zstd_decompress_internal.h @@ -27,26 +27,26 @@ /*-******************************************************* * Constants *********************************************************/ -static const U32 LL_base[MaxLL+1] = { +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000 }; -static const U32 OF_base[MaxOff+1] = { +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -static const U32 OF_bits[MaxOff+1] = { +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 }; -static const U32 ML_base[MaxML+1] = { +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, @@ -73,12 +73,16 @@ static const U32 ML_base[MaxML+1] = { #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + typedef struct { ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; } ZSTD_entropyDTables_t; typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, @@ -95,11 +99,6 @@ typedef enum { ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ } ZSTD_dictUses_e; -typedef enum { - ZSTD_obm_buffered = 0, /* Buffer the output */ - ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ -} ZSTD_outBufferMode_e; - struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -122,6 +121,8 @@ struct ZSTD_DCtx_s XXH64_state_t xxhState; size_t headerSize; ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ const BYTE* litPtr; ZSTD_customMem customMem; size_t litSize; @@ -152,7 +153,7 @@ struct ZSTD_DCtx_s U32 legacyVersion; U32 hostageByte; int noForwardProgress; - ZSTD_outBufferMode_e outBufferMode; + ZSTD_bufferMode_e outBufferMode; ZSTD_outBuffer expectedOutBuffer; /* workspace */ |