diff options
Diffstat (limited to 'thirdparty/zstd/decompress/huf_decompress.c')
| -rw-r--r-- | thirdparty/zstd/decompress/huf_decompress.c | 502 | 
1 files changed, 302 insertions, 200 deletions
| diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c index 68293a1309..1418206718 100644 --- a/thirdparty/zstd/decompress/huf_decompress.c +++ b/thirdparty/zstd/decompress/huf_decompress.c @@ -15,7 +15,7 @@  /* **************************************************************  *  Dependencies  ****************************************************************/ -#include <string.h>     /* memcpy, memset */ +#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */  #include "../common/compiler.h"  #include "../common/bitstream.h"  /* BIT_* */  #include "../common/fse.h"        /* to compress headers */ @@ -103,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;  static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)  {      DTableDesc dtd; -    memcpy(&dtd, table, sizeof(dtd)); +    ZSTD_memcpy(&dtd, table, sizeof(dtd));      return dtd;  } @@ -115,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)  /*-***************************/  typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */ +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { +    U64 D4; +    if (MEM_isLittleEndian()) { +        D4 = symbol + (nbBits << 8); +    } else { +        D4 = (symbol << 8) + nbBits; +    } +    D4 *= 0x0001000100010001ULL; +    return D4; +} + +typedef struct { +        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; +        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; +        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; +        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; +        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + +  size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)  { +    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{      U32 tableLog = 0;      U32 nbSymbols = 0;      size_t iSize;      void* const dtPtr = DTable + 1;      HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; +    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; -    U32* rankVal; -    BYTE* huffWeight; -    size_t spaceUsed32 = 0; - -    rankVal = (U32 *)workSpace + spaceUsed32; -    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; -    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); -    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - -    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); +    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); +    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);      DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); -    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */ +    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */ -    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); +    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);      if (HUF_isError(iSize)) return iSize;      /* Table header */ @@ -145,52 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize          if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */          dtd.tableType = 0;          dtd.tableLog = (BYTE)tableLog; -        memcpy(DTable, &dtd, sizeof(dtd)); +        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));      } -    /* Calculate starting value for each rank */ -    {   U32 n, nextRankStart = 0; -        for (n=1; n<tableLog+1; n++) { -            U32 const current = nextRankStart; -            nextRankStart += (rankVal[n] << (n-1)); -            rankVal[n] = current; -    }   } +    /* Compute symbols and rankStart given rankVal: +     * +     * rankVal already contains the number of values of each weight. +     * +     * symbols contains the symbols ordered by weight. First are the rankVal[0] +     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. +     * symbols[0] is filled (but unused) to avoid a branch. +     * +     * rankStart contains the offset where each rank belongs in the DTable. +     * rankStart[0] is not filled because there are no entries in the table for +     * weight 0. +     */ +    { +        int n; +        int nextRankStart = 0; +        int const unroll = 4; +        int const nLimit = (int)nbSymbols - unroll + 1; +        for (n=0; n<(int)tableLog+1; n++) { +            U32 const curr = nextRankStart; +            nextRankStart += wksp->rankVal[n]; +            wksp->rankStart[n] = curr; +        } +        for (n=0; n < nLimit; n += unroll) { +            int u; +            for (u=0; u < unroll; ++u) { +                size_t const w = wksp->huffWeight[n+u]; +                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); +            } +        } +        for (; n < (int)nbSymbols; ++n) { +            size_t const w = wksp->huffWeight[n]; +            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; +        } +    } -    /* fill DTable */ -    {   U32 n; -        size_t const nEnd = nbSymbols; -        for (n=0; n<nEnd; n++) { -            size_t const w = huffWeight[n]; -            size_t const length = (1 << w) >> 1; -            size_t const uStart = rankVal[w]; -            size_t const uEnd = uStart + length; -            size_t u; -            HUF_DEltX1 D; -            D.byte = (BYTE)n; -            D.nbBits = (BYTE)(tableLog + 1 - w); -            rankVal[w] = (U32)uEnd; -            if (length < 4) { -                /* Use length in the loop bound so the compiler knows it is short. */ -                for (u = 0; u < length; ++u) -                    dt[uStart + u] = D; -            } else { -                /* Unroll the loop 4 times, we know it is a power of 2. */ -                for (u = uStart; u < uEnd; u += 4) { -                    dt[u + 0] = D; -                    dt[u + 1] = D; -                    dt[u + 2] = D; -                    dt[u + 3] = D; -    }   }   }   } +    /* fill DTable +     * We fill all entries of each weight in order. +     * That way length is a constant for each iteration of the outter loop. +     * We can switch based on the length to a different inner loop which is +     * optimized for that particular case. +     */ +    { +        U32 w; +        int symbol=wksp->rankVal[0]; +        int rankStart=0; +        for (w=1; w<tableLog+1; ++w) { +            int const symbolCount = wksp->rankVal[w]; +            int const length = (1 << w) >> 1; +            int uStart = rankStart; +            BYTE const nbBits = (BYTE)(tableLog + 1 - w); +            int s; +            int u; +            switch (length) { +            case 1: +                for (s=0; s<symbolCount; ++s) { +                    HUF_DEltX1 D; +                    D.byte = wksp->symbols[symbol + s]; +                    D.nbBits = nbBits; +                    dt[uStart] = D; +                    uStart += 1; +                } +                break; +            case 2: +                for (s=0; s<symbolCount; ++s) { +                    HUF_DEltX1 D; +                    D.byte = wksp->symbols[symbol + s]; +                    D.nbBits = nbBits; +                    dt[uStart+0] = D; +                    dt[uStart+1] = D; +                    uStart += 2; +                } +                break; +            case 4: +                for (s=0; s<symbolCount; ++s) { +                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); +                    MEM_write64(dt + uStart, D4); +                    uStart += 4; +                } +                break; +            case 8: +                for (s=0; s<symbolCount; ++s) { +                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); +                    MEM_write64(dt + uStart, D4); +                    MEM_write64(dt + uStart + 4, D4); +                    uStart += 8; +                } +                break; +            default: +                for (s=0; s<symbolCount; ++s) { +                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits); +                    for (u=0; u < length; u += 16) { +                        MEM_write64(dt + uStart + u + 0, D4); +                        MEM_write64(dt + uStart + u + 4, D4); +                        MEM_write64(dt + uStart + u + 8, D4); +                        MEM_write64(dt + uStart + u + 12, D4); +                    } +                    assert(u == length); +                    uStart += length; +                } +                break; +            } +            symbol += symbolCount; +            rankStart += symbolCount * length; +        } +    }      return iSize;  } -size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_readDTableX1_wksp(DTable, src, srcSize, -                                 workSpace, sizeof(workSpace)); -} -  FORCE_INLINE_TEMPLATE BYTE  HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)  { @@ -389,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,  } -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, -                              const void* cSrc, size_t cSrcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, -                                       workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); -    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} -  size_t HUF_decompress4X1_usingDTable(            void* dst,  size_t dstSize,      const void* cSrc, size_t cSrcSize, @@ -419,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size  {      const BYTE* ip = (const BYTE*) cSrc; -    size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, -                                                workSpace, wkspSize); +    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);      if (HUF_isError(hSize)) return hSize;      if (hSize >= cSrcSize) return ERROR(srcSize_wrong);      ip += hSize; cSrcSize -= hSize; @@ -436,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,  } -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, -                                       workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); -    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} -  #endif /* HUF_FORCE_DECOMPRESS_X2 */ @@ -474,7 +534,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co      U32 rankVal[HUF_TABLELOG_MAX + 1];      /* get pre-calculated rankVal */ -    memcpy(rankVal, rankValOrigin, sizeof(rankVal)); +    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));      /* fill skipped values */      if (minWeight>1) { @@ -516,7 +576,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,      const U32 minBits  = nbBitsBaseline - maxWeight;      U32 s; -    memcpy(rankVal, rankValOrigin, sizeof(rankVal)); +    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));      /* fill DTable */      for (s=0; s<sortedListSize; s++) { @@ -581,11 +641,11 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,      if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);      rankStart = rankStart0 + 1; -    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); +    ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));      DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */      if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); -    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */ +    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */      iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);      if (HUF_isError(iSize)) return iSize; @@ -599,9 +659,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,      /* Get start index of each weight */      {   U32 w, nextRankStart = 0;          for (w=1; w<maxW+1; w++) { -            U32 current = nextRankStart; +            U32 curr = nextRankStart;              nextRankStart += rankStats[w]; -            rankStart[w] = current; +            rankStart[w] = curr;          }          rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/          sizeOfSort = nextRankStart; @@ -624,9 +684,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,              U32 nextRankVal = 0;              U32 w;              for (w=1; w<maxW+1; w++) { -                U32 current = nextRankVal; +                U32 curr = nextRankVal;                  nextRankVal += rankStats[w] << (w+rescale); -                rankVal0[w] = current; +                rankVal0[w] = curr;          }   }          {   U32 const minBits = tableLog+1 - maxW;              U32 consumed; @@ -644,23 +704,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,      dtd.tableLog = (BYTE)maxTableLog;      dtd.tableType = 1; -    memcpy(DTable, &dtd, sizeof(dtd)); +    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));      return iSize;  } -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ -  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -  return HUF_readDTableX2_wksp(DTable, src, srcSize, -                               workSpace, sizeof(workSpace)); -} -  FORCE_INLINE_TEMPLATE U32  HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)  {      size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */ -    memcpy(op, dt+val, 2); +    ZSTD_memcpy(op, dt+val, 2);      BIT_skipBits(DStream, dt[val].nbBits);      return dt[val].length;  } @@ -669,7 +722,7 @@ FORCE_INLINE_TEMPLATE U32  HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)  {      size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */ -    memcpy(op, dt+val, 1); +    ZSTD_memcpy(op, dt+val, 1);      if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);      else {          if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { @@ -890,20 +943,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,  } -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, -                              const void* cSrc, size_t cSrcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, -                                       workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); -    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} -  size_t HUF_decompress4X2_usingDTable(            void* dst,  size_t dstSize,      const void* cSrc, size_t cSrcSize, @@ -937,20 +976,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,  } -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, -                              const void* cSrc, size_t cSrcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, -                                       workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); -    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} -  #endif /* HUF_FORCE_DECOMPRESS_X1 */ @@ -1051,67 +1076,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)  } -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) -    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; -#endif - -    /* validation checks */ -    if (dstSize == 0) return ERROR(dstSize_tooSmall); -    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */ -    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */ -    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */ - -    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) -        (void)algoNb; -        assert(algoNb == 0); -        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) -        (void)algoNb; -        assert(algoNb == 1); -        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else -        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif -    } -} - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    /* validation checks */ -    if (dstSize == 0) return ERROR(dstSize_tooSmall); -    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */ -    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */ -    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */ - -    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) -        (void)algoNb; -        assert(algoNb == 0); -        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) -        (void)algoNb; -        assert(algoNb == 1); -        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else -        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : -                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif -    } -} - -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, -                                         workSpace, sizeof(workSpace)); -} - -  size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,                                       size_t dstSize, const void* cSrc,                                       size_t cSrcSize, void* workSpace, @@ -1145,8 +1109,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,      /* validation checks */      if (dstSize == 0) return ERROR(dstSize_tooSmall);      if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */ -    if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */ -    if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */ +    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */ +    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */      {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);  #if defined(HUF_FORCE_DECOMPRESS_X1) @@ -1168,14 +1132,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,      }  } -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, -                             const void* cSrc, size_t cSrcSize) -{ -    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; -    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, -                                      workSpace, sizeof(workSpace)); -} -  size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)  { @@ -1199,7 +1155,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS  {      const BYTE* ip = (const BYTE*) cSrc; -    size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); +    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);      if (HUF_isError(hSize)) return hSize;      if (hSize >= cSrcSize) return ERROR(srcSize_wrong);      ip += hSize; cSrcSize -= hSize; @@ -1246,3 +1202,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds  #endif      }  } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_readDTableX1_wksp(DTable, src, srcSize, +                                 workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, +                              const void* cSrc, size_t cSrcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, +                                       workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); +    return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif  + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ +  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +  return HUF_readDTableX2_wksp(DTable, src, srcSize, +                               workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, +                              const void* cSrc, size_t cSrcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, +                                       workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); +    return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, +                                       workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); +    return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, +                              const void* cSrc, size_t cSrcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, +                                       workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); +    return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +    static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + +    /* validation checks */ +    if (dstSize == 0) return ERROR(dstSize_tooSmall); +    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */ +    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */ +    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */ + +    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) +        (void)algoNb; +        assert(algoNb == 0); +        return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) +        (void)algoNb; +        assert(algoNb == 1); +        return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else +        return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif +    } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    /* validation checks */ +    if (dstSize == 0) return ERROR(dstSize_tooSmall); +    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */ +    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */ +    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */ + +    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) +        (void)algoNb; +        assert(algoNb == 0); +        return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) +        (void)algoNb; +        assert(algoNb == 1); +        return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else +        return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : +                        HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif +    } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, +                                         workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, +                             const void* cSrc, size_t cSrcSize) +{ +    U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; +    return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, +                                      workSpace, sizeof(workSpace)); +} +#endif |