diff options
Diffstat (limited to 'thirdparty/zstd/compress/zstd_compress_internal.h')
| -rw-r--r-- | thirdparty/zstd/compress/zstd_compress_internal.h | 257 | 
1 files changed, 174 insertions, 83 deletions
diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h index 3b04fd09f6..c406e794bd 100644 --- a/thirdparty/zstd/compress/zstd_compress_internal.h +++ b/thirdparty/zstd/compress/zstd_compress_internal.h @@ -63,7 +63,7 @@ typedef struct {  } ZSTD_localDict;  typedef struct { -    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; +    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];      HUF_repeat repeatMode;  } ZSTD_hufCTables_t; @@ -129,7 +129,7 @@ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,  *********************************/  typedef struct { -    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */ +    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */      U32 len;            /* Raw length of match */  } ZSTD_match_t; @@ -179,7 +179,7 @@ typedef struct {      U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */      ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */      const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */ -    ZSTD_literalCompressionMode_e literalCompressionMode; +    ZSTD_paramSwitch_e literalCompressionMode;  } optState_t;  typedef struct { @@ -199,6 +199,8 @@ typedef struct {                                  */  } ZSTD_window_t; +#define ZSTD_WINDOW_START_INDEX 2 +  typedef struct ZSTD_matchState_t ZSTD_matchState_t;  #define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */ @@ -264,7 +266,7 @@ typedef struct {  } ldmState_t;  typedef struct { -    U32 enableLdm;          /* 1 if enable long distance matching */ +    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */      U32 hashLog;            /* Log size of hashTable */      U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */      U32 minMatchLength;     /* Minimum match length */ @@ -295,7 +297,7 @@ struct ZSTD_CCtx_params_s {                                  * There is no guarantee that hint is close to actual source size */      ZSTD_dictAttachPref_e attachDictPref; -    ZSTD_literalCompressionMode_e literalCompressionMode; +    ZSTD_paramSwitch_e literalCompressionMode;      /* Multithreading: used to pass parameters to mtctx */      int nbWorkers; @@ -318,10 +320,10 @@ struct ZSTD_CCtx_params_s {      int validateSequences;      /* Block splitting */ -    int splitBlocks; +    ZSTD_paramSwitch_e useBlockSplitter;      /* Param for deciding whether to use row-based matchfinder */ -    ZSTD_useRowMatchFinderMode_e useRowMatchFinder; +    ZSTD_paramSwitch_e useRowMatchFinder;      /* Always load a dictionary in ext-dict mode (not prefix mode)? */      int deterministicRefPrefix; @@ -343,6 +345,22 @@ typedef enum {      ZSTDb_buffered  } ZSTD_buffered_policy_e; +/** + * Struct that contains all elements of block splitter that should be allocated + * in a wksp. + */ +#define ZSTD_MAX_NB_BLOCK_SPLITS 196 +typedef struct { +    seqStore_t fullSeqStoreChunk; +    seqStore_t firstHalfSeqStore; +    seqStore_t secondHalfSeqStore; +    seqStore_t currSeqStore; +    seqStore_t nextSeqStore; + +    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; +    ZSTD_entropyCTablesMetadata_t entropyMetadata; +} ZSTD_blockSplitCtx; +  struct ZSTD_CCtx_s {      ZSTD_compressionStage_e stage;      int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ @@ -374,7 +392,7 @@ struct ZSTD_CCtx_s {      ZSTD_blockState_t blockState;      U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ -    /* Wether we are streaming or not */ +    /* Whether we are streaming or not */      ZSTD_buffered_policy_e bufferedPolicy;      /* streaming */ @@ -408,6 +426,9 @@ struct ZSTD_CCtx_s {  #if ZSTD_TRACE      ZSTD_TraceCtx traceCtx;  #endif + +    /* Workspace for block splitter */ +    ZSTD_blockSplitCtx blockSplitCtx;  };  typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; @@ -442,7 +463,7 @@ typedef enum {  typedef size_t (*ZSTD_blockCompressor) (          ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],          void const* src, size_t srcSize); -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);  MEM_STATIC U32 ZSTD_LLcode(U32 litLength) @@ -476,31 +497,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)      return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];  } -typedef struct repcodes_s { -    U32 rep[3]; -} repcodes_t; - -MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ -    repcodes_t newReps; -    if (offset >= ZSTD_REP_NUM) {  /* full offset */ -        newReps.rep[2] = rep[1]; -        newReps.rep[1] = rep[0]; -        newReps.rep[0] = offset - ZSTD_REP_MOVE; -    } else {   /* repcode */ -        U32 const repCode = offset + ll0; -        if (repCode > 0) {  /* note : if repCode==0, no change */ -            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; -            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; -            newReps.rep[1] = rep[0]; -            newReps.rep[0] = currentOffset; -        } else {   /* repCode == 0 */ -            ZSTD_memcpy(&newReps, rep, sizeof(newReps)); -        } -    } -    return newReps; -} -  /* ZSTD_cParam_withinBounds:   * @return 1 if value is within cParam bounds,   * 0 otherwise */ @@ -549,17 +545,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)      return (srcSize >> minlog) + 2;  } -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)  {      switch (cctxParams->literalCompressionMode) { -    case ZSTD_lcm_huffman: +    case ZSTD_ps_enable:          return 0; -    case ZSTD_lcm_uncompressed: +    case ZSTD_ps_disable:          return 1;      default:          assert(0 /* impossible: pre-validated */); -        /* fall-through */ -    case ZSTD_lcm_auto: +        ZSTD_FALLTHROUGH; +    case ZSTD_ps_auto:          return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);      }  } @@ -569,7 +565,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam   *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single   *  large copies.   */ -static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { +static void +ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) +{      assert(iend > ilimit_w);      if (ip <= ilimit_w) {          ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); @@ -579,14 +577,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie      while (ip < iend) *op++ = *ip++;  } +#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1) +#define STORE_REPCODE_1 STORE_REPCODE(1) +#define STORE_REPCODE_2 STORE_REPCODE(2) +#define STORE_REPCODE_3 STORE_REPCODE(3) +#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) +#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE) +#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE) +#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) +#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) +#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */ +#define STORED_TO_OFFBASE(o) ((o)+1) +#define OFFBASE_TO_STORED(o) ((o)-1) +  /*! ZSTD_storeSeq() : - *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. - *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). - *  `mlBase` : matchLength - MINMATCH + *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. + *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). + *  @matchLength : must be >= MINMATCH   *  Allowed to overread literals up to litLimit.  */ -HINT_INLINE UNUSED_ATTR -void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +HINT_INLINE UNUSED_ATTR void +ZSTD_storeSeq(seqStore_t* seqStorePtr, +              size_t litLength, const BYTE* literals, const BYTE* litLimit, +              U32 offBase_minus1, +              size_t matchLength)  {      BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;      BYTE const* const litEnd = literals + litLength; @@ -595,7 +609,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera      if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */      {   U32 const pos = (U32)((const BYTE*)literals - g_start);          DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", -               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); +               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);      }  #endif      assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); @@ -626,19 +640,59 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera      seqStorePtr->sequences[0].litLength = (U16)litLength;      /* match offset */ -    seqStorePtr->sequences[0].offset = offCode + 1; +    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);      /* match Length */ -    if (mlBase>0xFFFF) { -        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ -        seqStorePtr->longLengthType = ZSTD_llt_matchLength; -        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); +    assert(matchLength >= MINMATCH); +    {   size_t const mlBase = matchLength - MINMATCH; +        if (mlBase>0xFFFF) { +            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ +            seqStorePtr->longLengthType = ZSTD_llt_matchLength; +            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); +        } +        seqStorePtr->sequences[0].mlBase = (U16)mlBase;      } -    seqStorePtr->sequences[0].matchLength = (U16)mlBase;      seqStorePtr->sequences++;  } +/* ZSTD_updateRep() : + * updates in-place @rep (array of repeat offsets) + * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() + */ +MEM_STATIC void +ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +{ +    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */ +        rep[2] = rep[1]; +        rep[1] = rep[0]; +        rep[0] = STORED_OFFSET(offBase_minus1); +    } else {   /* repcode */ +        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; +        if (repCode > 0) {  /* note : if repCode==0, no change */ +            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; +            rep[2] = (repCode >= 2) ? rep[1] : rep[2]; +            rep[1] = rep[0]; +            rep[0] = currentOffset; +        } else {   /* repCode == 0 */ +            /* nothing to do */ +        } +    } +} + +typedef struct repcodes_s { +    U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t +ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) +{ +    repcodes_t newReps; +    ZSTD_memcpy(&newReps, rep, sizeof(newReps)); +    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); +    return newReps; +} +  /*-*************************************  *  Match length counter @@ -651,8 +705,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)  #           if STATIC_BMI2                  return _tzcnt_u64(val) >> 3;  #           else -                unsigned long r = 0; -                return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; +                if (val != 0) { +                    unsigned long r; +                    _BitScanForward64(&r, (U64)val); +                    return (unsigned)(r >> 3); +                } else { +                    /* Should not reach this code path */ +                    __assume(0); +                }  #           endif  #       elif defined(__GNUC__) && (__GNUC__ >= 4)              return (__builtin_ctzll((U64)val) >> 3); @@ -669,8 +729,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)  #       endif          } else { /* 32 bits */  #       if defined(_MSC_VER) -            unsigned long r=0; -            return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; +            if (val != 0) { +                unsigned long r; +                _BitScanForward(&r, (U32)val); +                return (unsigned)(r >> 3); +            } else { +                /* Should not reach this code path */ +                __assume(0); +            }  #       elif defined(__GNUC__) && (__GNUC__ >= 3)              return (__builtin_ctz((U32)val) >> 3);  #       else @@ -687,8 +753,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)  #           if STATIC_BMI2  			    return _lzcnt_u64(val) >> 3;  #           else -			    unsigned long r = 0; -			    return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; +                if (val != 0) { +                    unsigned long r; +                    _BitScanReverse64(&r, (U64)val); +                    return (unsigned)(r >> 3); +                } else { +                    /* Should not reach this code path */ +                    __assume(0); +                }  #           endif  #       elif defined(__GNUC__) && (__GNUC__ >= 4)              return (__builtin_clzll(val) >> 3); @@ -702,8 +774,14 @@ static unsigned ZSTD_NbCommonBytes (size_t val)  #       endif          } else { /* 32 bits */  #       if defined(_MSC_VER) -            unsigned long r = 0; -            return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; +            if (val != 0) { +                unsigned long r; +                _BitScanReverse(&r, (unsigned long)val); +                return (unsigned)(r >> 3); +            } else { +                /* Should not reach this code path */ +                __assume(0); +            }  #       elif defined(__GNUC__) && (__GNUC__ >= 3)              return (__builtin_clz((U32)val) >> 3);  #       else @@ -884,9 +962,9 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)  MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)  { -    return window.dictLimit == 1 && -           window.lowLimit == 1 && -           (window.nextSrc - window.base) == 1; +    return window.dictLimit == ZSTD_WINDOW_START_INDEX && +           window.lowLimit == ZSTD_WINDOW_START_INDEX && +           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;  }  /** @@ -937,7 +1015,9 @@ MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,  {      U32 const cycleSize = 1u << cycleLog;      U32 const curr = (U32)((BYTE const*)src - window.base); -    U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize); +    U32 const minIndexToOverflowCorrect = cycleSize +                                        + MAX(maxDist, cycleSize) +                                        + ZSTD_WINDOW_START_INDEX;      /* Adjust the min index to backoff the overflow correction frequency,       * so we don't waste too much CPU in overflow correction. If this @@ -1012,10 +1092,14 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,      U32 const cycleSize = 1u << cycleLog;      U32 const cycleMask = cycleSize - 1;      U32 const curr = (U32)((BYTE const*)src - window->base); -    U32 const currentCycle0 = curr & cycleMask; -    /* Exclude zero so that newCurrent - maxDist >= 1. */ -    U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0; -    U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize); +    U32 const currentCycle = curr & cycleMask; +    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ +    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX +                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) +                                     : 0; +    U32 const newCurrent = currentCycle +                         + currentCycleCorrection +                         + MAX(maxDist, cycleSize);      U32 const correction = curr - newCurrent;      /* maxDist must be a power of two so that:       *   (newCurrent & cycleMask) == (curr & cycleMask) @@ -1031,14 +1115,20 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,      window->base += correction;      window->dictBase += correction; -    if (window->lowLimit <= correction) window->lowLimit = 1; -    else window->lowLimit -= correction; -    if (window->dictLimit <= correction) window->dictLimit = 1; -    else window->dictLimit -= correction; +    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { +        window->lowLimit = ZSTD_WINDOW_START_INDEX; +    } else { +        window->lowLimit -= correction; +    } +    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { +        window->dictLimit = ZSTD_WINDOW_START_INDEX; +    } else { +        window->dictLimit -= correction; +    }      /* Ensure we can still reference the full window. */      assert(newCurrent >= maxDist); -    assert(newCurrent - maxDist >= 1); +    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);      /* Ensure that lowLimit and dictLimit didn't underflow. */      assert(window->lowLimit <= newCurrent);      assert(window->dictLimit <= newCurrent); @@ -1149,11 +1239,12 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,  MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {      ZSTD_memset(window, 0, sizeof(*window)); -    window->base = (BYTE const*)""; -    window->dictBase = (BYTE const*)""; -    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */ -    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */ -    window->nextSrc = window->base + 1;   /* see issue #1241 */ +    window->base = (BYTE const*)" "; +    window->dictBase = (BYTE const*)" "; +    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ +    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */ +    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */ +    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */      window->nbOverflowCorrections = 0;  } @@ -1206,15 +1297,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,   */  MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)  { -    U32    const maxDistance = 1U << windowLog; -    U32    const lowestValid = ms->window.lowLimit; -    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; -    U32    const isDictionary = (ms->loadedDictEnd != 0); +    U32 const maxDistance = 1U << windowLog; +    U32 const lowestValid = ms->window.lowLimit; +    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; +    U32 const isDictionary = (ms->loadedDictEnd != 0);      /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary       * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't       * valid for the entire block. So this check is sufficient to find the lowest valid match index.       */ -    U32    const matchLowest = isDictionary ? lowestValid : withinWindow; +    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;      return matchLowest;  }  |