diff options
Diffstat (limited to 'thirdparty/zstd/compress/zstd_lazy.c')
| -rw-r--r-- | thirdparty/zstd/compress/zstd_lazy.c | 45 | 
1 file changed, 34 insertions, 11 deletions
| diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c index 9ad7e03b54..4cf5c88b53 100644 --- a/thirdparty/zstd/compress/zstd_lazy.c +++ b/thirdparty/zstd/compress/zstd_lazy.c @@ -1,5 +1,5 @@  /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.   * All rights reserved.   *   * This source code is licensed under both the BSD-style license (found in the @@ -660,12 +660,16 @@ ZSTD_compressBlock_lazy_generic(      const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?                                       prefixLowestIndex - (U32)(dictEnd - dictBase) :                                       0; -    const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); +    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + +    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);      /* init */      ip += (dictAndPrefixLength == 0);      if (dictMode == ZSTD_noDict) { -        U32 const maxRep = (U32)(ip - prefixLowest); +        U32 const current = (U32)(ip - base); +        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); +        U32 const maxRep = current - windowLow;          if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;          if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;      } @@ -677,6 +681,12 @@ ZSTD_compressBlock_lazy_generic(      }      /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) +    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the +     * code alignment is perturbed. To fix the instability align the loop on 32-bytes. 
+     */ +    __asm__(".p2align 5"); +#endif      while (ip < ilimit) {          size_t matchLength=0;          size_t offset=0; @@ -929,11 +939,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(      const BYTE* const ilimit = iend - 8;      const BYTE* const base = ms->window.base;      const U32 dictLimit = ms->window.dictLimit; -    const U32 lowestIndex = ms->window.lowLimit;      const BYTE* const prefixStart = base + dictLimit;      const BYTE* const dictBase = ms->window.dictBase;      const BYTE* const dictEnd  = dictBase + dictLimit; -    const BYTE* const dictStart  = dictBase + lowestIndex; +    const BYTE* const dictStart  = dictBase + ms->window.lowLimit; +    const U32 windowLog = ms->cParams.windowLog;      typedef size_t (*searchMax_f)(                          ZSTD_matchState_t* ms, @@ -942,10 +952,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(      U32 offset_1 = rep[0], offset_2 = rep[1]; +    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); +      /* init */      ip += (ip == prefixStart);      /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) +    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the +     * code alignment is perturbed. To fix the instability align the loop on 32-bytes. +     */ +    __asm__(".p2align 5"); +#endif      while (ip < ilimit) {          size_t matchLength=0;          size_t offset=0; @@ -953,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(          U32 current = (U32)(ip-base);          /* check repCode */ -        {   const U32 repIndex = (U32)(current+1 - offset_1); +        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); +            const U32 repIndex = (U32)(current+1 - offset_1);              const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base;              const BYTE* const repMatch = repBase + repIndex; -            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))   /* intentional overflow */ +            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */              if (MEM_read32(ip+1) == MEM_read32(repMatch)) {                  /* repcode detected we should take it */                  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -983,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(              current++;              /* check repCode */              if (offset) { +                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);                  const U32 repIndex = (U32)(current - offset_1);                  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;                  const BYTE* const repMatch = repBase + repIndex; -                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */ +                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */                  if (MEM_read32(ip) == MEM_read32(repMatch)) {                      /* repcode detected */                      const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1013,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(                  current++;                  /* check repCode */                  if (offset) { +                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);                      const U32 repIndex = (U32)(current - offset_1);                      const BYTE* const repBase = repIndex < dictLimit ? 
dictBase : base;                      const BYTE* const repMatch = repBase + repIndex; -                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */ +                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */                      if (MEM_read32(ip) == MEM_read32(repMatch)) {                          /* repcode detected */                          const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1057,10 +1078,12 @@ _storeSequence:          /* check immediate repcode */          while (ip <= ilimit) { -            const U32 repIndex = (U32)((ip-base) - offset_2); +            const U32 repCurrent = (U32)(ip-base); +            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); +            const U32 repIndex = repCurrent - offset_2;              const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;              const BYTE* const repMatch = repBase + repIndex; -            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex))  /* intentional overflow */ +            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */              if (MEM_read32(ip) == MEM_read32(repMatch)) {                  /* repcode detected we should take it */                  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; |