diff options
Diffstat (limited to 'thirdparty/zstd/compress/zstd_ldm.c')
-rw-r--r-- | thirdparty/zstd/compress/zstd_ldm.c | 77 |
1 files changed, 59 insertions, 18 deletions
diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c index 8c47948358..3f3d7c46ab 100644 --- a/thirdparty/zstd/compress/zstd_ldm.c +++ b/thirdparty/zstd/compress/zstd_ldm.c @@ -27,13 +27,6 @@ void ZSTD_ldm_adjustParameters(ldmParams_t* params, DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - if (cParams->strategy >= ZSTD_btopt) { - /* Get out of the way of the optimal parser */ - U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); - assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); - assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); - params->minMatchLength = minMatch; - } if (params->hashLog == 0) { params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); assert(params->hashLog <= ZSTD_HASHLOG_MAX); @@ -150,10 +143,10 @@ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ static size_t ZSTD_ldm_countBackwardsMatch( const BYTE* pIn, const BYTE* pAnchor, - const BYTE* pMatch, const BYTE* pBase) + const BYTE* pMatch, const BYTE* pMatchBase) { size_t matchLength = 0; - while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { pIn--; pMatch--; matchLength++; @@ -161,6 +154,27 @@ static size_t ZSTD_ldm_countBackwardsMatch( return matchLength; } +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. + * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + /** ZSTD_ldm_fillFastTables() : * * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. @@ -246,10 +260,10 @@ void ZSTD_ldm_fillHashTable( * (after a long match, only update tables a limited amount). */ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) { - U32 const current = (U32)(anchor - ms->window.base); - if (current > ms->nextToUpdate + 1024) { + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { ms->nextToUpdate = - current - MIN(512, current - ms->nextToUpdate - 1024); + curr - MIN(512, curr - ms->nextToUpdate - 1024); } } @@ -286,7 +300,7 @@ static size_t ZSTD_ldm_generateSequences_internal( while (ip <= ilimit) { size_t mLength; - U32 const current = (U32)(ip - base); + U32 const curr = (U32)(ip - base); size_t forwardMatchLength = 0, backwardMatchLength = 0; ldmEntry_t* bestEntry = NULL; if (ip != istart) { @@ -336,8 +350,9 @@ static size_t ZSTD_ldm_generateSequences_internal( continue; } curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowMatchPtr); + ZSTD_ldm_countBackwardsMatch_2segments(ip, anchor, + pMatch, lowMatchPtr, + dictStart, dictEnd); curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; } else { /* !extDict */ @@ -365,7 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal( /* No match found -- continue searching */ if (bestEntry == NULL) { ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, current, + hBits, curr, *params); ip++; continue; @@ -377,11 +392,11 @@ static size_t ZSTD_ldm_generateSequences_internal( { /* Store the sequence: - * ip = current - backwardMatchLength + * ip = curr - backwardMatchLength * The match is at (bestEntry->offset - backwardMatchLength) */ U32 const matchIndex = bestEntry->offset; - U32 const offset = current - matchIndex; + U32 const offset = curr - matchIndex; rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; /* Out of sequence storage */ @@ -562,6 +577,23 @@ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, return sequence; } +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) @@ -577,6 +609,15 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, BYTE const* ip = istart; DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + assert(rawSeqStore->pos <= rawSeqStore->size); assert(rawSeqStore->size <= rawSeqStore->capacity); /* Loop through each sequence and apply the block compressor to the lits */ |