author     Rémi Verschelde <rverschelde@gmail.com>   2017-08-27 12:05:17 +0200
committer  Rémi Verschelde <rverschelde@gmail.com>   2017-08-27 12:05:39 +0200
commit     c3ab9eb590ca10b3b67dd4124047b6879ec73721 (patch)
tree       2d977b022aa0170d16c8ae999f4a8ea62d55851f /thirdparty/zstd/compress
parent     560fc0f19932a7b51e02e9eda8a64f284f41e6d0 (diff)
zstd: Update to upstream version 1.3.1
It is now dual-licensed BSD-3-Clause and GPL-2.0; we use the former.
The PATENTS file is no longer applicable \o/
Also add zstd to COPYRIGHT.txt
Diffstat (limited to 'thirdparty/zstd/compress')
-rw-r--r--  thirdparty/zstd/compress/fse_compress.c    |  26
-rw-r--r--  thirdparty/zstd/compress/huf_compress.c    |   7
-rw-r--r--  thirdparty/zstd/compress/zstd_compress.c   | 618
-rw-r--r--  thirdparty/zstd/compress/zstd_opt.h        | 234
-rw-r--r--  thirdparty/zstd/compress/zstdmt_compress.c | 379
-rw-r--r--  thirdparty/zstd/compress/zstdmt_compress.h |  19
6 files changed, 673 insertions, 610 deletions
diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c
index 26e8052ddc..cc9fa73514 100644
--- a/thirdparty/zstd/compress/fse_compress.c
+++ b/thirdparty/zstd/compress/fse_compress.c
@@ -33,40 +33,22 @@
 ****************************************************************** */

 /* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#ifdef _MSC_VER    /* Visual Studio */
-#  define FORCE_INLINE static __forceinline
-#  include <intrin.h>                    /* For Visual 2005 */
-#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
-#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
-#else
-#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
-#    ifdef __GNUC__
-#      define FORCE_INLINE static inline __attribute__((always_inline))
-#    else
-#      define FORCE_INLINE static inline
-#    endif
-#  else
-#    define FORCE_INLINE static
-#  endif /* __STDC_VERSION__ */
-#endif
-
-
-/* **************************************************************
 *  Includes
 ****************************************************************/
 #include <stdlib.h>     /* malloc, free, qsort */
 #include <string.h>     /* memcpy, memset */
 #include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
+#include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
+#include "error_private.h"


 /* **************************************************************
 *  Error Management
 ****************************************************************/
+#define FSE_isError ERR_isError
 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */

@@ -781,7 +763,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,

 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }

-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }

 /* FSE_compress_wksp() :
diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c
index 7af0789a9c..2a47c18205 100644
--- a/thirdparty/zstd/compress/huf_compress.c
+++ b/thirdparty/zstd/compress/huf_compress.c
@@ -50,13 +50,15 @@
 #include "fse.h"        /* header compression */
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
+#include "error_private.h"


 /* **************************************************************
 *  Error Management
 ****************************************************************/
+#define HUF_isError ERR_isError
 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }


@@ -436,7 +438,7 @@ static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt*

 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

-#define HUF_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

 #define HUF_FLUSHBITS_1(stream) \
     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
@@ -451,7 +453,6 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
     size_t n;
-    const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
     BIT_CStream_t bitC;

     /* init */
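Note on the CHECK_V_F change applied in both fse_compress.c and huf_compress.c above: the old macro returned `f` on the error path, re-expanding and re-evaluating the whole expression a second time instead of returning the already-computed result `e`. If `f` has side effects (any function call does), the failing operation runs twice. A minimal standalone sketch of the difference; `failingOp` and its error convention are hypothetical stand-ins, not zstd code:

    #include <stdio.h>

    static int callCount = 0;
    /* hypothetical stand-in for a size_t-returning operation that fails */
    static size_t failingOp(void) { callCount++; return (size_t)-1; }
    #define ERR_isError(c) ((c) == (size_t)-1)

    /* old: error path returns f, so the expression is evaluated again */
    #define CHECK_V_F_OLD(e, f) size_t const e = f; if (ERR_isError(e)) return f
    /* new: error path returns the cached result e; f runs exactly once */
    #define CHECK_V_F_NEW(e, f) size_t const e = f; if (ERR_isError(e)) return e

    static size_t useOld(void) { CHECK_V_F_OLD(r, failingOp()); return r; }
    static size_t useNew(void) { CHECK_V_F_NEW(r, failingOp()); return r; }

    int main(void) {
        callCount = 0; (void)useOld(); printf("old macro: %d calls\n", callCount); /* 2 */
        callCount = 0; (void)useNew(); printf("new macro: %d calls\n", callCount); /* 1 */
        return 0;
    }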
diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c
index 9300357f2d..0322c03eb3 100644
--- a/thirdparty/zstd/compress/zstd_compress.c
+++ b/thirdparty/zstd/compress/zstd_compress.c
@@ -1,10 +1,10 @@
-/**
+/*
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */

@@ -36,13 +36,6 @@ static const U32 g_searchStrength = 8;   /* control skip over incompressible dat
 #define HASH_READ_SIZE 8
 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;

-/* entropy tables always have same size */
-static size_t const hufCTable_size = HUF_CTABLE_SIZE(255);
-static size_t const litlengthCTable_size = FSE_CTABLE_SIZE(LLFSELog, MaxLL);
-static size_t const offcodeCTable_size = FSE_CTABLE_SIZE(OffFSELog, MaxOff);
-static size_t const matchlengthCTable_size = FSE_CTABLE_SIZE(MLFSELog, MaxML);
-static size_t const entropyScratchSpace_size = HUF_WORKSPACE_SIZE;
-

 /*-*************************************
 *  Helper functions
@@ -89,8 +82,6 @@ struct ZSTD_CCtx_s {
     U32   loadedDictEnd;    /* index of end of dictionary */
     U32   forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
     ZSTD_compressionStage_e stage;
-    U32   rep[ZSTD_REP_NUM];
-    U32   repToConfirm[ZSTD_REP_NUM];
     U32   dictID;
     int   compressionLevel;
     ZSTD_parameters requestedParams;
@@ -105,16 +96,11 @@ struct ZSTD_CCtx_s {
     size_t staticSize;

     seqStore_t seqStore;    /* sequences storage ptrs */
+    optState_t optState;
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
-    HUF_repeat hufCTable_repeatMode;
-    HUF_CElt* hufCTable;
-    U32 fseCTables_ready;
-    FSE_CTable* offcodeCTable;
-    FSE_CTable* matchlengthCTable;
-    FSE_CTable* litlengthCTable;
-    unsigned* entropyScratchSpace;
+    ZSTD_entropyCTables_t* entropy;

     /* streaming */
     char*  inBuff;
@@ -174,19 +160,9 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
     cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);

     /* entropy space (never moves) */
-    /* note : this code should be shared with resetCCtx, rather than copy/pasted */
-    {   void* ptr = cctx->workSpace;
-        cctx->hufCTable = (HUF_CElt*)ptr;
-        ptr = (char*)cctx->hufCTable + hufCTable_size;
-        cctx->offcodeCTable = (FSE_CTable*) ptr;
-        ptr = (char*)ptr + offcodeCTable_size;
-        cctx->matchlengthCTable = (FSE_CTable*) ptr;
-        ptr = (char*)ptr + matchlengthCTable_size;
-        cctx->litlengthCTable = (FSE_CTable*) ptr;
-        ptr = (char*)ptr + litlengthCTable_size;
-        assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
-        cctx->entropyScratchSpace = (unsigned*) ptr;
-    }
+    if (cctx->workSpaceSize < sizeof(ZSTD_entropyCTables_t)) return NULL;
+    assert(((size_t)cctx->workSpace & 7) == 0);   /* ensure correct alignment */
+    cctx->entropy = (ZSTD_entropyCTables_t*)cctx->workSpace;

     return cctx;
 }
@@ -237,7 +213,7 @@ size_t ZSTD_setCCtxParameter(ZSTD_CCtx* cctx, ZSTD_CCtxParameter param, unsigned
         ZSTD_STATIC_ASSERT(ZSTD_dm_auto==0);
         ZSTD_STATIC_ASSERT(ZSTD_dm_rawContent==1);
     case ZSTD_p_forceRawDict : cctx->dictMode = (ZSTD_dictMode_e)(value>0); return 0;
-    default: return ERROR(parameter_unknown);
+    default: return ERROR(parameter_unsupported);
    }
 }
@@ -251,9 +227,9 @@ static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
     cctx->compressionLevel = ZSTD_CLEVEL_CUSTOM;
 }

-#define CLAMPCHECK(val,min,max) {                       \
-    if (((val)<(min)) | ((val)>(max))) {                \
-        return ERROR(compressionParameter_outOfBound);  \
+#define CLAMPCHECK(val,min,max) {                       \
+    if (((val)<(min)) | ((val)>(max))) {                \
+        return ERROR(parameter_outOfBound);             \
 }   }

 size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
@@ -349,7 +325,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
         /* restrict dictionary mode, to "rawContent" or "fullDict" only */
         ZSTD_STATIC_ASSERT((U32)ZSTD_dm_fullDict > (U32)ZSTD_dm_rawContent);
         if (value > (unsigned)ZSTD_dm_fullDict)
-            return ERROR(compressionParameter_outOfBound);
+            return ERROR(parameter_outOfBound);
         cctx->dictMode = (ZSTD_dictMode_e)value;
         return 0;

@@ -370,31 +346,31 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
         if (value==0) return 0;
         DEBUGLOG(5, " setting nbThreads : %u", value);
 #ifndef ZSTD_MULTITHREAD
-        if (value > 1) return ERROR(compressionParameter_unsupported);
+        if (value > 1) return ERROR(parameter_unsupported);
 #endif
         if ((value>1) && (cctx->nbThreads != value)) {
             if (cctx->staticSize)  /* MT not compatible with static alloc */
-                return ERROR(compressionParameter_unsupported);
+                return ERROR(parameter_unsupported);
             ZSTDMT_freeCCtx(cctx->mtctx);
             cctx->nbThreads = 1;
-            cctx->mtctx = ZSTDMT_createCCtx(value);
+            cctx->mtctx = ZSTDMT_createCCtx_advanced(value, cctx->customMem);
             if (cctx->mtctx == NULL) return ERROR(memory_allocation);
        }
         cctx->nbThreads = value;
         return 0;

     case ZSTD_p_jobSize:
-        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        if (cctx->nbThreads <= 1) return ERROR(parameter_unsupported);
         assert(cctx->mtctx != NULL);
         return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_sectionSize, value);

     case ZSTD_p_overlapSizeLog:
         DEBUGLOG(5, " setting overlap with nbThreads == %u", cctx->nbThreads);
-        if (cctx->nbThreads <= 1) return ERROR(compressionParameter_unsupported);
+        if (cctx->nbThreads <= 1) return ERROR(parameter_unsupported);
         assert(cctx->mtctx != NULL);
         return ZSTDMT_setMTCtxParameter(cctx->mtctx, ZSTDMT_p_overlapSectionLog, value);

-    default: return ERROR(parameter_unknown);
+    default: return ERROR(parameter_unsupported);
    }
 }
@@ -474,7 +450,8 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
     CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
     CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
     CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) return ERROR(compressionParameter_unsupported);
+    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
+        return ERROR(parameter_unsupported);
     return 0;
 }

@@ -551,9 +528,7 @@ size_t ZSTD_estimateCCtxSize_advanced(ZSTD_compressionParameters cParams)
     size_t const hSize = ((size_t)1) << cParams.hashLog;
     U32    const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
     size_t const h3Size = ((size_t)1) << hashLog3;
-    size_t const entropySpace = hufCTable_size + litlengthCTable_size
-                              + offcodeCTable_size + matchlengthCTable_size
-                              + entropyScratchSpace_size;
+    size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);

     size_t const optBudget = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
@@ -620,8 +595,8 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 ple
     cctx->stage = ZSTDcs_init;
     cctx->dictID = 0;
     cctx->loadedDictEnd = 0;
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
-    cctx->seqStore.litLengthSum = 0;  /* force reset of btopt stats */
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = repStartValue[i]; }
+    cctx->optState.litLengthSum = 0;  /* force reset of btopt stats */
     XXH64_reset(&cctx->xxhState, 0);
     return 0;
 }
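The dominant refactor in zstd_compress.c: five separately sized and separately tracked entropy tables (the Huffman literals table, three FSE tables, and a scratch buffer) are folded into a single ZSTD_entropyCTables_t, so workspace sizing becomes a sizeof and context duplication becomes one memcpy. The struct itself is defined upstream in zstd_internal.h and is not part of this diff view; reconstructed approximately from the fields the hunks touch:

    typedef struct {
        U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];   /* Huffman table for literals */
        FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
        FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
        FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
        U32 workspace[HUF_WORKSPACE_SIZE_U32];     /* shared scratch for table building */
        HUF_repeat hufCTable_repeatMode;           /* can the previous table be reused? */
        FSE_repeat offcode_repeatMode;
        FSE_repeat matchlength_repeatMode;
        FSE_repeat litlength_repeatMode;
    } ZSTD_entropyCTables_t;

One fixed-size struct is what lets ZSTD_initStaticCCtx above validate its workspace with a single size check, and lets ZSTD_copyCCtx_internal below copy all entropy state in one memcpy.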
@@ -641,8 +616,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
     if (crp == ZSTDcrp_continue) {
         if (ZSTD_equivalentParams(params.cParams, zc->appliedParams.cParams)) {
             DEBUGLOG(5, "ZSTD_equivalentParams()==1");
-            zc->fseCTables_ready = 0;
-            zc->hufCTable_repeatMode = HUF_repeat_none;
+            zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+            zc->entropy->offcode_repeatMode = FSE_repeat_none;
+            zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+            zc->entropy->litlength_repeatMode = FSE_repeat_none;
             return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
    }   }

@@ -662,9 +639,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         void* ptr;

         /* Check if workSpace is large enough, alloc a new one if needed */
-        {   size_t const entropySpace = hufCTable_size + litlengthCTable_size
-                              + offcodeCTable_size + matchlengthCTable_size
-                              + entropyScratchSpace_size;
+        {   size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
             size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
                                   + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
             size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
@@ -689,16 +664,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
             ptr = zc->workSpace;

             /* entropy space */
-            zc->hufCTable = (HUF_CElt*)ptr;
-            ptr = (char*)zc->hufCTable + hufCTable_size;  /* note : HUF_CElt* is incomplete type, size is estimated via macro */
-            zc->offcodeCTable = (FSE_CTable*) ptr;
-            ptr = (char*)ptr + offcodeCTable_size;
-            zc->matchlengthCTable = (FSE_CTable*) ptr;
-            ptr = (char*)ptr + matchlengthCTable_size;
-            zc->litlengthCTable = (FSE_CTable*) ptr;
-            ptr = (char*)ptr + litlengthCTable_size;
-            assert(((size_t)ptr & 3) == 0);   /* ensure correct alignment */
-            zc->entropyScratchSpace = (unsigned*) ptr;
+            assert(((size_t)zc->workSpace & 3) == 0);   /* ensure correct alignment */
+            assert(zc->workSpaceSize >= sizeof(ZSTD_entropyCTables_t));
+            zc->entropy = (ZSTD_entropyCTables_t*)zc->workSpace;
        }   }

         /* init params */
@@ -715,39 +683,35 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         zc->stage = ZSTDcs_init;
         zc->dictID = 0;
         zc->loadedDictEnd = 0;
-        zc->fseCTables_ready = 0;
-        zc->hufCTable_repeatMode = HUF_repeat_none;
+        zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
+        zc->entropy->offcode_repeatMode = FSE_repeat_none;
+        zc->entropy->matchlength_repeatMode = FSE_repeat_none;
+        zc->entropy->litlength_repeatMode = FSE_repeat_none;
         zc->nextToUpdate = 1;
         zc->nextSrc = NULL;
         zc->base = NULL;
         zc->dictBase = NULL;
         zc->dictLimit = 0;
         zc->lowLimit = 0;
-        { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->seqStore.rep[i] = repStartValue[i]; }
         zc->hashLog3 = hashLog3;
-        zc->seqStore.litLengthSum = 0;
+        zc->optState.litLengthSum = 0;

-        /* ensure entropy tables are close together at the beginning */
-        assert((void*)zc->hufCTable == zc->workSpace);
-        assert((char*)zc->offcodeCTable == (char*)zc->hufCTable + hufCTable_size);
-        assert((char*)zc->matchlengthCTable == (char*)zc->offcodeCTable + offcodeCTable_size);
-        assert((char*)zc->litlengthCTable == (char*)zc->matchlengthCTable + matchlengthCTable_size);
-        assert((char*)zc->entropyScratchSpace == (char*)zc->litlengthCTable + litlengthCTable_size);
-        ptr = (char*)zc->entropyScratchSpace + entropyScratchSpace_size;
+        ptr = zc->entropy + 1;

         /* opt parser space */
         if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
             DEBUGLOG(5, "reserving optimal parser space");
             assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
-            zc->seqStore.litFreq = (U32*)ptr;
-            zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
-            zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
-            zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
-            ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
-            zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
-            ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
-            zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
-            ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
+            zc->optState.litFreq = (U32*)ptr;
+            zc->optState.litLengthFreq = zc->optState.litFreq + (1<<Litbits);
+            zc->optState.matchLengthFreq = zc->optState.litLengthFreq + (MaxLL+1);
+            zc->optState.offCodeFreq = zc->optState.matchLengthFreq + (MaxML+1);
+            ptr = zc->optState.offCodeFreq + (MaxOff+1);
+            zc->optState.matchTable = (ZSTD_match_t*)ptr;
+            ptr = zc->optState.matchTable + ZSTD_OPT_NUM+1;
+            zc->optState.priceTable = (ZSTD_optimal_t*)ptr;
+            ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1;
        }

         /* table Space */
@@ -783,7 +747,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
  *  do not use with extDict variant ! */
 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
     int i;
-    for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = 0;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = 0;
 }

@@ -830,16 +794,7 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
     dstCCtx->dictID = srcCCtx->dictID;

     /* copy entropy tables */
-    dstCCtx->fseCTables_ready = srcCCtx->fseCTables_ready;
-    if (srcCCtx->fseCTables_ready) {
-        memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, litlengthCTable_size);
-        memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, matchlengthCTable_size);
-        memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, offcodeCTable_size);
-    }
-    dstCCtx->hufCTable_repeatMode = srcCCtx->hufCTable_repeatMode;
-    if (srcCCtx->hufCTable_repeatMode) {
-        memcpy(dstCCtx->hufCTable, srcCCtx->hufCTable, hufCTable_size);
-    }
+    memcpy(dstCCtx->entropy, srcCCtx->entropy, sizeof(ZSTD_entropyCTables_t));

     return 0;
 }
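The literals path in the next hunks gains an explicit `| ERR_isError(cLitSize)` in its fall-back-to-raw test. zstd encodes errors as size_t values at the very top of the unsigned range, so sizes and error codes live in one domain and can be classified with a single comparison; the added check documents that a failed Huffman pass must also degrade to uncompressed literals. A sketch of the idiom, with a placeholder bound (the real constants live in error_private.h):

    #include <stdio.h>
    #include <stddef.h>

    #define MAX_ERROR_CODE 120   /* placeholder bound for the sketch */

    static size_t makeError(unsigned code) { return (size_t)0 - code; }   /* wraps near SIZE_MAX */
    static int    isError(size_t v)        { return v > (size_t)0 - MAX_ERROR_CODE; }

    int main(void) {
        printf("%d\n", isError(makeError(70)));  /* 1 : an error code */
        printf("%d\n", isError(131072));         /* 0 : a plausible compressed size */
        return 0;
    }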
@@ -956,7 +911,8 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons

 static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }

-static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
+static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
+                                     ZSTD_strategy strategy,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize)
 {
@@ -970,28 +926,28 @@ static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,

     /* small ? don't even attempt compression (speed opt) */
 #   define LITERAL_NOENTROPY 63
-    {   size_t const minLitSize = zc->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
+    {   size_t const minLitSize = entropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

     if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
-    {   HUF_repeat repeat = zc->hufCTable_repeatMode;
-        int const preferRepeat = zc->appliedParams.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+    {   HUF_repeat repeat = entropy->hufCTable_repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
         cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat)
+                                      entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat)
                                 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      zc->entropyScratchSpace, entropyScratchSpace_size, zc->hufCTable, &repeat, preferRepeat);
+                                      entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat);
         if (repeat != HUF_repeat_none) { hType = set_repeat; }    /* reused the existing table */
-        else { zc->hufCTable_repeatMode = HUF_repeat_check; }     /* now have a table to reuse */
+        else { entropy->hufCTable_repeatMode = HUF_repeat_check; }  /* now have a table to reuse */
    }

-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) {
-        zc->hufCTable_repeatMode = HUF_repeat_none;
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        entropy->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }
     if (cLitSize==1) {
-        zc->hufCTable_repeatMode = HUF_repeat_none;
+        entropy->hufCTable_repeatMode = HUF_repeat_none;
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
    }

@@ -1062,17 +1018,154 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
     mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }

-MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
-                              void* dst, size_t dstCapacity,
-                              size_t srcSize)
+MEM_STATIC symbolEncodingType_e ZSTD_selectEncodingType(FSE_repeat* repeatMode,
+        size_t const mostFrequent, size_t nbSeq, U32 defaultNormLog)
+{
+#define MIN_SEQ_FOR_DYNAMIC_FSE   64
+#define MAX_SEQ_FOR_STATIC_FSE  1000
+
+    if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
+        *repeatMode = FSE_repeat_check;
+        return set_rle;
+    }
+    if ((*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
+        return set_repeat;
+    }
+    if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) {
+        *repeatMode = FSE_repeat_valid;
+        return set_basic;
+    }
+    *repeatMode = FSE_repeat_check;
+    return set_compressed;
+}
+
+MEM_STATIC size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
+        FSE_CTable* CTable, U32 FSELog, symbolEncodingType_e type,
+        U32* count, U32 max,
+        BYTE const* codeTable, size_t nbSeq,
+        S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+        void* workspace, size_t workspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE const* const oend = op + dstCapacity;
+
+    switch (type) {
+    case set_rle:
+        *op = codeTable[0];
+        CHECK_F(FSE_buildCTable_rle(CTable, (BYTE)max));
+        return 1;
+    case set_repeat:
+        return 0;
+    case set_basic:
+        CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));
+        return 0;
+    case set_compressed: {
+        S16 norm[MaxSeq + 1];
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
+            if (FSE_isError(NCountSize)) return NCountSize;
+            CHECK_F(FSE_buildCTable_wksp(CTable, norm, max, tableLog, workspace, workspaceSize));
+            return NCountSize;
+        }
+    }
+    default: return assert(0), ERROR(GENERIC);
+    }
+}
+
+MEM_STATIC size_t ZSTD_encodeSequences(void* dst, size_t dstCapacity,
+    FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+    FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+    FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+    seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;                                   /* 32b*/  /* 64b*/
+            U32  const mlBits = ML_bits[mlCode];
+                                                                          /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);     /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);    /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);      /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                              /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                /* (7)*/
+            if (longOffsets) {
+                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                          /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                          /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);   /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                  /* (7)*/
+    }   }
+
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
+        return streamSize;
+    }
+}
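ZSTD_encodeSequences, split out above, emits sequences back to front because FSE decoding consumes symbols in reverse; the encoder's main loop therefore counts down with an intentional unsigned underflow instead of a signed index. A standalone illustration of that loop idiom:

    #include <stdio.h>

    int main(void) {
        size_t const nbSeq = 3;
        size_t n;
        /* visits 1, then 0; the final n-- wraps to SIZE_MAX, so n < nbSeq fails and the loop exits */
        for (n = nbSeq - 2; n < nbSeq; n--)
            printf("encoding sequence %zu\n", n);
        return 0;
    }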
+
+MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
+                              ZSTD_entropyCTables_t* entropy,
+                              ZSTD_compressionParameters const* cParams,
+                              void* dst, size_t dstCapacity)
 {
-    const int longOffsets = zc->appliedParams.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-    const seqStore_t* seqStorePtr = &(zc->seqStore);
+    const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
     U32 count[MaxSeq+1];
-    S16 norm[MaxSeq+1];
-    FSE_CTable* CTable_LitLength = zc->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
+    FSE_CTable* CTable_LitLength = entropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = entropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = entropy->matchlengthCTable;
     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const seqDef* const sequences = seqStorePtr->sequencesStart;
     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1083,13 +1176,16 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
     BYTE* op = ostart;
     size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     BYTE* seqHead;
-    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
+
+    ZSTD_STATIC_ASSERT(sizeof(entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));

     /* Compress literals */
     {   const BYTE* const literals = seqStorePtr->litStart;
         size_t const litSize = seqStorePtr->lit - literals;
-        size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
-        if (ZSTD_isError(cSize)) return cSize;
+        size_t const cSize = ZSTD_compressLiterals(
+                entropy, cParams->strategy, op, dstCapacity, literals, litSize);
+        if (ZSTD_isError(cSize))
+            return cSize;
         op += cSize;
    }

@@ -1098,177 +1194,89 @@ MEM_STATIC size_t ZSTD_compressSequences (ZSTD_CCtx* zc,
     if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
     else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
-    if (nbSeq==0) goto _check_compressibility;
+    if (nbSeq==0) return op - ostart;

     /* seqHead : flags for FSE encoding type */
     seqHead = op++;

-#define MIN_SEQ_FOR_DYNAMIC_FSE   64
-#define MAX_SEQ_FOR_STATIC_FSE  1000
-
     /* convert length/distances into codes */
     ZSTD_seqToCodes(seqStorePtr);
-
     /* CTable for Literal Lengths */
     {   U32 max = MaxLL;
-        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->entropyScratchSpace);
-        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-            *op++ = llCodeTable[0];
-            FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-            LLtype = set_rle;
-        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-            LLtype = set_repeat;
-        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
-            FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
-            LLtype = set_basic;
-        } else {
-            size_t nbSeq_1 = nbSeq;
-            const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
-            if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
-            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-            {   size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-                if (FSE_isError(NCountSize)) return NCountSize;
-                op += NCountSize; }
-            FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
-            LLtype = set_compressed;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
+        LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                    entropy->workspace, sizeof(entropy->workspace));
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
    }   }
-
     /* CTable for Offsets */
     {   U32 max = MaxOff;
-        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->entropyScratchSpace);
-        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-            *op++ = ofCodeTable[0];
-            FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-            Offtype = set_rle;
-        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-            Offtype = set_repeat;
-        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
-            FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
-            Offtype = set_basic;
-        } else {
-            size_t nbSeq_1 = nbSeq;
-            const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
-            if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
-            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-            {   size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-                if (FSE_isError(NCountSize)) return NCountSize;
-                op += NCountSize; }
-            FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
-            Offtype = set_compressed;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
+        Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, MaxOff,
+                    entropy->workspace, sizeof(entropy->workspace));
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
    }   }
-
     /* CTable for MatchLengths */
     {   U32 max = MaxML;
-        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->entropyScratchSpace);
-        if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-            *op++ = *mlCodeTable;
-            FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-            MLtype = set_rle;
-        } else if ((zc->fseCTables_ready) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-            MLtype = set_repeat;
-        } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
-            FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer));
-            MLtype = set_basic;
-        } else {
-            size_t nbSeq_1 = nbSeq;
-            const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
-            if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
-            FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-            {   size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog);   /* overflow protected */
-                if (FSE_isError(NCountSize)) return NCountSize;
-                op += NCountSize; }
-            FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer));
-            MLtype = set_compressed;
+        size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
+        MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog);
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                    entropy->workspace, sizeof(entropy->workspace));
+            if (ZSTD_isError(countSize)) return countSize;
+            op += countSize;
    }   }

     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
-    zc->fseCTables_ready = 0;
-
-    /* Encoding Sequences */
-    {   BIT_CStream_t blockStream;
-        FSE_CState_t  stateMatchLength;
-        FSE_CState_t  stateOffsetBits;
-        FSE_CState_t  stateLitLength;
-
-        CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
-
-        /* first symbols */
-        FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
-        FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
-        FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
-        if (MEM_32bits()) BIT_flushBits(&blockStream);
-        BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
-        if (MEM_32bits()) BIT_flushBits(&blockStream);
-        if (longOffsets) {
-            U32 const ofBits = ofCodeTable[nbSeq-1];
-            int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-            if (extraBits) {
-                BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
-                BIT_flushBits(&blockStream);
-            }
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
-                        ofBits - extraBits);
-        } else {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
-        }
-        BIT_flushBits(&blockStream);
-
-        {   size_t n;
-            for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
-                BYTE const llCode = llCodeTable[n];
-                BYTE const ofCode = ofCodeTable[n];
-                BYTE const mlCode = mlCodeTable[n];
-                U32  const llBits = LL_bits[llCode];
-                U32  const ofBits = ofCode;                                   /* 32b*/  /* 64b*/
-                U32  const mlBits = ML_bits[mlCode];
-                                                                              /* (7)*/  /* (7)*/
-                FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);     /* 15 */  /* 15 */
-                FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);    /* 24 */  /* 24 */
-                if (MEM_32bits()) BIT_flushBits(&blockStream);                /* (7)*/
-                FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);      /* 16 */  /* 33 */
-                if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
-                    BIT_flushBits(&blockStream);                              /* (7)*/
-                BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-                if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-                BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
-                if (MEM_32bits()) BIT_flushBits(&blockStream);                /* (7)*/
-                if (longOffsets) {
-                    int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-                    if (extraBits) {
-                        BIT_addBits(&blockStream, sequences[n].offset, extraBits);
-                        BIT_flushBits(&blockStream);                          /* (7)*/
-                    }
-                    BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
-                                ofBits - extraBits);                          /* 31 */
-                } else {
-                    BIT_addBits(&blockStream, sequences[n].offset, ofBits);   /* 31 */
-                }
-                BIT_flushBits(&blockStream);                                  /* (7)*/
-        }   }
-
-        FSE_flushCState(&blockStream, &stateMatchLength);
-        FSE_flushCState(&blockStream, &stateOffsetBits);
-        FSE_flushCState(&blockStream, &stateLitLength);
+    {   size_t const streamSize = ZSTD_encodeSequences(op, oend - op,
+                CTable_MatchLength, mlCodeTable,
+                CTable_OffsetBits, ofCodeTable,
+                CTable_LitLength, llCodeTable,
+                sequences, nbSeq, longOffsets);
+        if (ZSTD_isError(streamSize)) return streamSize;
+        op += streamSize;
+    }

-        {   size_t const streamSize = BIT_closeCStream(&blockStream);
-            if (streamSize==0) return ERROR(dstSize_tooSmall);   /* not enough space */
-            op += streamSize;
-    }   }
+    return op - ostart;
+}

-    /* check compressibility */
-_check_compressibility:
-    {   size_t const minGain = ZSTD_minGain(srcSize);
-        size_t const maxCSize = srcSize - minGain;
-        if ((size_t)(op-ostart) >= maxCSize) {
-            zc->hufCTable_repeatMode = HUF_repeat_none;
-            return 0;
-    }   }
+MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
+                              ZSTD_entropyCTables_t* entropy,
+                              ZSTD_compressionParameters const* cParams,
+                              void* dst, size_t dstCapacity,
+                              size_t srcSize)
+{
+    size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams,
+                                                         dst, dstCapacity);
+    size_t const minGain = ZSTD_minGain(srcSize);
+    size_t const maxCSize = srcSize - minGain;
+    /* If the srcSize <= dstCapacity, then there is enough space to write a
+     * raw uncompressed block. Since we ran out of space, the block must not
+     * be compressible, so fall back to a raw uncompressed block.
+     */
+    int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
+
+    if (ZSTD_isError(cSize) && !uncompressibleError)
+        return cSize;
+    /* Check compressibility */
+    if (cSize >= maxCSize || uncompressibleError) {
+        entropy->hufCTable_repeatMode = HUF_repeat_none;
+        entropy->offcode_repeatMode = FSE_repeat_none;
+        entropy->matchlength_repeatMode = FSE_repeat_none;
+        entropy->litlength_repeatMode = FSE_repeat_none;
+        return 0;
+    }
+    assert(!ZSTD_isError(cSize));

     /* confirm repcodes */
-    { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->repToConfirm[i]; }
-
-    return op - ostart;
+    { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->rep[i] = seqStorePtr->repToConfirm[i]; }
+    return cSize;
 }
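The new ZSTD_compressSequences wrapper above turns the keep-or-discard policy into a separate, testable step: a block is stored compressed only if it beats srcSize by at least ZSTD_minGain(srcSize), and a dstSize_tooSmall error is reinterpreted as "not compressible" whenever a raw block would still fit. A sketch of the arithmetic, using the same formula as ZSTD_minGain():

    #include <stddef.h>
    #include <stdio.h>

    static size_t minGain(size_t srcSize) { return (srcSize >> 6) + 2; }

    /* sketch: is a compressed result of cSize bytes worth keeping for this block? */
    static int keepCompressed(size_t srcSize, size_t cSize) {
        return cSize < srcSize - minGain(srcSize);
    }

    int main(void) {
        printf("%zu\n", minGain(128 * 1024));           /* 2050 : required saving for a 128 KB block */
        printf("%d\n", keepCompressed(131072, 130000)); /* 0 : stored as a raw block */
        printf("%d\n", keepCompressed(131072, 100000)); /* 1 : kept compressed */
        return 0;
    }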
@@ -1475,7 +1483,7 @@ static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
 }


-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
                                      const void* src, size_t srcSize,
                                      const U32 mls)
@@ -1491,7 +1499,7 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
     const BYTE* const lowest = base + lowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
     U32 offsetSaved = 0;

     /* init */
@@ -1552,8 +1560,8 @@ void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
    }   }   }

     /* save reps for next block */
-    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;

     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1601,7 +1609,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];

     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -1667,7 +1675,7 @@ static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
    }   }   }

     /* save reps for next block */
-    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
+    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1718,7 +1726,7 @@ static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U3
 }


-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
                                  const void* src, size_t srcSize,
                                  const U32 mls)
@@ -1736,7 +1744,7 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
     const BYTE* const lowest = base + lowestIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
     U32 offsetSaved = 0;

     /* init */
@@ -1823,8 +1831,8 @@ void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
    }   }   }

     /* save reps for next block */
-    cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-    cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
+    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;

     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -1873,7 +1881,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
     const BYTE* const dictEnd = dictBase + dictLimit;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - 8;
-    U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
+    U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];

     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -1973,7 +1981,7 @@ static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
    }   }   }

     /* save reps for next block */
-    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
+    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2276,7 +2284,7 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (

 /* Update chains up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
 {
     U32* const hashTable  = zc->hashTable;
@@ -2300,7 +2308,7 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)


 /* inlining is important to hardwire a hot branch (template emulation) */
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 size_t ZSTD_HcFindBestMatch_generic (
                         ZSTD_CCtx* zc,   /* Index table will be updated */
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -2352,7 +2360,7 @@ size_t ZSTD_HcFindBestMatch_generic (
 }


-FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
                         ZSTD_CCtx* zc,
                         const BYTE* ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -2369,7 +2377,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
 }


-FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
                         ZSTD_CCtx* zc,
                         const BYTE* ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -2389,7 +2397,7 @@ FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize,
                                      const U32 searchMethod, const U32 depth)
@@ -2409,7 +2417,7 @@ void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
                         size_t* offsetPtr,
                         U32 maxNbAttempts, U32 matchLengthSearch);
     searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
+    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;

     /* init */
     ip += (ip==base);
@@ -2519,8 +2527,8 @@ _storeSequence:
    }   }

     /* Save reps for next block */
-    ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
-    ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
+    seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
+    seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;

     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2551,7 +2559,7 @@ static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t sr
 }


-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                                      const void* src, size_t srcSize,
                                      const U32 searchMethod, const U32 depth)
@@ -2578,7 +2586,7 @@ void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
                         U32 maxNbAttempts, U32 matchLengthSearch);
     searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
-    U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
+    U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];

     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
@@ -2714,7 +2722,7 @@ _storeSequence:
    }   }

     /* Save reps for next block */
-    ctx->repToConfirm[0] = offset_1; ctx->repToConfirm[1] = offset_2;
+    seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;

     /* Last Literals */
     {   size_t const lastLLSize = iend - anchor;
@@ -2823,7 +2831,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCa
     if (current > zc->nextToUpdate + 384)
         zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* limited update after finding a very long match */
     blockCompressor(zc, src, srcSize);
-    return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
+    return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize);
 }

@@ -3000,7 +3008,6 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
     return fhSize;
 }

-
 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
@@ -3106,13 +3113,14 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
     const BYTE* const dictEnd = dictPtr + dictSize;
     short offcodeNCount[MaxOff+1];
     unsigned offcodeMaxValue = MaxOff;
-    BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)];
+
+    ZSTD_STATIC_ASSERT(sizeof(cctx->entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));

     dictPtr += 4;   /* skip magic number */
     cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
     dictPtr += 4;

-    {   size_t const hufHeaderSize = HUF_readCTable(cctx->hufCTable, 255, dictPtr, dictEnd-dictPtr);
+    {   size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)cctx->entropy->hufCTable, 255, dictPtr, dictEnd-dictPtr);
         if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
         dictPtr += hufHeaderSize;
    }

@@ -3122,7 +3130,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
         if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
         /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
-        CHECK_E( FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)),
+        CHECK_E( FSE_buildCTable_wksp(cctx->entropy->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
                  dictionary_corrupted);
         dictPtr += offcodeHeaderSize;
    }

@@ -3134,7 +3142,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
         /* Every match length code must have non-zero probability */
         CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
-        CHECK_E( FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+        CHECK_E( FSE_buildCTable_wksp(cctx->entropy->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
                  dictionary_corrupted);
         dictPtr += matchlengthHeaderSize;
    }

@@ -3146,15 +3154,15 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
         /* Every literal length code must have non-zero probability */
         CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
-        CHECK_E( FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)),
+        CHECK_E( FSE_buildCTable_wksp(cctx->entropy->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
                  dictionary_corrupted);
         dictPtr += litlengthHeaderSize;
    }

     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    cctx->rep[0] = MEM_readLE32(dictPtr+0);
-    cctx->rep[1] = MEM_readLE32(dictPtr+4);
-    cctx->rep[2] = MEM_readLE32(dictPtr+8);
+    cctx->seqStore.rep[0] = MEM_readLE32(dictPtr+0);
+    cctx->seqStore.rep[1] = MEM_readLE32(dictPtr+4);
+    cctx->seqStore.rep[2] = MEM_readLE32(dictPtr+8);
     dictPtr += 12;

     {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
@@ -3168,12 +3176,14 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
         /* All repCodes must be <= dictContentSize and != 0*/
         {   U32 u;
             for (u=0; u<3; u++) {
-                if (cctx->rep[u] == 0) return ERROR(dictionary_corrupted);
-                if (cctx->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+                if (cctx->seqStore.rep[u] == 0) return ERROR(dictionary_corrupted);
+                if (cctx->seqStore.rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
        }   }

-        cctx->fseCTables_ready = 1;
-        cctx->hufCTable_repeatMode = HUF_repeat_valid;
+        cctx->entropy->hufCTable_repeatMode = HUF_repeat_valid;
+        cctx->entropy->offcode_repeatMode = FSE_repeat_valid;
+        cctx->entropy->matchlength_repeatMode = FSE_repeat_valid;
+        cctx->entropy->litlength_repeatMode = FSE_repeat_valid;
         return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
    }
 }
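ZSTD_loadZstdDictionary above ends by marking every table HUF_repeat_valid / FSE_repeat_valid, which is what lets the first blocks after a dictionary load take the cheap set_repeat path in ZSTD_selectEncodingType earlier in this diff. A condensed view of the repeat-mode states (enum as in fse.h at this version) and the selection policy:

    typedef enum { FSE_repeat_none,    /* no usable previous table */
                   FSE_repeat_check,   /* a table exists but must be re-validated per block */
                   FSE_repeat_valid    /* table known valid, e.g. just built from a dictionary */
                 } FSE_repeat;

    /* decision order in ZSTD_selectEncodingType (see the hunk earlier in this diff):
     *   all nbSeq occurrences are one symbol, and nbSeq > 2  -> set_rle
     *   repeat == FSE_repeat_valid and nbSeq < 1000          -> set_repeat (reuse previous table)
     *   nbSeq < 64, or counts too flat vs the default norm   -> set_basic (predefined table)
     *   otherwise                                            -> set_compressed (ship a fresh table)
     */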
diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h
index e8e98915ea..ae24732c7d 100644
--- a/thirdparty/zstd/compress/zstd_opt.h
+++ b/thirdparty/zstd/compress/zstd_opt.h
@@ -1,10 +1,10 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree. An additional grant
- * of patent rights can be found in the PATENTS file in the same directory.
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
  */

@@ -22,173 +22,173 @@
 /*-*************************************
 *  Price functions for optimal parser
 ***************************************/
-FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
+static void ZSTD_setLog2Prices(optState_t* optPtr)
 {
-    ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
-    ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
-    ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
-    ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
-    ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
+    optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
+    optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
+    optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
+    optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
+    optPtr->factor = 1 + ((optPtr->litSum>>5) / optPtr->litLengthSum) + ((optPtr->litSum<<1) / (optPtr->litSum + optPtr->matchSum));
 }


-MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
+static void ZSTD_rescaleFreqs(optState_t* optPtr, const BYTE* src, size_t srcSize)
 {
     unsigned u;

-    ssPtr->cachedLiterals = NULL;
-    ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
-    ssPtr->staticPrices = 0;
+    optPtr->cachedLiterals = NULL;
+    optPtr->cachedPrice = optPtr->cachedLitLength = 0;
+    optPtr->staticPrices = 0;

-    if (ssPtr->litLengthSum == 0) {
-        if (srcSize <= 1024) ssPtr->staticPrices = 1;
+    if (optPtr->litLengthSum == 0) {
+        if (srcSize <= 1024) optPtr->staticPrices = 1;

-        assert(ssPtr->litFreq!=NULL);
+        assert(optPtr->litFreq!=NULL);
         for (u=0; u<=MaxLit; u++)
-            ssPtr->litFreq[u] = 0;
+            optPtr->litFreq[u] = 0;
         for (u=0; u<srcSize; u++)
-            ssPtr->litFreq[src[u]]++;
+            optPtr->litFreq[src[u]]++;

-        ssPtr->litSum = 0;
-        ssPtr->litLengthSum = MaxLL+1;
-        ssPtr->matchLengthSum = MaxML+1;
-        ssPtr->offCodeSum = (MaxOff+1);
-        ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
+        optPtr->litSum = 0;
+        optPtr->litLengthSum = MaxLL+1;
+        optPtr->matchLengthSum = MaxML+1;
+        optPtr->offCodeSum = (MaxOff+1);
+        optPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);

         for (u=0; u<=MaxLit; u++) {
-            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->litSum += ssPtr->litFreq[u];
+            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->litSum += optPtr->litFreq[u];
        }
         for (u=0; u<=MaxLL; u++)
-            ssPtr->litLengthFreq[u] = 1;
+            optPtr->litLengthFreq[u] = 1;
         for (u=0; u<=MaxML; u++)
-            ssPtr->matchLengthFreq[u] = 1;
+            optPtr->matchLengthFreq[u] = 1;
         for (u=0; u<=MaxOff; u++)
-            ssPtr->offCodeFreq[u] = 1;
+            optPtr->offCodeFreq[u] = 1;
    } else {
-        ssPtr->matchLengthSum = 0;
-        ssPtr->litLengthSum = 0;
-        ssPtr->offCodeSum = 0;
-        ssPtr->matchSum = 0;
-        ssPtr->litSum = 0;
+        optPtr->matchLengthSum = 0;
+        optPtr->litLengthSum = 0;
+        optPtr->offCodeSum = 0;
+        optPtr->matchSum = 0;
+        optPtr->litSum = 0;

         for (u=0; u<=MaxLit; u++) {
-            ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
-            ssPtr->litSum += ssPtr->litFreq[u];
+            optPtr->litFreq[u] = 1 + (optPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
+            optPtr->litSum += optPtr->litFreq[u];
        }
         for (u=0; u<=MaxLL; u++) {
-            ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
-            ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
+            optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
+            optPtr->litLengthSum += optPtr->litLengthFreq[u];
        }
         for (u=0; u<=MaxML; u++) {
-            ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
-            ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
+            optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
+            optPtr->matchSum += optPtr->matchLengthFreq[u] * (u + 3);
        }
-        ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
+        optPtr->matchSum *= ZSTD_LITFREQ_ADD;
         for (u=0; u<=MaxOff; u++) {
-            ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
-            ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
+            optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
+            optPtr->offCodeSum += optPtr->offCodeFreq[u];
        }
    }

-    ZSTD_setLog2Prices(ssPtr);
+    ZSTD_setLog2Prices(optPtr);
 }


-FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
+static U32 ZSTD_getLiteralPrice(optState_t* optPtr, U32 litLength, const BYTE* literals)
 {
     U32 price, u;

-    if (ssPtr->staticPrices)
+    if (optPtr->staticPrices)
         return ZSTD_highbit32((U32)litLength+1) + (litLength*6);

     if (litLength == 0)
-        return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
+        return optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[0]+1);

     /* literals */
-    if (ssPtr->cachedLiterals == literals) {
-        U32 const additional = litLength - ssPtr->cachedLitLength;
-        const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
-        price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
+    if (optPtr->cachedLiterals == literals) {
+        U32 const additional = litLength - optPtr->cachedLitLength;
+        const BYTE* literals2 = optPtr->cachedLiterals + optPtr->cachedLitLength;
+        price = optPtr->cachedPrice + additional * optPtr->log2litSum;
         for (u=0; u < additional; u++)
-            price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1);
-        ssPtr->cachedPrice = price;
-        ssPtr->cachedLitLength = litLength;
+            price -= ZSTD_highbit32(optPtr->litFreq[literals2[u]]+1);
+        optPtr->cachedPrice = price;
+        optPtr->cachedLitLength = litLength;
    } else {
-        price = litLength * ssPtr->log2litSum;
+        price = litLength * optPtr->log2litSum;
         for (u=0; u < litLength; u++)
-            price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1);
+            price -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);

         if (litLength >= 12) {
-            ssPtr->cachedLiterals = literals;
-            ssPtr->cachedPrice = price;
-            ssPtr->cachedLitLength = litLength;
+            optPtr->cachedLiterals = literals;
+            optPtr->cachedPrice = price;
+            optPtr->cachedLitLength = litLength;
        }
    }

     /* literal Length */
     {   const BYTE LL_deltaCode = 19;
         const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-        price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
+        price += LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
    }

     return price;
 }


-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
+FORCE_INLINE_TEMPLATE U32 ZSTD_getPrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
 {
     /* offset */
     U32 price;
     BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);

-    if (seqStorePtr->staticPrices)
-        return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
+    if (optPtr->staticPrices)
+        return ZSTD_getLiteralPrice(optPtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;

-    price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
+    price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
     if (!ultra && offCode >= 20) price += (offCode-19)*2;

     /* match Length */
     {   const BYTE ML_deltaCode = 36;
         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-        price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
+        price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
    }

-    return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
+    return price + ZSTD_getLiteralPrice(optPtr, litLength, literals) + optPtr->factor;
 }


-MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
+static void ZSTD_updatePrice(optState_t* optPtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
 {
     U32 u;

     /* literals */
-    seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
     for (u=0; u < litLength; u++)
-        seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;

     /* literal Length */
     {   const BYTE LL_deltaCode = 19;
         const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-        seqStorePtr->litLengthFreq[llCode]++;
-        seqStorePtr->litLengthSum++;
+        optPtr->litLengthFreq[llCode]++;
+        optPtr->litLengthSum++;
    }

     /* match offset */
     {   BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
-        seqStorePtr->offCodeSum++;
-        seqStorePtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
+        optPtr->offCodeFreq[offCode]++;
    }

     /* match Length */
     {   const BYTE ML_deltaCode = 36;
         const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-        seqStorePtr->matchLengthFreq[mlCode]++;
-        seqStorePtr->matchLengthSum++;
+        optPtr->matchLengthFreq[mlCode]++;
+        optPtr->matchLengthSum++;
    }

-    ZSTD_setLog2Prices(seqStorePtr);
+    ZSTD_setLog2Prices(optPtr);
 }


@@ -203,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B

 /* function safe only for comparisons */
-MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+static U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 {
     switch (length)
     {
@@ -219,7 +219,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)

 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
+static
 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
 {
     U32* const hashTable3  = zc->hashTable3;
@@ -412,11 +412,12 @@ static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
 /*-*******************************
 *  Optimal parser
 *********************************/
-FORCE_INLINE
+FORCE_INLINE_TEMPLATE
 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                                     const void* src, size_t srcSize, const int ultra)
 {
     seqStore_t* seqStorePtr = &(ctx->seqStore);
+    optState_t* optStatePtr = &(ctx->optState);
     const BYTE* const istart = (const BYTE*)src;
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
@@ -430,16 +431,16 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
     const U32 mls = ctx->appliedParams.cParams.searchLength;
     const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;

-    ZSTD_optimal_t* opt = seqStorePtr->priceTable;
-    ZSTD_match_t* matches = seqStorePtr->matchTable;
+    ZSTD_optimal_t* opt = optStatePtr->priceTable;
+    ZSTD_match_t* matches = optStatePtr->matchTable;
     const BYTE* inr;
     U32 offset, rep[ZSTD_REP_NUM];

     /* init */
     ctx->nextToUpdate3 = ctx->nextToUpdate;
-    ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
     ip += (ip==prefixStart);
-    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
+    { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }

     /* Match Loop */
     while (ip < ilimit) {
@@ -462,7 +463,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
            }
             best_off = i - (ip == anchor);
             do {
-                price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
+                price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
                 if (mlen > last_pos || price < opt[mlen].price)
                     SET_PRICE(mlen, mlen, i, litlen, price);   /* note : macro modifies last_pos */
                 mlen--;
@@ -487,7 +488,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
                 mlen = (u>0) ?
matches[u-1].len+1 : best_mlen; best_mlen = matches[u].len; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ mlen++; @@ -507,12 +508,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur-1].mlen == 1) { litlen = opt[cur-1].litlen + 1; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); } else { litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); } if (cur > last_pos || price <= opt[cur].price) @@ -554,12 +555,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); } if (cur + mlen > last_pos || price <= opt[cur + mlen].price) @@ -586,12 +587,12 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); } if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) @@ -645,13 +646,13 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ if (litLength==0) offset--; } - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); anchor = ip = ip + mlen; } } /* for (cur=0; cur < last_pos; ) */ /* Save reps for next block */ - { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } + { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; } /* Last Literals */ { size_t const lastLLSize = iend - anchor; @@ -661,11 +662,12 @@ 
_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ } -FORCE_INLINE +FORCE_INLINE_TEMPLATE void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const void* src, size_t srcSize, const int ultra) { seqStore_t* seqStorePtr = &(ctx->seqStore); + optState_t* optStatePtr = &(ctx->optState); const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; @@ -683,16 +685,16 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, const U32 mls = ctx->appliedParams.cParams.searchLength; const U32 minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4; - ZSTD_optimal_t* opt = seqStorePtr->priceTable; - ZSTD_match_t* matches = seqStorePtr->matchTable; + ZSTD_optimal_t* opt = optStatePtr->priceTable; + ZSTD_match_t* matches = optStatePtr->matchTable; const BYTE* inr; /* init */ U32 offset, rep[ZSTD_REP_NUM]; - { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } + { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; } ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize); ip += (ip==prefixStart); /* Match Loop */ @@ -726,7 +728,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_off = i - (ip==anchor); litlen = opt[0].litlen; do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ mlen--; @@ -756,7 +758,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, best_mlen = matches[u].len; litlen = opt[0].litlen; while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); if (mlen > last_pos || price < opt[mlen].price) SET_PRICE(mlen, mlen, matches[u].off, litlen, price); mlen++; @@ -773,12 +775,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur-1].mlen == 1) { litlen = opt[cur-1].litlen + 1; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen); + price = opt[cur - litlen].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-litlen); } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); + price = ZSTD_getLiteralPrice(optStatePtr, litlen, anchor); } else { litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1); + price = opt[cur - 1].price + ZSTD_getLiteralPrice(optStatePtr, litlen, inr-1); } if (cur > last_pos || price <= opt[cur].price) @@ -826,12 +828,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra); } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); + price = 
opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); } if (cur + mlen > last_pos || price <= opt[cur + mlen].price) @@ -858,12 +860,12 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx, if (opt[cur].mlen == 1) { litlen = opt[cur].litlen; if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); + price = opt[cur - litlen].price + ZSTD_getPrice(optStatePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra); else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); + price = ZSTD_getPrice(optStatePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra); } else { litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); + price = opt[cur].price + ZSTD_getPrice(optStatePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra); } if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) @@ -918,13 +920,13 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ if (litLength==0) offset--; } - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); + ZSTD_updatePrice(optStatePtr, litLength, anchor, offset, mlen-MINMATCH); ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH); anchor = ip = ip + mlen; } } /* for (cur=0; cur < last_pos; ) */ /* Save reps for next block */ - { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; } + { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; } /* Last Literals */ { size_t lastLLSize = iend - anchor; diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c index 0cee01eacb..8564bc4392 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.c +++ b/thirdparty/zstd/compress/zstdmt_compress.c @@ -1,15 +1,16 @@ -/** +/* * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
*/ /* ====== Tuning parameters ====== */ -#define ZSTDMT_NBTHREADS_MAX 128 +#define ZSTDMT_NBTHREADS_MAX 256 +#define ZSTDMT_OVERLAPLOG_DEFAULT 6 /* ====== Compiler specifics ====== */ @@ -73,6 +74,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void) /* ===== Buffer Pool ===== */ +/* a single Buffer Pool can be invoked from multiple threads in parallel */ typedef struct buffer_s { void* start; @@ -82,6 +84,8 @@ typedef struct buffer_s { static const buffer_t g_nullBuffer = { NULL, 0 }; typedef struct ZSTDMT_bufferPool_s { + pthread_mutex_t poolMutex; + size_t bufferSize; unsigned totalBuffers; unsigned nbBuffers; ZSTD_customMem cMem; @@ -90,10 +94,15 @@ typedef struct ZSTDMT_bufferPool_s { static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem) { - unsigned const maxNbBuffers = 2*nbThreads + 2; + unsigned const maxNbBuffers = 2*nbThreads + 3; ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc( sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem); if (bufPool==NULL) return NULL; + if (pthread_mutex_init(&bufPool->poolMutex, NULL)) { + ZSTD_free(bufPool, cMem); + return NULL; + } + bufPool->bufferSize = 64 KB; bufPool->totalBuffers = maxNbBuffers; bufPool->nbBuffers = 0; bufPool->cMem = cMem; @@ -106,6 +115,7 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool) if (!bufPool) return; /* compatibility with free on NULL */ for (u=0; u<bufPool->totalBuffers; u++) ZSTD_free(bufPool->bTable[u].start, bufPool->cMem); + pthread_mutex_destroy(&bufPool->poolMutex); ZSTD_free(bufPool, bufPool->cMem); } @@ -116,65 +126,85 @@ static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool) + (bufPool->totalBuffers - 1) * sizeof(buffer_t); unsigned u; size_t totalBufferSize = 0; + pthread_mutex_lock(&bufPool->poolMutex); for (u=0; u<bufPool->totalBuffers; u++) totalBufferSize += bufPool->bTable[u].size; + pthread_mutex_unlock(&bufPool->poolMutex); return poolSize + totalBufferSize; } -/** ZSTDMT_getBuffer() : - * assumption : invocation from main thread only ! 
*/ -static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* pool, size_t bSize) +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* bufPool, size_t bSize) { - if (pool->nbBuffers) { /* try to use an existing buffer */ - buffer_t const buf = pool->bTable[--(pool->nbBuffers)]; + bufPool->bufferSize = bSize; +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer"); + pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; size_t const availBufferSize = buf.size; - if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) + if ((availBufferSize >= bSize) & (availBufferSize <= 10*bSize)) { /* large enough, but not too much */ + pthread_mutex_unlock(&bufPool->poolMutex); return buf; + } /* size conditions not respected : scratch this buffer, create new one */ - ZSTD_free(buf.start, pool->cMem); + DEBUGLOG(5, "existing buffer does not meet size conditions => freeing"); + ZSTD_free(buf.start, bufPool->cMem); } + pthread_mutex_unlock(&bufPool->poolMutex); /* create new buffer */ + DEBUGLOG(5, "create a new buffer"); { buffer_t buffer; - void* const start = ZSTD_malloc(bSize, pool->cMem); - if (start==NULL) bSize = 0; + void* const start = ZSTD_malloc(bSize, bufPool->cMem); buffer.start = start; /* note : start can be NULL if malloc fails ! */ - buffer.size = bSize; + buffer.size = (start==NULL) ? 0 : bSize; return buffer; } } /* store buffer for later re-use, up to pool capacity */ -static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* pool, buffer_t buf) -{ - if (buf.start == NULL) return; /* release on NULL */ - if (pool->nbBuffers < pool->totalBuffers) { - pool->bTable[pool->nbBuffers++] = buf; /* store for later re-use */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) +{ + if (buf.start == NULL) return; /* compatible with release on NULL */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer"); + pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers < bufPool->totalBuffers) { + bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */ + pthread_mutex_unlock(&bufPool->poolMutex); return; } + pthread_mutex_unlock(&bufPool->poolMutex); /* Reached bufferPool capacity (should not happen) */ - ZSTD_free(buf.start, pool->cMem); + DEBUGLOG(5, "buffer pool capacity reached => freeing "); + ZSTD_free(buf.start, bufPool->cMem); } /* ===== CCtx Pool ===== */ +/* a single CCtx Pool can be invoked from multiple threads in parallel */ typedef struct { + pthread_mutex_t poolMutex; unsigned totalCCtx; unsigned availCCtx; ZSTD_customMem cMem; ZSTD_CCtx* cctx[1]; /* variable size */ } ZSTDMT_CCtxPool; -/* assumption : CCtxPool invocation only from main thread */ - /* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool) { unsigned u; for (u=0; u<pool->totalCCtx; u++) ZSTD_freeCCtx(pool->cctx[u]); /* note : compatible with free on NULL */ + pthread_mutex_destroy(&pool->poolMutex); ZSTD_free(pool, pool->cMem); } @@ -186,6 +216,10 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads, ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc( sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem); if (!cctxPool) return NULL; + if (pthread_mutex_init(&cctxPool->poolMutex, 
NULL)) { + ZSTD_free(cctxPool, cMem); + return NULL; + } cctxPool->cMem = cMem; cctxPool->totalCCtx = nbThreads; cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */ @@ -198,50 +232,57 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads, /* only works during initialization phase, not during compression */ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool) { - unsigned const nbThreads = cctxPool->totalCCtx; - size_t const poolSize = sizeof(*cctxPool) - + (nbThreads-1)*sizeof(ZSTD_CCtx*); - unsigned u; - size_t totalCCtxSize = 0; - for (u=0; u<nbThreads; u++) - totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]); - - return poolSize + totalCCtxSize; + pthread_mutex_lock(&cctxPool->poolMutex); + { unsigned const nbThreads = cctxPool->totalCCtx; + size_t const poolSize = sizeof(*cctxPool) + + (nbThreads-1)*sizeof(ZSTD_CCtx*); + unsigned u; + size_t totalCCtxSize = 0; + for (u=0; u<nbThreads; u++) { + totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]); + } + pthread_mutex_unlock(&cctxPool->poolMutex); + return poolSize + totalCCtxSize; + } } -static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* pool) +static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool) { - if (pool->availCCtx) { - pool->availCCtx--; - return pool->cctx[pool->availCCtx]; - } - return ZSTD_createCCtx(); /* note : can be NULL, when creation fails ! */ + DEBUGLOG(5, "ZSTDMT_getCCtx"); + pthread_mutex_lock(&cctxPool->poolMutex); + if (cctxPool->availCCtx) { + cctxPool->availCCtx--; + { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx]; + pthread_mutex_unlock(&cctxPool->poolMutex); + return cctx; + } } + pthread_mutex_unlock(&cctxPool->poolMutex); + DEBUGLOG(5, "create one more CCtx"); + return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! 
*/ } static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx) { if (cctx==NULL) return; /* compatibility with release on NULL */ + pthread_mutex_lock(&pool->poolMutex); if (pool->availCCtx < pool->totalCCtx) pool->cctx[pool->availCCtx++] = cctx; - else + else { /* pool overflow : should not happen, since totalCCtx==nbThreads */ + DEBUGLOG(5, "CCtx pool overflow : free cctx"); ZSTD_freeCCtx(cctx); + } + pthread_mutex_unlock(&pool->poolMutex); } /* ===== Thread worker ===== */ typedef struct { - buffer_t buffer; - size_t filled; -} inBuff_t; - -typedef struct { - ZSTD_CCtx* cctx; buffer_t src; const void* srcStart; - size_t srcSize; size_t dictSize; + size_t srcSize; buffer_t dstBuff; size_t cSize; size_t dstFlushed; @@ -253,6 +294,8 @@ typedef struct { pthread_cond_t* jobCompleted_cond; ZSTD_parameters params; const ZSTD_CDict* cdict; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_bufferPool* bufPool; unsigned long long fullFrameSize; } ZSTDMT_jobDescription; @@ -260,37 +303,56 @@ typedef struct { void ZSTDMT_compressChunk(void* jobDescription) { ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx* cctx = ZSTDMT_getCCtx(job->cctxPool); const void* const src = (const char*)job->srcStart + job->dictSize; - buffer_t const dstBuff = job->dstBuff; + buffer_t dstBuff = job->dstBuff; DEBUGLOG(5, "job (first:%u) (last:%u) : dictSize %u, srcSize %u", job->firstChunk, job->lastChunk, (U32)job->dictSize, (U32)job->srcSize); + + if (cctx==NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + + if (dstBuff.start == NULL) { + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) { + job->cSize = ERROR(memory_allocation); + goto _endJob; + } + job->dstBuff = dstBuff; + } + if (job->cdict) { /* should only happen for first segment */ - size_t const initError = ZSTD_compressBegin_usingCDict_advanced(job->cctx, job->cdict, job->params.fParams, job->fullFrameSize); + size_t const initError = ZSTD_compressBegin_usingCDict_advanced(cctx, job->cdict, job->params.fParams, job->fullFrameSize); DEBUGLOG(5, "using CDict"); if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; } } else { /* srcStart points at reloaded section */ if (!job->firstChunk) job->params.fParams.contentSizeFlag = 0; /* ensure no srcSize control */ - { size_t const dictModeError = ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */ - size_t const initError = ZSTD_compressBegin_advanced(job->cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); + { size_t const dictModeError = ZSTD_setCCtxParameter(cctx, ZSTD_p_forceRawDict, 1); /* Force loading dictionary in "content-only" mode (no header analysis) */ + size_t const initError = ZSTD_compressBegin_advanced(cctx, job->srcStart, job->dictSize, job->params, job->fullFrameSize); if (ZSTD_isError(initError) || ZSTD_isError(dictModeError)) { job->cSize = initError; goto _endJob; } - ZSTD_setCCtxParameter(job->cctx, ZSTD_p_forceWindow, 1); + ZSTD_setCCtxParameter(cctx, ZSTD_p_forceWindow, 1); } } if (!job->firstChunk) { /* flush and overwrite frame header when it's not first segment */ - size_t const hSize = ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, 0); + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0); if (ZSTD_isError(hSize)) { job->cSize = hSize; goto _endJob; } - ZSTD_invalidateRepCodes(job->cctx); + ZSTD_invalidateRepCodes(cctx); } DEBUGLOG(5, 
"Compressing : "); DEBUG_PRINTHEX(4, job->srcStart, 12); job->cSize = (job->lastChunk) ? - ZSTD_compressEnd (job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : - ZSTD_compressContinue(job->cctx, dstBuff.start, dstBuff.size, src, job->srcSize); + ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) : + ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize); DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u)", (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk); DEBUGLOG(5, "dstBuff.size : %u ; => %s", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize)); _endJob: + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + ZSTDMT_releaseBuffer(job->bufPool, job->src); + job->src = g_nullBuffer; job->srcStart = NULL; PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex); job->jobCompleted = 1; job->jobScanned = 0; @@ -303,15 +365,19 @@ _endJob: /* ===== Multi-threaded compression ===== */ /* ------------------------------------------ */ +typedef struct { + buffer_t buffer; + size_t filled; +} inBuff_t; + struct ZSTDMT_CCtx_s { POOL_ctx* factory; ZSTDMT_jobDescription* jobs; - ZSTDMT_bufferPool* buffPool; + ZSTDMT_bufferPool* bufPool; ZSTDMT_CCtxPool* cctxPool; pthread_mutex_t jobCompleted_mutex; pthread_cond_t jobCompleted_cond; size_t targetSectionSize; - size_t marginSize; size_t inBuffSize; size_t dictSize; size_t targetDictSize; @@ -324,7 +390,7 @@ struct ZSTDMT_CCtx_s { unsigned nextJobID; unsigned frameEnded; unsigned allJobsCompleted; - unsigned overlapRLog; + unsigned overlapLog; unsigned long long frameContentSize; size_t sectionSize; ZSTD_customMem cMem; @@ -347,7 +413,8 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem) U32 nbJobs = nbThreads + 2; DEBUGLOG(3, "ZSTDMT_createCCtx_advanced"); - if ((nbThreads < 1) | (nbThreads > ZSTDMT_NBTHREADS_MAX)) return NULL; + if (nbThreads < 1) return NULL; + nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX); if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL)) /* invalid custom allocator */ return NULL; @@ -358,18 +425,24 @@ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem) mtctx->nbThreads = nbThreads; mtctx->allJobsCompleted = 1; mtctx->sectionSize = 0; - mtctx->overlapRLog = 3; - mtctx->factory = POOL_create(nbThreads, 1); + mtctx->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT; + mtctx->factory = POOL_create(nbThreads, 0); mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem); mtctx->jobIDMask = nbJobs - 1; - mtctx->buffPool = ZSTDMT_createBufferPool(nbThreads, cMem); + mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem); mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem); - if (!mtctx->factory | !mtctx->jobs | !mtctx->buffPool | !mtctx->cctxPool) { + if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + if (pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) { + ZSTDMT_freeCCtx(mtctx); + return NULL; + } + if (pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) { ZSTDMT_freeCCtx(mtctx); return NULL; } - pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL); /* Todo : check init function return */ - pthread_cond_init(&mtctx->jobCompleted_cond, NULL); DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads); return mtctx; } @@ -386,15 +459,13 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) unsigned jobID; DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { - 
ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].dstBuff); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); mtctx->jobs[jobID].dstBuff = g_nullBuffer; - ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[jobID].src); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src); mtctx->jobs[jobID].src = g_nullBuffer; - ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[jobID].cctx); - mtctx->jobs[jobID].cctx = NULL; } memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription)); - ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); mtctx->inBuff.buffer = g_nullBuffer; mtctx->allJobsCompleted = 1; } @@ -404,7 +475,7 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) if (mtctx==NULL) return 0; /* compatible with free on NULL */ POOL_free(mtctx->factory); if (!mtctx->allJobsCompleted) ZSTDMT_releaseAllJobResources(mtctx); /* stop workers first */ - ZSTDMT_freeBufferPool(mtctx->buffPool); /* release job resources into pools first */ + ZSTDMT_freeBufferPool(mtctx->bufPool); /* release job resources into pools first */ ZSTD_free(mtctx->jobs, mtctx->cMem); ZSTDMT_freeCCtxPool(mtctx->cctxPool); ZSTD_freeCDict(mtctx->cdictLocal); @@ -418,11 +489,11 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) { if (mtctx == NULL) return 0; /* supports sizeof NULL */ return sizeof(*mtctx) - + POOL_sizeof(mtctx->factory) - + ZSTDMT_sizeof_bufferPool(mtctx->buffPool) - + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) - + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) - + ZSTD_sizeof_CDict(mtctx->cdictLocal); + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal); } size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value) @@ -434,10 +505,10 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, return 0; case ZSTDMT_p_overlapSectionLog : DEBUGLOG(5, "ZSTDMT_p_overlapSectionLog : %u", value); - mtctx->overlapRLog = (value >= 9) ? 0 : 9 - value; + mtctx->overlapLog = (value >= 9) ? 9 : value; return 0; default : - return ERROR(compressionParameter_unsupported); + return ERROR(parameter_unsupported); } } @@ -459,12 +530,13 @@ static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbT size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_parameters const params, - unsigned overlapRLog) + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_parameters const params, + unsigned overlapLog) { + unsigned const overlapRLog = (overlapLog>9) ? 0 : 9-overlapLog; size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog); unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, mtctx->nbThreads); size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks; @@ -473,6 +545,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, size_t remainingSrcSize = srcSize; unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? 
nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */ size_t frameStartPos = 0, dstBufferPos = 0; + XXH64_state_t xxh64; DEBUGLOG(4, "nbChunks : %2u (chunkSize : %u bytes) ", nbChunks, (U32)avgChunkSize); if (nbChunks==1) { /* fallback to single-thread mode */ @@ -480,7 +553,9 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, params.fParams); return ZSTD_compress_advanced(cctx, dst, dstCapacity, src, srcSize, NULL, 0, params); } - assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is useful to avoid allocating extra buffers */ + assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */ + ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) ); + XXH64_reset(&xxh64, 0); if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */ U32 nbJobs = nbChunks; @@ -496,17 +571,10 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize); size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize); buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity }; - buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : ZSTDMT_getBuffer(mtctx->buffPool, dstBufferCapacity); - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(mtctx->cctxPool); + buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer; size_t dictSize = u ? overlapSize : 0; - if ((cctx==NULL) || (dstBuffer.start==NULL)) { - mtctx->jobs[u].cSize = ERROR(memory_allocation); /* job result */ - mtctx->jobs[u].jobCompleted = 1; - nbChunks = u+1; /* only wait and free u jobs, instead of initially expected nbChunks ones */ - break; /* let's wait for previous jobs to complete, but don't start new ones */ - } - + mtctx->jobs[u].src = g_nullBuffer; mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize; mtctx->jobs[u].dictSize = dictSize; mtctx->jobs[u].srcSize = chunkSize; @@ -516,13 +584,18 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, /* do not calculate checksum within sections, but write it in header for first section */ if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0; mtctx->jobs[u].dstBuff = dstBuffer; - mtctx->jobs[u].cctx = cctx; + mtctx->jobs[u].cctxPool = mtctx->cctxPool; + mtctx->jobs[u].bufPool = mtctx->bufPool; mtctx->jobs[u].firstChunk = (u==0); mtctx->jobs[u].lastChunk = (u==nbChunks-1); mtctx->jobs[u].jobCompleted = 0; mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex; mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond; + if (params.fParams.checksumFlag) { + XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize); + } + DEBUGLOG(5, "posting job %u (%u bytes)", u, (U32)chunkSize); DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12); POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]); @@ -533,8 +606,8 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, } } /* collect result */ - { unsigned chunkID; - size_t error = 0, dstPos = 0; + { size_t error = 0, dstPos = 0; + unsigned chunkID; for (chunkID=0; chunkID<nbChunks; chunkID++) { DEBUGLOG(5, "waiting for chunk %u ", chunkID); PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex); @@ -545,8 +618,6 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, 
pthread_mutex_unlock(&mtctx->jobCompleted_mutex); DEBUGLOG(5, "ready to write chunk %u ", chunkID); - ZSTDMT_releaseCCtx(mtctx->cctxPool, mtctx->jobs[chunkID].cctx); - mtctx->jobs[chunkID].cctx = NULL; mtctx->jobs[chunkID].srcStart = NULL; { size_t const cSize = mtctx->jobs[chunkID].cSize; if (ZSTD_isError(cSize)) error = cSize; @@ -556,13 +627,25 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */ if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */ DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst); - ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->jobs[chunkID].dstBuff); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff); } mtctx->jobs[chunkID].dstBuff = g_nullBuffer; } dstPos += cSize ; } - } + } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */ + + DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag); + if (params.fParams.checksumFlag) { + U32 const checksum = (U32)XXH64_digest(&xxh64); + if (dstPos + 4 > dstCapacity) { + error = ERROR(dstSize_tooSmall); + } else { + DEBUGLOG(4, "writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)dst + dstPos, checksum); + dstPos += 4; + } } + if (!error) DEBUGLOG(4, "compressed size : %u ", (U32)dstPos); return error ? error : dstPos; } @@ -574,10 +657,10 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, const void* src, size_t srcSize, int compressionLevel) { - U32 const overlapRLog = (compressionLevel >= ZSTD_maxCLevel()) ? 0 : 3; + U32 const overlapLog = (compressionLevel >= ZSTD_maxCLevel()) ? 9 : ZSTDMT_OVERLAPLOG_DEFAULT; ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, 0); params.fParams.contentSizeFlag = 1; - return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapRLog); + return ZSTDMT_compress_advanced(mtctx, dst, dstCapacity, src, srcSize, NULL, params, overlapLog); } @@ -615,8 +698,8 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, if (zcs->nbThreads==1) { DEBUGLOG(4, "single thread mode"); return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0], - dict, dictSize, cdict, - params, pledgedSrcSize); + dict, dictSize, cdict, + params, pledgedSrcSize); } if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */ @@ -642,18 +725,16 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, zcs->cdict = cdict; } - zcs->targetDictSize = (zcs->overlapRLog>=9) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - zcs->overlapRLog); - DEBUGLOG(4, "overlapRLog : %u ", zcs->overlapRLog); + zcs->targetDictSize = (zcs->overlapLog==0) ? 0 : (size_t)1 << (zcs->params.cParams.windowLog - (9 - zcs->overlapLog)); + DEBUGLOG(4, "overlapLog : %u ", zcs->overlapLog); DEBUGLOG(4, "overlap Size : %u KB", (U32)(zcs->targetDictSize>>10)); zcs->targetSectionSize = zcs->sectionSize ? 
zcs->sectionSize : (size_t)1 << (zcs->params.cParams.windowLog + 2); zcs->targetSectionSize = MAX(ZSTDMT_SECTION_SIZE_MIN, zcs->targetSectionSize); zcs->targetSectionSize = MAX(zcs->targetDictSize, zcs->targetSectionSize); DEBUGLOG(4, "Section Size : %u KB", (U32)(zcs->targetSectionSize>>10)); - zcs->marginSize = zcs->targetSectionSize >> 2; - zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize + zcs->marginSize; - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); - if (zcs->inBuff.buffer.start == NULL) return ERROR(memory_allocation); - zcs->inBuff.filled = 0; + zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize; + ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) ); + zcs->inBuff.buffer = g_nullBuffer; zcs->dictSize = 0; zcs->doneJobID = 0; zcs->nextJobID = 0; @@ -664,8 +745,9 @@ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, } size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize) { DEBUGLOG(5, "ZSTDMT_initCStream_advanced"); return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, NULL, params, pledgedSrcSize); @@ -701,19 +783,8 @@ size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) { static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame) { - size_t const dstBufferCapacity = ZSTD_compressBound(srcSize); - buffer_t const dstBuffer = ZSTDMT_getBuffer(zcs->buffPool, dstBufferCapacity); - ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(zcs->cctxPool); unsigned const jobID = zcs->nextJobID & zcs->jobIDMask; - if ((cctx==NULL) || (dstBuffer.start==NULL)) { - zcs->jobs[jobID].jobCompleted = 1; - zcs->nextJobID++; - ZSTDMT_waitForAllJobsCompleted(zcs); - ZSTDMT_releaseAllJobResources(zcs); - return ERROR(memory_allocation); - } - DEBUGLOG(4, "preparing job %u to compress %u bytes with %u preload ", zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize); zcs->jobs[jobID].src = zcs->inBuff.buffer; @@ -726,8 +797,9 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0; zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? 
zcs->cdict : NULL; zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize; - zcs->jobs[jobID].dstBuff = dstBuffer; - zcs->jobs[jobID].cctx = cctx; + zcs->jobs[jobID].dstBuff = g_nullBuffer; + zcs->jobs[jobID].cctxPool = zcs->cctxPool; + zcs->jobs[jobID].bufPool = zcs->bufPool; zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0); zcs->jobs[jobID].lastChunk = endFrame; zcs->jobs[jobID].jobCompleted = 0; @@ -735,11 +807,13 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex; zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond; + if (zcs->params.fParams.checksumFlag) + XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize); + /* get a new buffer for next input */ if (!endFrame) { size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize); - DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame); - zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->buffPool, zcs->inBuffSize); + zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool); if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */ zcs->jobs[jobID].jobCompleted = 1; zcs->nextJobID++; @@ -747,26 +821,20 @@ static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsi ZSTDMT_releaseAllJobResources(zcs); return ERROR(memory_allocation); } - DEBUGLOG(5, "inBuff currently filled to %u", (U32)zcs->inBuff.filled); zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize; - DEBUGLOG(5, "new job : inBuff filled to %u, with %u dict and %u src", - (U32)zcs->inBuff.filled, (U32)newDictSize, - (U32)(zcs->inBuff.filled - newDictSize)); memmove(zcs->inBuff.buffer.start, (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize, zcs->inBuff.filled); - DEBUGLOG(5, "new inBuff pre-filled"); zcs->dictSize = newDictSize; } else { /* if (endFrame==1) */ - DEBUGLOG(5, "ZSTDMT_createCompressionJob::endFrame = %u", endFrame); zcs->inBuff.buffer = g_nullBuffer; zcs->inBuff.filled = 0; zcs->dictSize = 0; zcs->frameEnded = 1; - if (zcs->nextJobID == 0) + if (zcs->nextJobID == 0) { /* single chunk exception : checksum is calculated directly within worker thread */ zcs->params.fParams.checksumFlag = 0; - } + } } DEBUGLOG(4, "posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)", zcs->nextJobID, @@ -804,11 +872,8 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi ZSTDMT_releaseAllJobResources(zcs); return job.cSize; } - ZSTDMT_releaseCCtx(zcs->cctxPool, job.cctx); - zcs->jobs[wJobID].cctx = NULL; DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag); if (zcs->params.fParams.checksumFlag) { - XXH64_update(&zcs->xxhState, (const char*)job.srcStart + job.dictSize, job.srcSize); if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */ U32 const checksum = (U32)XXH64_digest(&zcs->xxhState); DEBUGLOG(5, "writing checksum : %08X \n", checksum); @@ -816,9 +881,6 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsi job.cSize += 4; zcs->jobs[wJobID].cSize += 4; } } - ZSTDMT_releaseBuffer(zcs->buffPool, job.src); - zcs->jobs[wJobID].srcStart = NULL; - zcs->jobs[wJobID].src = g_nullBuffer; zcs->jobs[wJobID].jobScanned = 1; } { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos); @@ -828,7 +890,7 @@ static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, 
ZSTD_outBuffer* output, unsi job.dstFlushed += toWrite; } if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */ - ZSTDMT_releaseBuffer(zcs->buffPool, job.dstBuff); + ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff); zcs->jobs[wJobID].dstBuff = g_nullBuffer; zcs->jobs[wJobID].jobCompleted = 0; zcs->doneJobID++; @@ -852,18 +914,18 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, ZSTD_inBuffer* input, ZSTD_EndDirective endOp) { - size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize + mtctx->marginSize; + size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize; assert(output->pos <= output->size); assert(input->pos <= input->size); if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { /* current frame being ended. Only flush/end are allowed. Or start new frame with init */ return ERROR(stage_wrong); } - if (mtctx->nbThreads==1) { + if (mtctx->nbThreads==1) { /* delegate to single-thread (synchronous) */ return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp); } - /* single-pass shortcut (note : this is blocking-mode) */ + /* single-pass shortcut (note : this is synchronous-mode) */ if ( (mtctx->nextJobID==0) /* just started */ && (mtctx->inBuff.filled==0) /* nothing buffered */ && (endOp==ZSTD_e_end) /* end order */ @@ -871,24 +933,29 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, size_t const cSize = ZSTDMT_compress_advanced(mtctx, (char*)output->dst + output->pos, output->size - output->pos, (const char*)input->src + input->pos, input->size - input->pos, - mtctx->cdict, mtctx->params, mtctx->overlapRLog); + mtctx->cdict, mtctx->params, mtctx->overlapLog); if (ZSTD_isError(cSize)) return cSize; input->pos = input->size; output->pos += cSize; - ZSTDMT_releaseBuffer(mtctx->buffPool, mtctx->inBuff.buffer); /* was allocated in initStream */ + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); /* was allocated in initStream */ mtctx->allJobsCompleted = 1; mtctx->frameEnded = 1; return 0; } /* fill input buffer */ - if ((input->src) && (mtctx->inBuff.buffer.start)) { /* support NULL input */ - size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled); - DEBUGLOG(2, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad); - memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad); - input->pos += toLoad; - mtctx->inBuff.filled += toLoad; - } + if (input->size > input->pos) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool); + if (mtctx->inBuff.buffer.start == NULL) return ERROR(memory_allocation); + mtctx->inBuff.filled = 0; + } + { size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled); + DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad); + memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad); + input->pos += toLoad; + mtctx->inBuff.filled += toLoad; + } } if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */ && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */ diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h index 
fad63b6d86..0f0fc2b03f 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.h +++ b/thirdparty/zstd/compress/zstdmt_compress.h @@ -1,10 +1,10 @@ -/** * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). */ #ifndef ZSTDMT_COMPRESS_H @@ -15,10 +15,11 @@ #endif -/* Note : All prototypes defined in this file are labelled experimental. - * No guarantee of API continuity is provided on any of them. - * In fact, the expectation is that these prototypes will be replaced - * by ZSTD_compress_generic() API in the near future */ +/* Note : This is an internal API. + * Some methods are still exposed (ZSTDLIB_API), + * because it used to be the only way to invoke MT compression. + * Now, it's recommended to use ZSTD_compress_generic() instead. + * These methods will stop being exposed in a future version */ /* === Dependencies === */ #include <stddef.h> /* size_t */ @@ -67,7 +68,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_parameters const params, - unsigned overlapRLog); + unsigned overlapLog); ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
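The zstd_opt.h hunks move the optimal parser's statistics from seqStore_t into the new optState_t; the pricing model itself is unchanged: a symbol costs roughly log2(sum of frequencies) minus log2(its own frequency), computed with ZSTD_highbit32. A minimal standalone sketch of that estimate (helper names here are hypothetical, not zstd API):

#include <stdint.h>

/* position of the highest set bit, i.e. floor(log2(v)) -- what
 * ZSTD_highbit32 computes (the real code uses a compiler intrinsic) */
static unsigned log2floor(uint32_t v)
{
    unsigned n = 0;
    while (v >>= 1) n++;
    return n;
}

/* estimated cost, in bits, of coding one symbol seen `freq` times out
 * of `total` events : ~ log2(total) - log2(freq); the +1 mirrors the
 * freq+1 in the diff and avoids taking log2(0) for never-seen symbols */
static unsigned symbolCostBits(uint32_t freq, uint32_t total)
{
    return log2floor(total + 1) - log2floor(freq + 1);
}

In the diff this shows up as expressions like optPtr->log2litSum - ZSTD_highbit32(optPtr->litFreq[literals[u]]+1), with the log2 of each sum cached by ZSTD_setLog2Prices.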
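The main structural change in zstdmt_compress.c: buffer and CCtx pools are now guarded by a pthread mutex, so worker threads acquire and release their own resources inside ZSTDMT_compressChunk instead of the main thread handing them out per job. A stripped-down sketch of the same acquire/release pattern, with hypothetical names and a fixed-capacity table:

#include <pthread.h>
#include <stdlib.h>

typedef struct { void* start; size_t size; } buf_t;

typedef struct {
    pthread_mutex_t lock;
    size_t bufferSize;       /* size handed out by pool_get() */
    unsigned nbFree, cap;    /* parked buffers / table capacity (cap <= 16) */
    buf_t tbl[16];
} pool_t;

static buf_t pool_get(pool_t* p)
{
    buf_t b = { NULL, 0 };
    pthread_mutex_lock(&p->lock);
    if (p->nbFree) {
        b = p->tbl[--p->nbFree];
        /* reuse only if large enough but not wastefully large,
         * mirroring (avail >= bSize) & (avail <= 10*bSize) in the diff */
        if ((b.size >= p->bufferSize) & (b.size <= 10 * p->bufferSize)) {
            pthread_mutex_unlock(&p->lock);
            return b;
        }
        free(b.start);   /* wrong size : discard, allocate fresh below */
    }
    pthread_mutex_unlock(&p->lock);
    b.start = malloc(p->bufferSize);             /* outside the lock */
    b.size  = (b.start == NULL) ? 0 : p->bufferSize;
    return b;
}

static void pool_release(pool_t* p, buf_t b)
{
    if (b.start == NULL) return;                 /* release on NULL ok */
    pthread_mutex_lock(&p->lock);
    if (p->nbFree < p->cap) {
        p->tbl[p->nbFree++] = b;                 /* park for reuse */
        b.start = NULL;
    }
    pthread_mutex_unlock(&p->lock);
    free(b.start);                               /* no-op when parked */
}

The CCtx pool follows the same shape, falling back to ZSTD_createCCtx_advanced(cMem) when the pool runs dry.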
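Chunk sizing in ZSTDMT_compress_advanced is a plain ceiling division, and job slots wrap through a power-of-two table via jobIDMask. Worked through with hypothetical numbers:

#include <stddef.h>

/* ceiling division used to propose a chunk size :
 * e.g. 10,485,760 bytes (10 MB) over 3 chunks -> 3,495,254 bytes each */
static size_t proposedChunkSize(size_t srcSize, unsigned nbChunks)
{
    return (srcSize + (nbChunks - 1)) / nbChunks;
}

/* the job table size is rounded up to a power of two, so a mask
 * replaces modulo : with 8 slots, jobIDMask = 7 and job 9 lands in slot 1 */
static unsigned jobSlot(unsigned nextJobID, unsigned jobIDMask)
{
    return nextJobID & jobIDMask;
}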
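The overlap parameter flips direction: callers now pass overlapLog (0 = no overlap, 9 = full window) and the reversed log is derived internally as 9 - overlapLog, matching the conversion at the top of ZSTDMT_compress_advanced. The resulting overlap size, as a worked sketch:

#include <stddef.h>

/* overlapLog in [0..9] : 9 = full-window overlap, 0 = none.
 * With windowLog = 23 (8 MB window) :
 *   overlapLog 9 -> rlog 0 -> 1 << 23 = 8 MB of overlap
 *   overlapLog 6 -> rlog 3 -> 1 << 20 = 1 MB (ZSTDMT_OVERLAPLOG_DEFAULT)
 *   overlapLog 0 -> rlog 9 -> 0, no overlap */
static size_t overlapSize(unsigned overlapLog, unsigned windowLog)
{
    unsigned const rlog = (overlapLog > 9) ? 0 : 9 - overlapLog;
    return (rlog >= 9) ? 0 : (size_t)1 << (windowLog - rlog);
}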
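Checksumming also moves out of the workers: the submitting thread hashes each chunk of source with the streaming XXH64 API (XXH64_reset, one XXH64_update per chunk, XXH64_digest at the end) and writes the low 32 bits after the last chunk. A sketch of that streaming pattern, assuming only the xxhash API the diff already uses:

#include <stddef.h>
#include <stdint.h>
#include "xxhash.h"   /* XXH64_reset / XXH64_update / XXH64_digest */

/* Hash a source split into chunks, as the rewritten
 * ZSTDMT_compress_advanced does; streaming updates yield the same
 * digest as hashing the whole buffer at once. zstd frame checksums
 * keep only the low 4 bytes, written little-endian. */
static uint32_t frameChecksum(const char* src, size_t srcSize, size_t chunkSize)
{
    XXH64_state_t st;
    size_t pos = 0;
    XXH64_reset(&st, 0);                     /* seed 0, as in the diff */
    while (pos < srcSize) {
        size_t const n = (srcSize - pos < chunkSize) ? srcSize - pos : chunkSize;
        XXH64_update(&st, src + pos, n);     /* one update per chunk */
        pos += n;
    }
    return (uint32_t)XXH64_digest(&st);      /* low 32 bits go in the frame */
}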
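Between streaming jobs, ZSTDMT_createCompressionJob keeps the tail of the just-submitted input as the next job's dictionary: newDictSize = MIN(srcSize + dictSize, targetDictSize), then a memmove to the front of a fresh buffer. A simplified sketch of the carry arithmetic (the real code also carries any buffered input not yet submitted):

#include <stddef.h>
#include <string.h>

/* After submitting `srcSize` bytes preceded by `dictSize` bytes of
 * history, keep at most `targetDictSize` trailing bytes as the next
 * job's history. `inBuff` holds dict + src contiguously; `dst` is the
 * next input buffer. Returns the new history size. */
static size_t carryOverlap(char* dst, const char* inBuff,
                           size_t dictSize, size_t srcSize,
                           size_t targetDictSize)
{
    size_t const total = dictSize + srcSize;
    size_t const newDictSize = (total < targetDictSize) ? total : targetDictSize;
    memmove(dst, inBuff + total - newDictSize, newDictSize);
    return newDictSize;
}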