summaryrefslogtreecommitdiff
path: root/thirdparty/zstd
diff options
context:
space:
mode:
authorvolzhs <volzhs@gmail.com>2018-05-16 02:45:22 +0900
committervolzhs <volzhs@gmail.com>2018-05-16 02:45:22 +0900
commit5c5918a52dd93489fcb61b84d07d07368151824a (patch)
tree63f5023dc0d942c8f0fdea7813aa20730d92947a /thirdparty/zstd
parent3b8bd50b41e0197ab3bce653548715872a93ea80 (diff)
Update zstd to 1.3.4
Diffstat (limited to 'thirdparty/zstd')
-rw-r--r--thirdparty/zstd/common/bitstream.h2
-rw-r--r--thirdparty/zstd/common/compiler.h25
-rw-r--r--thirdparty/zstd/common/cpu.h216
-rw-r--r--thirdparty/zstd/common/error_private.c1
-rw-r--r--thirdparty/zstd/common/fse.h2
-rw-r--r--thirdparty/zstd/common/fse_decompress.c4
-rw-r--r--thirdparty/zstd/common/huf.h203
-rw-r--r--thirdparty/zstd/common/pool.c63
-rw-r--r--thirdparty/zstd/common/pool.h27
-rw-r--r--thirdparty/zstd/common/threading.h24
-rw-r--r--thirdparty/zstd/common/zstd_errors.h23
-rw-r--r--thirdparty/zstd/common/zstd_internal.h9
-rw-r--r--thirdparty/zstd/compress/fse_compress.c30
-rw-r--r--thirdparty/zstd/compress/huf_compress.c222
-rw-r--r--thirdparty/zstd/compress/zstd_compress.c1617
-rw-r--r--thirdparty/zstd/compress/zstd_compress_internal.h315
-rw-r--r--thirdparty/zstd/compress/zstd_double_fast.c142
-rw-r--r--thirdparty/zstd/compress/zstd_double_fast.h15
-rw-r--r--thirdparty/zstd/compress/zstd_fast.c158
-rw-r--r--thirdparty/zstd/compress/zstd_fast.h16
-rw-r--r--thirdparty/zstd/compress/zstd_lazy.c607
-rw-r--r--thirdparty/zstd/compress/zstd_lazy.h49
-rw-r--r--thirdparty/zstd/compress/zstd_ldm.c664
-rw-r--r--thirdparty/zstd/compress/zstd_ldm.h85
-rw-r--r--thirdparty/zstd/compress/zstd_opt.c250
-rw-r--r--thirdparty/zstd/compress/zstd_opt.h22
-rw-r--r--thirdparty/zstd/compress/zstdmt_compress.c1580
-rw-r--r--thirdparty/zstd/compress/zstdmt_compress.h48
-rw-r--r--thirdparty/zstd/decompress/huf_decompress.c680
-rw-r--r--thirdparty/zstd/decompress/zstd_decompress.c1216
-rw-r--r--thirdparty/zstd/zstd.h508
31 files changed, 5607 insertions, 3216 deletions
diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h
index fcf3843079..f7f389fe0f 100644
--- a/thirdparty/zstd/common/bitstream.h
+++ b/thirdparty/zstd/common/bitstream.h
@@ -426,7 +426,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
- * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
diff --git a/thirdparty/zstd/common/compiler.h b/thirdparty/zstd/common/compiler.h
index 3a7553c380..e90a3bcde3 100644
--- a/thirdparty/zstd/common/compiler.h
+++ b/thirdparty/zstd/common/compiler.h
@@ -63,6 +63,31 @@
# endif
#endif
+/* target attribute */
+#ifndef __has_attribute
+ #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
+#endif
+#if defined(__GNUC__)
+# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+#else
+# define TARGET_ATTRIBUTE(target)
+#endif
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+ #if (defined(__clang__) && __has_attribute(__target__)) \
+ || (defined(__GNUC__) \
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
+ && (defined(__x86_64__) || defined(_M_X86)) \
+ && !defined(__BMI2__)
+ # define DYNAMIC_BMI2 1
+ #else
+ # define DYNAMIC_BMI2 0
+ #endif
+#endif
+
/* prefetch */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
diff --git a/thirdparty/zstd/common/cpu.h b/thirdparty/zstd/common/cpu.h
new file mode 100644
index 0000000000..4eb48e39e1
--- /dev/null
+++ b/thirdparty/zstd/common/cpu.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include <string.h>
+
+#include "mem.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {
+ U32 f1c;
+ U32 f1d;
+ U32 f7b;
+ U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+ U32 f1c = 0;
+ U32 f1d = 0;
+ U32 f7b = 0;
+ U32 f7c = 0;
+#ifdef _MSC_VER
+ int reg[4];
+ __cpuid((int*)reg, 0);
+ {
+ int const n = reg[0];
+ if (n >= 1) {
+ __cpuid((int*)reg, 1);
+ f1c = (U32)reg[2];
+ f1d = (U32)reg[3];
+ }
+ if (n >= 7) {
+ __cpuidex((int*)reg, 7, 0);
+ f7b = (U32)reg[1];
+ f7c = (U32)reg[2];
+ }
+ }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+ /* The following block like the normal cpuid branch below, but gcc
+ * reserves ebx for use of its pic register so we must specially
+ * handle the save and restore to avoid clobbering the register
+ */
+ U32 n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\r"
+ "popl %%ebx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+ U32 n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ U32 f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#endif
+ {
+ ZSTD_cpuid_t cpuid;
+ cpuid.f1c = f1c;
+ cpuid.f1d = f1d;
+ cpuid.f7b = f7b;
+ cpuid.f7c = f7c;
+ return cpuid;
+ }
+}
+
+#define X(name, r, bit) \
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
+ return ((cpuid.r) & (1U << bit)) != 0; \
+ }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+ C(sse3, 0)
+ C(pclmuldq, 1)
+ C(dtes64, 2)
+ C(monitor, 3)
+ C(dscpl, 4)
+ C(vmx, 5)
+ C(smx, 6)
+ C(eist, 7)
+ C(tm2, 8)
+ C(ssse3, 9)
+ C(cnxtid, 10)
+ C(fma, 12)
+ C(cx16, 13)
+ C(xtpr, 14)
+ C(pdcm, 15)
+ C(pcid, 17)
+ C(dca, 18)
+ C(sse41, 19)
+ C(sse42, 20)
+ C(x2apic, 21)
+ C(movbe, 22)
+ C(popcnt, 23)
+ C(tscdeadline, 24)
+ C(aes, 25)
+ C(xsave, 26)
+ C(osxsave, 27)
+ C(avx, 28)
+ C(f16c, 29)
+ C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+ D(fpu, 0)
+ D(vme, 1)
+ D(de, 2)
+ D(pse, 3)
+ D(tsc, 4)
+ D(msr, 5)
+ D(pae, 6)
+ D(mce, 7)
+ D(cx8, 8)
+ D(apic, 9)
+ D(sep, 11)
+ D(mtrr, 12)
+ D(pge, 13)
+ D(mca, 14)
+ D(cmov, 15)
+ D(pat, 16)
+ D(pse36, 17)
+ D(psn, 18)
+ D(clfsh, 19)
+ D(ds, 21)
+ D(acpi, 22)
+ D(mmx, 23)
+ D(fxsr, 24)
+ D(sse, 25)
+ D(sse2, 26)
+ D(ss, 27)
+ D(htt, 28)
+ D(tm, 29)
+ D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)
+ B(bmi1, 3)
+ B(hle, 4)
+ B(avx2, 5)
+ B(smep, 7)
+ B(bmi2, 8)
+ B(erms, 9)
+ B(invpcid, 10)
+ B(rtm, 11)
+ B(mpx, 14)
+ B(avx512f, 16)
+ B(avx512dq, 17)
+ B(rdseed, 18)
+ B(adx, 19)
+ B(smap, 20)
+ B(avx512ifma, 21)
+ B(pcommit, 22)
+ B(clflushopt, 23)
+ B(clwb, 24)
+ B(avx512pf, 26)
+ B(avx512er, 27)
+ B(avx512cd, 28)
+ B(sha, 29)
+ B(avx512bw, 30)
+ B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)
+ C(prefetchwt1, 0)
+ C(avx512vbmi, 1)
+#undef C
+
+#undef X
+
+#endif /* ZSTD_COMMON_CPU_H */
diff --git a/thirdparty/zstd/common/error_private.c b/thirdparty/zstd/common/error_private.c
index 11f7cdab1c..d004ee636c 100644
--- a/thirdparty/zstd/common/error_private.c
+++ b/thirdparty/zstd/common/error_private.c
@@ -29,6 +29,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+ case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
diff --git a/thirdparty/zstd/common/fse.h b/thirdparty/zstd/common/fse.h
index afd7801963..6a1d272be5 100644
--- a/thirdparty/zstd/common/fse.h
+++ b/thirdparty/zstd/common/fse.h
@@ -345,7 +345,7 @@ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* s
*/
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
-/*! FSE_count_simple
+/*! FSE_count_simple() :
* Same as FSE_countFast(), but does not use any additional memory (not even on stack).
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
*/
diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c
index 8e3f0035f6..4c66c3b774 100644
--- a/thirdparty/zstd/common/fse_decompress.c
+++ b/thirdparty/zstd/common/fse_decompress.c
@@ -139,8 +139,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
{ U32 u;
for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+ U32 const nextState = symbolNext[symbol]++;
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
} }
diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h
index 522bf9b6c0..b4645b4e51 100644
--- a/thirdparty/zstd/common/huf.h
+++ b/thirdparty/zstd/common/huf.h
@@ -58,32 +58,32 @@ extern "C" {
#endif
-/* *** simple functions *** */
-/**
-HUF_compress() :
- Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
- 'dst' buffer must be already allocated.
- Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
- `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
- @return : size of compressed data (<= `dstCapacity`).
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
- if return == 1, srcData is a single repeated byte symbol (RLE compression).
- if HUF_isError(return), compression failed (more details using HUF_getErrorName())
-*/
+/* ========================== */
+/* *** simple functions *** */
+/* ========================== */
+
+/** HUF_compress() :
+ * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
-/**
-HUF_decompress() :
- Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated buffer 'dst', of minimum size 'dstSize'.
- `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
- Note : in contrast with FSE, HUF_decompress can regenerate
- RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
- because it knows size to regenerate.
- @return : size of regenerated data (== originalSize),
- or an error code, which can be tested using HUF_isError()
-*/
+/** HUF_decompress() :
+ * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ * into already allocated buffer 'dst', of minimum size 'dstSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ * Note : in contrast with FSE, HUF_decompress can regenerate
+ * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ * because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ * or an error code, which can be tested using HUF_isError()
+ */
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize);
@@ -100,39 +100,32 @@ HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error c
/* *** Advanced function *** */
/** HUF_compress2() :
- * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`.
- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
-HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must have minimum alignment of 4, and be at least as large as following macro */
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE (6 << 10)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
-HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
-
-/**
- * The minimum workspace size for the `workSpace` used in
- * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
- *
- * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
- * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
- * Buffer overflow errors may potentially occur if code modifications result in
- * a required workspace size greater than that specified in the following
- * macro.
- */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
-#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize);
#endif /* HUF_H_298734234 */
/* ******************************************************************
* WARNING !!
* The following section contains advanced and experimental definitions
- * which shall never be used in the context of dll
+ * which shall never be used in the context of a dynamic library,
* because they are not guaranteed to remain stable in the future.
* Only consider them in association with static linking.
- *******************************************************************/
+ * *****************************************************************/
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
#define HUF_H_HUF_STATIC_LINKING_ONLY
@@ -141,11 +134,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const
/* *** Constants *** */
-#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
+#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
#define HUF_SYMBOLVALUE_MAX 255
-#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
# error "HUF_TABLELOG_MAX is too large !"
#endif
@@ -192,24 +185,23 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
/* ****************************************
-* HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable()
-5. encode the data stream using HUF_compress4X_usingCTable()
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
+ * HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ * 2. (optional) refine tableLog using HUF_optimalTableLog()
+ * 3. build Huffman table from count using HUF_buildCTable()
+ * 4. save Huffman table to memory buffer using HUF_writeCTable()
+ * 5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ * The following API allows targeting specific sub-functions for advanced tasks.
+ * For example, it's possible to compress several blocks using the same 'CTable',
+ * or to save and regenerate 'CTable' using external methods.
+ */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
-size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@@ -219,46 +211,65 @@ typedef enum {
HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
-* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-* If it uses hufTable it does not modify hufTable or repeat.
-* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-* If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ * If it uses hufTable it does not modify hufTable or repeat.
+ * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ * If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
/*! HUF_readStats() :
- Read compact Huffman tree, saved by HUF_writeCTable().
- `huffWeight` is destination buffer.
- @return : size read from `src` , or an error Code .
- Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
- U32* nbSymbolsPtr, U32* tableLogPtr,
+ * Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+ U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize);
/** HUF_readCTable() :
-* Loading a CTable saved with HUF_writeCTable() */
+ * Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build Huffman table from save, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
-*/
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
/** HUF_selectDecoder() :
-* Tells which decoder is likely to decode faster,
-* based on a set of pre-determined metrics.
-* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-* Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+/**
+ * The minimum workspace size for the `workSpace` used in
+ * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ *
+ * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ * Buffer overflow errors may potentially occur if code modifications result in
+ * a required workspace size greater than that specified in the following
+ * macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
@@ -269,17 +280,23 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+/* ====================== */
/* single stream variants */
+/* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/** HUF_compress1X_repeat() :
-* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-* If it uses hufTable it does not modify hufTable or repeat.
-* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-* If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ * If it uses hufTable it does not modify hufTable or repeat.
+ * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ * If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
@@ -295,6 +312,14 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cS
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+
#endif /* HUF_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c
index 98b109e72a..773488b072 100644
--- a/thirdparty/zstd/common/pool.c
+++ b/thirdparty/zstd/common/pool.c
@@ -12,6 +12,7 @@
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include "pool.h"
+#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
@@ -193,32 +194,54 @@ static int isQueueFull(POOL_ctx const* ctx) {
}
}
-void POOL_add(void* ctxVoid, POOL_function function, void *opaque) {
- POOL_ctx* const ctx = (POOL_ctx*)ctxVoid;
- if (!ctx) { return; }
+static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+{
+ POOL_job const job = {function, opaque};
+ assert(ctx != NULL);
+ if (ctx->shutdown) return;
+
+ ctx->queueEmpty = 0;
+ ctx->queue[ctx->queueTail] = job;
+ ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
+ ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+}
+
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+ assert(ctx != NULL);
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
- { POOL_job const job = {function, opaque};
+ /* Wait until there is space in the queue for the new job */
+ while (isQueueFull(ctx) && (!ctx->shutdown)) {
+ ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+ }
+ POOL_add_internal(ctx, function, opaque);
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+}
- /* Wait until there is space in the queue for the new job */
- while (isQueueFull(ctx) && !ctx->shutdown) {
- ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
- }
- /* The queue is still going => there is space */
- if (!ctx->shutdown) {
- ctx->queueEmpty = 0;
- ctx->queue[ctx->queueTail] = job;
- ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
- }
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+ assert(ctx != NULL);
+ ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+ if (isQueueFull(ctx)) {
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+ return 0;
}
+ POOL_add_internal(ctx, function, opaque);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
- ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+ return 1;
}
+
#else /* ZSTD_MULTITHREAD not defined */
+
+/* ========================== */
/* No multi-threading support */
+/* ========================== */
-/* We don't need any data, but if it is empty malloc() might return NULL. */
+
+/* We don't need any data, but if it is empty, malloc() might return NULL. */
struct POOL_ctx_s {
int dummy;
};
@@ -240,9 +263,15 @@ void POOL_free(POOL_ctx* ctx) {
(void)ctx;
}
-void POOL_add(void* ctx, POOL_function function, void* opaque) {
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
+ (void)ctx;
+ function(opaque);
+}
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
(void)ctx;
function(opaque);
+ return 1;
}
size_t POOL_sizeof(POOL_ctx* ctx) {
diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h
index 08c63715aa..a57e9b4fab 100644
--- a/thirdparty/zstd/common/pool.h
+++ b/thirdparty/zstd/common/pool.h
@@ -17,7 +17,8 @@ extern "C" {
#include <stddef.h> /* size_t */
-#include "zstd_internal.h" /* ZSTD_customMem */
+#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
+#include "zstd.h"
typedef struct POOL_ctx_s POOL_ctx;
@@ -27,35 +28,43 @@ typedef struct POOL_ctx_s POOL_ctx;
* The maximum number of queued jobs before blocking is `queueSize`.
* @return : POOL_ctx pointer on success, else NULL.
*/
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
-POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
/*! POOL_free() :
Free a thread pool returned by POOL_create().
*/
-void POOL_free(POOL_ctx *ctx);
+void POOL_free(POOL_ctx* ctx);
/*! POOL_sizeof() :
return memory usage of pool returned by POOL_create().
*/
-size_t POOL_sizeof(POOL_ctx *ctx);
+size_t POOL_sizeof(POOL_ctx* ctx);
/*! POOL_function :
The function type that can be added to a thread pool.
*/
-typedef void (*POOL_function)(void *);
+typedef void (*POOL_function)(void*);
/*! POOL_add_function :
The function type for a generic thread pool add function.
*/
-typedef void (*POOL_add_function)(void *, POOL_function, void *);
+typedef void (*POOL_add_function)(void*, POOL_function, void*);
/*! POOL_add() :
- Add the job `function(opaque)` to the thread pool.
+ Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
Possibly blocks until there is room in the queue.
Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
-void POOL_add(void *ctx, POOL_function function, void *opaque);
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+/*! POOL_tryAdd() :
+ Add the job `function(opaque)` to the thread pool if a worker is available.
+ return immediately otherwise.
+ @return : 1 if successful, 0 if not.
+*/
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
#if defined (__cplusplus)
diff --git a/thirdparty/zstd/common/threading.h b/thirdparty/zstd/common/threading.h
index 197770db27..d806c89d01 100644
--- a/thirdparty/zstd/common/threading.h
+++ b/thirdparty/zstd/common/threading.h
@@ -45,15 +45,15 @@ extern "C" {
/* mutex */
#define ZSTD_pthread_mutex_t CRITICAL_SECTION
-#define ZSTD_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
+#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0)
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
/* condition variable */
#define ZSTD_pthread_cond_t CONDITION_VARIABLE
-#define ZSTD_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
-#define ZSTD_pthread_cond_destroy(a) /* No delete */
+#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0)
+#define ZSTD_pthread_cond_destroy(a) ((void)(a))
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
@@ -100,17 +100,17 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
/* No multithreading support */
typedef int ZSTD_pthread_mutex_t;
-#define ZSTD_pthread_mutex_init(a, b) ((void)a, 0)
-#define ZSTD_pthread_mutex_destroy(a)
-#define ZSTD_pthread_mutex_lock(a)
-#define ZSTD_pthread_mutex_unlock(a)
+#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0)
+#define ZSTD_pthread_mutex_destroy(a) ((void)(a))
+#define ZSTD_pthread_mutex_lock(a) ((void)(a))
+#define ZSTD_pthread_mutex_unlock(a) ((void)(a))
typedef int ZSTD_pthread_cond_t;
-#define ZSTD_pthread_cond_init(a, b) ((void)a, 0)
-#define ZSTD_pthread_cond_destroy(a)
-#define ZSTD_pthread_cond_wait(a, b)
-#define ZSTD_pthread_cond_signal(a)
-#define ZSTD_pthread_cond_broadcast(a)
+#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0)
+#define ZSTD_pthread_cond_destroy(a) ((void)(a))
+#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b))
+#define ZSTD_pthread_cond_signal(a) ((void)(a))
+#define ZSTD_pthread_cond_broadcast(a) ((void)(a))
/* do not use ZSTD_pthread_t */
diff --git a/thirdparty/zstd/common/zstd_errors.h b/thirdparty/zstd/common/zstd_errors.h
index 4bcb7769fe..57533f2869 100644
--- a/thirdparty/zstd/common/zstd_errors.h
+++ b/thirdparty/zstd/common/zstd_errors.h
@@ -35,12 +35,20 @@ extern "C" {
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
#endif
-/*-****************************************
- * error codes list
- * note : this API is still considered unstable
- * and shall not be used with a dynamic library.
- * only static linking is allowed
- ******************************************/
+/*-*********************************************
+ * Error codes list
+ *-*********************************************
+ * Error codes _values_ are pinned down since v1.3.1 only.
+ * Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ * Only values < 100 are considered stable.
+ *
+ * note 1 : this API shall be used with static linking only.
+ * dynamic linking is not yet officially supported.
+ * note 2 : Prefer relying on the enum than on its value whenever possible
+ * This is the only supported way to use the error list < v1.3.1
+ * note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
typedef enum {
ZSTD_error_no_error = 0,
ZSTD_error_GENERIC = 1,
@@ -61,9 +69,10 @@ typedef enum {
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64,
+ ZSTD_error_workSpace_tooSmall= 66,
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
- /* following error codes are not stable and may be removed or changed in a future version */
+ /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h
index 5d2900eb76..65c08a8257 100644
--- a/thirdparty/zstd/common/zstd_internal.h
+++ b/thirdparty/zstd/common/zstd_internal.h
@@ -132,14 +132,15 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define Litbits 8
#define MaxLit ((1<<Litbits) - 1)
-#define MaxML 52
-#define MaxLL 35
+#define MaxML 52
+#define MaxLL 35
#define DefaultMaxOff 28
-#define MaxOff 31
+#define MaxOff 31
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog 9
#define LLFSELog 9
#define OffFSELog 8
+#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -228,8 +229,6 @@ typedef struct {
BYTE* ofCode;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
- U32 rep[ZSTD_REP_NUM];
- U32 repToConfirm[ZSTD_REP_NUM];
} seqStore_t;
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c
index 549c115d42..cb8f1fa323 100644
--- a/thirdparty/zstd/compress/fse_compress.c
+++ b/thirdparty/zstd/compress/fse_compress.c
@@ -248,7 +248,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
bitCount -= (count<max);
previous0 = (count==1);
if (remaining<1) return ERROR(GENERIC);
- while (remaining<threshold) nbBits--, threshold>>=1;
+ while (remaining<threshold) { nbBits--; threshold>>=1; }
}
if (bitCount>16) {
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
@@ -292,7 +292,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
It doesn't use any additional memory.
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
For this reason, prefer using a table `count` with 256 elements.
- @return : count of most numerous element
+ @return : count of most numerous element.
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
@@ -305,7 +305,10 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
- while (ip<end) count[*ip++]++;
+ while (ip<end) {
+ assert(*ip <= maxSymbolValue);
+ count[*ip++]++;
+ }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
@@ -318,7 +321,8 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
/* FSE_count_parallel_wksp() :
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
+ * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t FSE_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
@@ -333,7 +337,7 @@ static size_t FSE_count_parallel_wksp(
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
- memset(Counting1, 0, 4*256*sizeof(unsigned));
+ memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
@@ -379,7 +383,9 @@ static size_t FSE_count_parallel_wksp(
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
- { U32 s; for (s=0; s<=maxSymbolValue; s++) {
+ { U32 s;
+ if (maxSymbolValue > 255) maxSymbolValue = 255;
+ for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
@@ -393,9 +399,11 @@ static size_t FSE_count_parallel_wksp(
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
- const void* source, size_t sourceSize, unsigned* workSpace)
+ const void* source, size_t sourceSize,
+ unsigned* workSpace)
{
- if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+ if (sourceSize < 1500) /* heuristic threshold */
+ return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
}
@@ -540,7 +548,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
find max, then give all remaining points to max */
U32 maxV = 0, maxC = 0;
for (s=0; s<=maxSymbolValue; s++)
- if (count[s] > maxC) maxV=s, maxC=count[s];
+ if (count[s] > maxC) { maxV=s; maxC=count[s]; }
norm[maxV] += (short)ToDistribute;
return 0;
}
@@ -548,7 +556,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
if (total == 0) {
/* all of the symbols were low enough for the lowOne or lowThreshold */
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
- if (norm[s] > 0) ToDistribute--, norm[s]++;
+ if (norm[s] > 0) { ToDistribute--; norm[s]++; }
return 0;
}
@@ -604,7 +612,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
U64 restToBeat = vStep * rtbTable[proba];
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
}
- if (proba > largestP) largestP=proba, largest=s;
+ if (proba > largestP) { largestP=proba; largest=s; }
normalizedCounter[s] = proba;
stillToDistribute -= proba;
} }
diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c
index 5692d56e00..83230b415f 100644
--- a/thirdparty/zstd/compress/huf_compress.c
+++ b/thirdparty/zstd/compress/huf_compress.c
@@ -46,6 +46,7 @@
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
+#include "compiler.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
@@ -322,7 +323,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
U32 const c = count[n];
U32 const r = BIT_highbit32(c+1) + 1;
U32 pos = rank[r].current++;
- while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
+ while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
+ huffNode[pos] = huffNode[pos-1];
+ pos--;
+ }
huffNode[pos].count = c;
huffNode[pos].byte = (BYTE)n;
}
@@ -331,10 +335,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
nodeElt* const huffNode0 = (nodeElt*)workSpace;
@@ -345,9 +349,10 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
U32 nodeRoot;
/* safety checks */
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
+ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
+ if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
@@ -405,6 +410,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
}
/** HUF_buildCTable() :
+ * @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
@@ -432,13 +438,14 @@ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, uns
return !bad;
}
-static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
#define HUF_FLUSHBITS(s) BIT_flushBits(s)
#define HUF_FLUSHBITS_1(stream) \
@@ -447,7 +454,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
#define HUF_FLUSHBITS_2(stream) \
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable)
{
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
@@ -491,8 +501,58 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
return BIT_closeCStream(&bitC);
}
+#if DYNAMIC_BMI2
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable)
+{
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable)
+{
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable, const int bmi2)
+{
+ if (bmi2) {
+ return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+ }
+ return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable, const int bmi2)
+{
+ (void)bmi2;
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+#endif
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable, int bmi2)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
@@ -505,28 +565,31 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
+ assert(cSize <= 65535);
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
+ assert(cSize <= 65535);
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
+ assert(cSize <= 65535);
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
if (cSize==0) return 0;
op += cSize;
}
@@ -534,15 +597,20 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
return op-ostart;
}
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
- unsigned singleStream, const HUF_CElt* CTable)
+ unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
{
size_t const cSize = singleStream ?
- HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
- HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+ HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
+ HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
@@ -551,86 +619,98 @@ static size_t HUF_compressCTable_internal(
return op-ostart;
}
+typedef struct {
+ U32 count[HUF_SYMBOLVALUE_MAX + 1];
+ HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+ huffNodeTable nodeTable;
+} HUF_compress_tables_t;
-/* `workSpace` must a table of at least 1024 unsigned */
+/* HUF_compress_internal() :
+ * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t HUF_compress_internal (
void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream,
void* workSpace, size_t wkspSize,
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+ const int bmi2)
{
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
- U32* count;
- size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
- HUF_CElt* CTable;
- size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
-
/* checks & inits */
- if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
- if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
- if (!dstSize) return 0; /* cannot fit within dst budget */
+ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
+ if (!srcSize) return 0; /* Uncompressed */
+ if (!dstSize) return 0; /* cannot fit anything within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
- count = (U32*)workSpace;
- workSpace = (BYTE*)workSpace + countSize;
- wkspSize -= countSize;
- CTable = (HUF_CElt*)workSpace;
- workSpace = (BYTE*)workSpace + CTableSize;
- wkspSize -= CTableSize;
-
- /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+ /* Heuristic : If old table is valid, use it for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, oldHufTable, bmi2);
}
/* Scan input and build symbol stats */
- { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
+ { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
- if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
+ if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
}
/* Check validity of previous table */
- if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+ if ( repeat
+ && *repeat == HUF_repeat_check
+ && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, oldHufTable, bmi2);
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
- { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
+ { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
+ maxSymbolValue, huffLog,
+ table->nodeTable, sizeof(table->nodeTable)) );
huffLog = (U32)maxBits;
- /* Zero the unused symbols so we can check it for validity */
- memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
+ /* Zero unused symbols in CTable, so we can check it for validity */
+ memset(table->CTable + (maxSymbolValue + 1), 0,
+ sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
}
/* Write table description header */
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
- /* Check if using the previous table will be beneficial */
+ { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+ /* Check if using previous huffman table is beneficial */
if (repeat && *repeat != HUF_repeat_none) {
- size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
- size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+ size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+ size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
- }
- }
- /* Use the new table */
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, oldHufTable, bmi2);
+ } }
+
+ /* Use the new huffman table */
if (hSize + 12ul >= srcSize) { return 0; }
op += hSize;
if (repeat) { *repeat = HUF_repeat_none; }
- if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
+ if (oldHufTable)
+ memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
}
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, table->CTable, bmi2);
}
@@ -639,52 +719,70 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 1 /*single stream*/,
+ workSpace, wkspSize,
+ NULL, NULL, 0, 0 /*bmi2*/);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 1 /*single stream*/,
+ workSpace, wkspSize, hufTable,
+ repeat, preferRepeat, bmi2);
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
- unsigned workSpace[1024];
+ unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 0 /*4 streams*/,
+ workSpace, wkspSize,
+ NULL, NULL, 0, 0 /*bmi2*/);
}
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 0 /* 4 streams */,
+ workSpace, wkspSize,
+ hufTable, repeat, preferRepeat, bmi2);
}
size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
- unsigned workSpace[1024];
+ unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
- return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
+ return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c
index 8d1629246d..2aa26da4cd 100644
--- a/thirdparty/zstd/compress/zstd_compress.c
+++ b/thirdparty/zstd/compress/zstd_compress.c
@@ -21,6 +21,7 @@
* Dependencies
***************************************/
#include <string.h> /* memset */
+#include "cpu.h"
#include "mem.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
#include "fse.h"
@@ -49,7 +50,13 @@ struct ZSTD_CDict_s {
void* dictBuffer;
const void* dictContent;
size_t dictContentSize;
- ZSTD_CCtx* refContext;
+ void* workspace;
+ size_t workspaceSize;
+ ZSTD_matchState_t matchState;
+ ZSTD_compressedBlockState_t cBlockState;
+ ZSTD_compressionParameters cParams;
+ ZSTD_customMem customMem;
+ U32 dictID;
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -59,18 +66,17 @@ ZSTD_CCtx* ZSTD_createCCtx(void)
ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
- ZSTD_CCtx* cctx;
-
- if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
-
- cctx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
- if (!cctx) return NULL;
- cctx->customMem = customMem;
- cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
- cctx->requestedParams.fParams.contentSizeFlag = 1;
ZSTD_STATIC_ASSERT(zcss_init==0);
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
- return cctx;
+ if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+ { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
+ if (!cctx) return NULL;
+ cctx->customMem = customMem;
+ cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+ cctx->requestedParams.fParams.contentSizeFlag = 1;
+ cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+ return cctx;
+ }
}
ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
@@ -83,11 +89,16 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
cctx->workSpace = (void*)(cctx+1);
cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
- /* entropy space (never moves) */
- if (cctx->workSpaceSize < sizeof(ZSTD_entropyCTables_t)) return NULL;
+ /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+ if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
- cctx->entropy = (ZSTD_entropyCTables_t*)cctx->workSpace;
-
+ cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
+ cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
+ {
+ void* const ptr = cctx->blockState.nextCBlock + 1;
+ cctx->entropyWorkspace = (U32*)ptr;
+ }
+ cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
@@ -95,13 +106,10 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support free on NULL */
if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */
- ZSTD_free(cctx->workSpace, cctx->customMem);
- cctx->workSpace = NULL;
- ZSTD_freeCDict(cctx->cdictLocal);
- cctx->cdictLocal = NULL;
+ ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
+ ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL;
#ifdef ZSTD_MULTITHREAD
- ZSTDMT_freeCCtx(cctx->mtctx);
- cctx->mtctx = NULL;
+ ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
ZSTD_free(cctx, cctx->customMem);
return 0; /* reserved as a potential error code in the future */
@@ -122,10 +130,6 @@ static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
- DEBUGLOG(3, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
- DEBUGLOG(3, "workSpaceSize (including streaming buffers): %u", (U32)cctx->workSpaceSize);
- DEBUGLOG(3, "inner cdict : %u", (U32)ZSTD_sizeof_CDict(cctx->cdictLocal));
- DEBUGLOG(3, "inner MTCTX : %u", (U32)ZSTD_sizeof_mtctx(cctx));
return sizeof(*cctx) + cctx->workSpaceSize
+ ZSTD_sizeof_CDict(cctx->cdictLocal)
+ ZSTD_sizeof_mtctx(cctx);
@@ -139,37 +143,19 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
-#define ZSTD_CLEVEL_CUSTOM 999
-
-static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
- ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize)
-{
- DEBUGLOG(4, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u",
- (U32)srcSizeHint, (U32)dictSize);
- return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ?
- CCtxParams.cParams :
- ZSTD_getCParams(CCtxParams.compressionLevel, srcSizeHint, dictSize);
-}
-
-static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* CCtxParams, U64 srcSize)
-{
- DEBUGLOG(4, "ZSTD_cLevelToCCtxParams_srcSize: srcSize = %u",
- (U32)srcSize);
- CCtxParams->cParams = ZSTD_getCParamsFromCCtxParams(*CCtxParams, srcSize, 0);
- CCtxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM;
-}
-
-static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
-{
- DEBUGLOG(4, "ZSTD_cLevelToCParams: level=%i", cctx->requestedParams.compressionLevel);
- ZSTD_cLevelToCCtxParams_srcSize(
- &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1);
-}
-
-static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* CCtxParams)
-{
- DEBUGLOG(4, "ZSTD_cLevelToCCtxParams");
- ZSTD_cLevelToCCtxParams_srcSize(CCtxParams, ZSTD_CONTENTSIZE_UNKNOWN);
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
+{
+ ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+ if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+ if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
+ if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
+ if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
+ if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
+ if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
+ if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
+ if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
+ return cParams;
}
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
@@ -178,7 +164,9 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
ZSTD_CCtx_params cctxParams;
memset(&cctxParams, 0, sizeof(cctxParams));
cctxParams.cParams = cParams;
- cctxParams.compressionLevel = ZSTD_CLEVEL_CUSTOM;
+ cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
+ assert(!ZSTD_checkCParams(cParams));
+ cctxParams.fParams.contentSizeFlag = 1;
return cctxParams;
}
@@ -192,6 +180,7 @@ static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
if (!params) { return NULL; }
params->customMem = customMem;
params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
+ params->fParams.contentSizeFlag = 1;
return params;
}
@@ -207,36 +196,41 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
return 0;
}
-size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params)
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
- return ZSTD_initCCtxParams(params, ZSTD_CLEVEL_DEFAULT);
+ return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}
-size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
if (!cctxParams) { return ERROR(GENERIC); }
memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->compressionLevel = compressionLevel;
+ cctxParams->fParams.contentSizeFlag = 1;
return 0;
}
-size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
if (!cctxParams) { return ERROR(GENERIC); }
CHECK_F( ZSTD_checkCParams(params.cParams) );
memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->cParams = params.cParams;
cctxParams->fParams = params.fParams;
- cctxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+ cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
+ assert(!ZSTD_checkCParams(params.cParams));
return 0;
}
+/* ZSTD_assignParamsToCCtxParams() :
+ * params is presumed valid at this stage */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
{
ZSTD_CCtx_params ret = cctxParams;
ret.cParams = params.cParams;
ret.fParams = params.fParams;
- ret.compressionLevel = ZSTD_CLEVEL_CUSTOM;
+ ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
+ assert(!ZSTD_checkCParams(params.cParams));
return ret;
}
@@ -245,10 +239,49 @@ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
return ERROR(parameter_outOfBound); \
} }
+
+static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+{
+ switch(param)
+ {
+ case ZSTD_p_compressionLevel:
+ case ZSTD_p_hashLog:
+ case ZSTD_p_chainLog:
+ case ZSTD_p_searchLog:
+ case ZSTD_p_minMatch:
+ case ZSTD_p_targetLength:
+ case ZSTD_p_compressionStrategy:
+ case ZSTD_p_compressLiterals:
+ return 1;
+
+ case ZSTD_p_format:
+ case ZSTD_p_windowLog:
+ case ZSTD_p_contentSizeFlag:
+ case ZSTD_p_checksumFlag:
+ case ZSTD_p_dictIDFlag:
+ case ZSTD_p_forceMaxWindow :
+ case ZSTD_p_nbWorkers:
+ case ZSTD_p_jobSize:
+ case ZSTD_p_overlapSizeLog:
+ case ZSTD_p_enableLongDistanceMatching:
+ case ZSTD_p_ldmHashLog:
+ case ZSTD_p_ldmMinMatch:
+ case ZSTD_p_ldmBucketSizeLog:
+ case ZSTD_p_ldmHashEveryLog:
+ default:
+ return 0;
+ }
+}
+
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
{
DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value);
- if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+ if (cctx->streamStage != zcss_init) {
+ if (ZSTD_isUpdateAuthorized(param)) {
+ cctx->cParamsChanged = 1;
+ } else {
+ return ERROR(stage_wrong);
+ } }
switch(param)
{
@@ -267,9 +300,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
case ZSTD_p_targetLength:
case ZSTD_p_compressionStrategy:
if (cctx->cdict) return ERROR(stage_wrong);
- if (value>0) ZSTD_cLevelToCParams(cctx); /* Can optimize if srcSize is known */
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
+ case ZSTD_p_compressLiterals:
case ZSTD_p_contentSizeFlag:
case ZSTD_p_checksumFlag:
case ZSTD_p_dictIDFlag:
@@ -280,23 +313,17 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
* default : 0 when using a CDict, 1 when using a Prefix */
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
- case ZSTD_p_nbThreads:
- if ((value > 1) && cctx->staticSize) {
+ case ZSTD_p_nbWorkers:
+ if ((value>0) && cctx->staticSize) {
return ERROR(parameter_unsupported); /* MT not compatible with static alloc */
}
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_p_jobSize:
- return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
-
case ZSTD_p_overlapSizeLog:
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_p_enableLongDistanceMatching:
- if (cctx->cdict) return ERROR(stage_wrong);
- if (value>0) ZSTD_cLevelToCParams(cctx);
- return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
-
case ZSTD_p_ldmHashLog:
case ZSTD_p_ldmMinMatch:
case ZSTD_p_ldmBucketSizeLog:
@@ -320,69 +347,62 @@ size_t ZSTD_CCtxParam_setParameter(
CCtxParams->format = (ZSTD_format_e)value;
return (size_t)CCtxParams->format;
- case ZSTD_p_compressionLevel :
- if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel();
- if (value) /* 0 : does not change current level */
- CCtxParams->compressionLevel = value;
- return CCtxParams->compressionLevel;
+ case ZSTD_p_compressionLevel : {
+ int cLevel = (int)value; /* cast expected to restore negative sign */
+ if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
+ if (cLevel) { /* 0 : does not change current level */
+ CCtxParams->disableLiteralCompression = (cLevel<0); /* negative levels disable huffman */
+ CCtxParams->compressionLevel = cLevel;
+ }
+ if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
+ return 0; /* return type (size_t) cannot represent negative values */
+ }
case ZSTD_p_windowLog :
- DEBUGLOG(4, "ZSTD_CCtxParam_setParameter: set windowLog=%u", value);
- if (value) { /* 0 : does not change current windowLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.windowLog = value;
- }
+ CCtxParams->cParams.windowLog = value;
return CCtxParams->cParams.windowLog;
case ZSTD_p_hashLog :
- if (value) { /* 0 : does not change current hashLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.hashLog = value;
- }
+ CCtxParams->cParams.hashLog = value;
return CCtxParams->cParams.hashLog;
case ZSTD_p_chainLog :
- if (value) { /* 0 : does not change current chainLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.chainLog = value;
- }
+ CCtxParams->cParams.chainLog = value;
return CCtxParams->cParams.chainLog;
case ZSTD_p_searchLog :
- if (value) { /* 0 : does not change current searchLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.searchLog = value;
- }
+ CCtxParams->cParams.searchLog = value;
return value;
case ZSTD_p_minMatch :
- if (value) { /* 0 : does not change current minMatch length */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.searchLength = value;
- }
+ CCtxParams->cParams.searchLength = value;
return CCtxParams->cParams.searchLength;
case ZSTD_p_targetLength :
- if (value) { /* 0 : does not change current sufficient_len */
- CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.targetLength = value;
- }
+ /* all values are valid. 0 => use default */
+ CCtxParams->cParams.targetLength = value;
return CCtxParams->cParams.targetLength;
case ZSTD_p_compressionStrategy :
- if (value) { /* 0 : does not change currentstrategy */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.strategy = (ZSTD_strategy)value;
- }
+ CCtxParams->cParams.strategy = (ZSTD_strategy)value;
return (size_t)CCtxParams->cParams.strategy;
+ case ZSTD_p_compressLiterals:
+ CCtxParams->disableLiteralCompression = !value;
+ return !CCtxParams->disableLiteralCompression;
+
case ZSTD_p_contentSizeFlag :
/* Content size written in frame header _when known_ (default:1) */
DEBUGLOG(4, "set content size flag = %u", (value>0));
@@ -396,27 +416,25 @@ size_t ZSTD_CCtxParam_setParameter(
case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
DEBUGLOG(4, "set dictIDFlag = %u", (value>0));
- CCtxParams->fParams.noDictIDFlag = (value == 0);
+ CCtxParams->fParams.noDictIDFlag = !value;
return !CCtxParams->fParams.noDictIDFlag;
case ZSTD_p_forceMaxWindow :
CCtxParams->forceWindow = (value > 0);
return CCtxParams->forceWindow;
- case ZSTD_p_nbThreads :
- if (value == 0) return CCtxParams->nbThreads;
+ case ZSTD_p_nbWorkers :
#ifndef ZSTD_MULTITHREAD
- if (value > 1) return ERROR(parameter_unsupported);
- return 1;
+ if (value>0) return ERROR(parameter_unsupported);
+ return 0;
#else
- return ZSTDMT_CCtxParam_setNbThreads(CCtxParams, value);
+ return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value);
#endif
case ZSTD_p_jobSize :
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
- if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported);
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value);
#endif
@@ -424,44 +442,36 @@ size_t ZSTD_CCtxParam_setParameter(
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
- if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported);
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value);
#endif
case ZSTD_p_enableLongDistanceMatching :
- if (value) {
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
- }
- return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value);
+ CCtxParams->ldmParams.enableLdm = (value>0);
+ return CCtxParams->ldmParams.enableLdm;
case ZSTD_p_ldmHashLog :
- if (value) { /* 0 : does not change current ldmHashLog */
+ if (value>0) /* 0 ==> auto */
CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
- CCtxParams->ldmParams.hashLog = value;
- }
+ CCtxParams->ldmParams.hashLog = value;
return CCtxParams->ldmParams.hashLog;
case ZSTD_p_ldmMinMatch :
- if (value) { /* 0 : does not change current ldmMinMatch */
+ if (value>0) /* 0 ==> default */
CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX);
- CCtxParams->ldmParams.minMatchLength = value;
- }
+ CCtxParams->ldmParams.minMatchLength = value;
return CCtxParams->ldmParams.minMatchLength;
case ZSTD_p_ldmBucketSizeLog :
- if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) {
+ if (value > ZSTD_LDM_BUCKETSIZELOG_MAX)
return ERROR(parameter_outOfBound);
- }
CCtxParams->ldmParams.bucketSizeLog = value;
- return value;
+ return CCtxParams->ldmParams.bucketSizeLog;
case ZSTD_p_ldmHashEveryLog :
- if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) {
+ if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
return ERROR(parameter_outOfBound);
- }
CCtxParams->ldmParams.hashEveryLog = value;
- return value;
+ return CCtxParams->ldmParams.hashEveryLog;
default: return ERROR(parameter_unsupported);
}
@@ -470,6 +480,9 @@ size_t ZSTD_CCtxParam_setParameter(
/** ZSTD_CCtx_setParametersUsingCCtxParams() :
* just applies `params` into `cctx`
* no action is performed, parameters are merely stored.
+ * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
+ * This is possible even if a compression is ongoing.
+ * In which case, new parameters will be applied on the fly, starting with next compression job.
*/
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
@@ -478,7 +491,6 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
if (cctx->cdict) return ERROR(stage_wrong);
cctx->requestedParams = *params;
-
return 0;
}
@@ -492,7 +504,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
size_t ZSTD_CCtx_loadDictionary_advanced(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode)
+ ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */
@@ -503,10 +515,10 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
cctx->cdict = NULL;
} else {
ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize);
+ ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize);
cctx->cdictLocal = ZSTD_createCDict_advanced(
dict, dictSize,
- dictLoadMethod, dictMode,
+ dictLoadMethod, dictContentType,
cParams, cctx->customMem);
cctx->cdict = cctx->cdictLocal;
if (cctx->cdictLocal == NULL)
@@ -519,13 +531,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
return ZSTD_CCtx_loadDictionary_advanced(
- cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dm_auto);
+ cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
return ZSTD_CCtx_loadDictionary_advanced(
- cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dm_auto);
+ cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
@@ -539,17 +551,17 @@ size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
- return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dm_rawContent);
+ return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}
size_t ZSTD_CCtx_refPrefix_advanced(
- ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode)
+ ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
cctx->cdict = NULL; /* prefix discards any prior cdict */
cctx->prefixDict.dict = prefix;
cctx->prefixDict.dictSize = prefixSize;
- cctx->prefixDict.dictMode = dictMode;
+ cctx->prefixDict.dictContentType = dictContentType;
return 0;
}
@@ -577,7 +589,8 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
- CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+ if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN)
+ return ERROR(parameter_unsupported);
if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
return ERROR(parameter_unsupported);
return 0;
@@ -597,7 +610,7 @@ static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters c
CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
- CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+ if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN;
if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
return cParams;
}
@@ -653,36 +666,43 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}
+static size_t ZSTD_sizeof_matchState(ZSTD_compressionParameters const* cParams, const U32 forCCtx)
+{
+ size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ size_t const hSize = ((size_t)1) << cParams->hashLog;
+ U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+ size_t const h3Size = ((size_t)1) << hashLog3;
+ size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+ size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+ + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
+ size_t const optSpace = (forCCtx && ((cParams->strategy == ZSTD_btopt) ||
+ (cParams->strategy == ZSTD_btultra)))
+ ? optPotentialSpace
+ : 0;
+ DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+ (U32)chainSize, (U32)hSize, (U32)h3Size);
+ return tableSpace + optSpace;
+}
+
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
/* Estimate CCtx size is supported for single-threaded compression only. */
- if (params->nbThreads > 1) { return ERROR(GENERIC); }
+ if (params->nbWorkers > 0) { return ERROR(GENERIC); }
{ ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(*params, 0, 0);
+ ZSTD_getCParamsFromCCtxParams(params, 0, 0);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
- size_t const chainSize =
- (cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams.chainLog);
- size_t const hSize = ((size_t)1) << cParams.hashLog;
- U32 const hashLog3 = (cParams.searchLength>3) ?
- 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+ size_t const entropySpace = HUF_WORKSPACE_SIZE;
+ size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
- size_t const optBudget =
- ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
- + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
- size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
+ size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
+ size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);
- size_t const ldmSpace = params->ldmParams.enableLdm ?
- ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
- params->ldmParams.bucketSizeLog) : 0;
-
- size_t const neededSpace = entropySpace + tableSpace + tokenSpace +
- optSpace + ldmSpace;
+ size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
+ matchStateSize + ldmSpace + ldmSeqSpace;
DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
@@ -696,15 +716,26 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
}
-size_t ZSTD_estimateCCtxSize(int compressionLevel)
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
return ZSTD_estimateCCtxSize_usingCParams(cParams);
}
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+ int level;
+ size_t memBudget = 0;
+ for (level=1; level<=compressionLevel; level++) {
+ size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+ if (newMB > memBudget) memBudget = newMB;
+ }
+ return memBudget;
+}
+
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
- if (params->nbThreads > 1) { return ERROR(GENERIC); }
+ if (params->nbWorkers > 0) { return ERROR(GENERIC); }
{ size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog);
size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize;
@@ -721,11 +752,44 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
}
-size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) {
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
+size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+ int level;
+ size_t memBudget = 0;
+ for (level=1; level<=compressionLevel; level++) {
+ size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+ if (newMB > memBudget) memBudget = newMB;
+ }
+ return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+ if (cctx->appliedParams.nbWorkers > 0) {
+ return ZSTDMT_getFrameProgression(cctx->mtctx);
+ }
+#endif
+ { ZSTD_frameProgression fp;
+ size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+ cctx->inBuffPos - cctx->inToCompress;
+ if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+ assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+ fp.ingested = cctx->consumedSrcSize + buffered;
+ fp.consumed = cctx->consumedSrcSize;
+ fp.produced = cctx->producedCSize;
+ return fp;
+} }
+
+
static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
ZSTD_compressionParameters cParams2)
{
@@ -761,9 +825,9 @@ static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1,
size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
- DEBUGLOG(4, "ZSTD_sufficientBuff: windowSize2=%u from wlog=%u",
+ DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u",
(U32)windowSize2, cParams2.windowLog);
- DEBUGLOG(4, "ZSTD_sufficientBuff: blockSize2 %u <=? blockSize1 %u",
+ DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u",
(U32)blockSize2, (U32)blockSize1);
return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */
& (neededBufferSize2 <= bufferSize1);
@@ -782,37 +846,101 @@ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize);
}
+static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+{
+ int i;
+ for (i = 0; i < ZSTD_REP_NUM; ++i)
+ bs->rep[i] = repStartValue[i];
+ bs->entropy.hufCTable_repeatMode = HUF_repeat_none;
+ bs->entropy.offcode_repeatMode = FSE_repeat_none;
+ bs->entropy.matchlength_repeatMode = FSE_repeat_none;
+ bs->entropy.litlength_repeatMode = FSE_repeat_none;
+}
+
+/*! ZSTD_invalidateMatchState()
+ * Invalidate all the matches in the match finder tables.
+ * Requires nextSrc and base to be set (can be NULL).
+ */
+static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
+{
+ ZSTD_window_clear(&ms->window);
+
+ ms->nextToUpdate = ms->window.dictLimit + 1;
+ ms->loadedDictEnd = 0;
+ ms->opt.litLengthSum = 0; /* force reset of btopt stats */
+}
+
/*! ZSTD_continueCCtx() :
* reuse CCtx without reset (note : requires no dictionary) */
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
{
- U32 const end = (U32)(cctx->nextSrc - cctx->base);
size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- DEBUGLOG(4, "ZSTD_continueCCtx");
+ DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");
cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
cctx->appliedParams = params;
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
cctx->consumedSrcSize = 0;
+ cctx->producedCSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
cctx->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
(U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
- cctx->lowLimit = end;
- cctx->dictLimit = end;
- cctx->nextToUpdate = end+1;
cctx->stage = ZSTDcs_init;
cctx->dictID = 0;
- cctx->loadedDictEnd = 0;
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = repStartValue[i]; }
- cctx->optState.litLengthSum = 0; /* force reset of btopt stats */
+ if (params.ldmParams.enableLdm)
+ ZSTD_window_clear(&cctx->ldmState.window);
+ ZSTD_referenceExternalSequences(cctx, NULL, 0);
+ ZSTD_invalidateMatchState(&cctx->blockState.matchState);
+ ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
XXH64_reset(&cctx->xxhState, 0);
return 0;
}
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
+static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+{
+ size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ size_t const hSize = ((size_t)1) << cParams->hashLog;
+ U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+ size_t const h3Size = ((size_t)1) << hashLog3;
+ size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+
+ assert(((size_t)ptr & 3) == 0);
+
+ ms->hashLog3 = hashLog3;
+ memset(&ms->window, 0, sizeof(ms->window));
+ ZSTD_invalidateMatchState(ms);
+
+ /* opt parser space */
+ if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) {
+ DEBUGLOG(4, "reserving optimal parser space");
+ ms->opt.litFreq = (U32*)ptr;
+ ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
+ ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
+ ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
+ ptr = ms->opt.offCodeFreq + (MaxOff+1);
+ ms->opt.matchTable = (ZSTD_match_t*)ptr;
+ ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
+ ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
+ ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
+ }
+
+ /* table Space */
+ DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
+ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
+ if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
+ ms->hashTable = (U32*)(ptr);
+ ms->chainTable = ms->hashTable + hSize;
+ ms->hashTable3 = ms->chainTable + chainSize;
+ ptr = ms->hashTable3 + h3Size;
+
+ assert(((size_t)ptr & 3) == 0);
+ return ptr;
+}
+
/*! ZSTD_resetCCtx_internal() :
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
@@ -830,19 +958,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zbuff, pledgedSrcSize)) {
DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)",
zc->appliedParams.cParams.windowLog, (U32)zc->blockSize);
- assert(!(params.ldmParams.enableLdm &&
- params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET));
- zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
- zc->entropy->offcode_repeatMode = FSE_repeat_none;
- zc->entropy->matchlength_repeatMode = FSE_repeat_none;
- zc->entropy->litlength_repeatMode = FSE_repeat_none;
return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
} }
DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters */
- ZSTD_ldm_adjustParameters(&params.ldmParams, params.cParams.windowLog);
+ params.ldmParams.windowLog = params.cParams.windowLog;
+ ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashEveryLog < 32);
zc->ldmState.hashPower =
@@ -854,34 +977,25 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
- size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
- 0 : ((size_t)1 << params.cParams.chainLog);
- size_t const hSize = ((size_t)1) << params.cParams.hashLog;
- U32 const hashLog3 = (params.cParams.searchLength>3) ?
- 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
+ size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
void* ptr;
/* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
- size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
- + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
- size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
- || (params.cParams.strategy == ZSTD_btultra)) ?
- optPotentialSpace : 0;
+ { size_t const entropySpace = HUF_WORKSPACE_SIZE;
+ size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
size_t const bufferSpace = buffInSize + buffOutSize;
- size_t const ldmSpace = params.ldmParams.enableLdm
- ? ZSTD_ldm_getTableSize(params.ldmParams.hashLog, params.ldmParams.bucketSizeLog)
- : 0;
- size_t const neededSpace = entropySpace + optSpace + ldmSpace +
- tableSpace + tokenSpace + bufferSpace;
- DEBUGLOG(4, "Need %uKB workspace, including %uKB for tables, and %uKB for buffers",
- (U32)(neededSpace>>10), (U32)(tableSpace>>10), (U32)(bufferSpace>>10));
- DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u - windowSize: %u - blockSize: %u",
- (U32)chainSize, (U32)hSize, (U32)h3Size, (U32)windowSize, (U32)blockSize);
+ size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
+ size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
+
+ size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
+ ldmSeqSpace + matchStateSize + tokenSpace +
+ bufferSpace;
+ DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers",
+ (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10));
+ DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize);
if (zc->workSpaceSize < neededSpace) { /* too small : resize */
DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK",
@@ -897,16 +1011,20 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->workSpaceSize = neededSpace;
ptr = zc->workSpace;
- /* entropy space */
+ /* Statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
- assert(zc->workSpaceSize >= sizeof(ZSTD_entropyCTables_t));
- zc->entropy = (ZSTD_entropyCTables_t*)zc->workSpace;
+ assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
+ zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
+ zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
+ ptr = zc->blockState.nextCBlock + 1;
+ zc->entropyWorkspace = (U32*)ptr;
} }
/* init params */
zc->appliedParams = params;
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
zc->consumedSrcSize = 0;
+ zc->producedCSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
zc->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
@@ -916,37 +1034,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
XXH64_reset(&zc->xxhState, 0);
zc->stage = ZSTDcs_init;
zc->dictID = 0;
- zc->loadedDictEnd = 0;
- zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
- zc->entropy->offcode_repeatMode = FSE_repeat_none;
- zc->entropy->matchlength_repeatMode = FSE_repeat_none;
- zc->entropy->litlength_repeatMode = FSE_repeat_none;
- zc->nextToUpdate = 1;
- zc->nextSrc = NULL;
- zc->base = NULL;
- zc->dictBase = NULL;
- zc->dictLimit = 0;
- zc->lowLimit = 0;
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->seqStore.rep[i] = repStartValue[i]; }
- zc->hashLog3 = hashLog3;
- zc->optState.litLengthSum = 0;
-
- ptr = zc->entropy + 1;
-
- /* opt parser space */
- if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
- DEBUGLOG(4, "reserving optimal parser space");
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- zc->optState.litFreq = (U32*)ptr;
- zc->optState.litLengthFreq = zc->optState.litFreq + (1<<Litbits);
- zc->optState.matchLengthFreq = zc->optState.litLengthFreq + (MaxLL+1);
- zc->optState.offCodeFreq = zc->optState.matchLengthFreq + (MaxML+1);
- ptr = zc->optState.offCodeFreq + (MaxOff+1);
- zc->optState.matchTable = (ZSTD_match_t*)ptr;
- ptr = zc->optState.matchTable + ZSTD_OPT_NUM+1;
- zc->optState.priceTable = (ZSTD_optimal_t*)ptr;
- ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1;
- }
+
+ ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+ ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
/* ldm hash table */
/* initialize bucketOffsets table later for pointer alignment */
@@ -956,16 +1047,15 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->ldmState.hashTable = (ldmEntry_t*)ptr;
ptr = zc->ldmState.hashTable + ldmHSize;
+ zc->ldmSequences = (rawSeq*)ptr;
+ ptr = zc->ldmSequences + maxNbLdmSeq;
+ zc->maxNbLdmSequences = maxNbLdmSeq;
+
+ memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
}
+ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- /* table Space */
- DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
- if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- zc->hashTable = (U32*)(ptr);
- zc->chainTable = zc->hashTable + hSize;
- zc->hashTable3 = zc->chainTable + chainSize;
- ptr = zc->hashTable3 + h3Size;
+ ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
/* sequences storage */
zc->seqStore.sequencesStart = (seqDef*)ptr;
@@ -984,7 +1074,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
memset(ptr, 0, ldmBucketSize);
zc->ldmState.bucketOffsets = (BYTE*)ptr;
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
+ ZSTD_window_clear(&zc->ldmState.window);
}
+ ZSTD_referenceExternalSequences(zc, NULL, 0);
/* buffers */
zc->inBuffSize = buffInSize;
@@ -1002,9 +1094,61 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
* do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
int i;
- for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = 0;
+ for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+ const ZSTD_CDict* cdict,
+ unsigned windowLog,
+ ZSTD_frameParameters fParams,
+ U64 pledgedSrcSize,
+ ZSTD_buffered_policy_e zbuff)
+{
+ { ZSTD_CCtx_params params = cctx->requestedParams;
+ /* Copy only compression parameters related to tables. */
+ params.cParams = cdict->cParams;
+ if (windowLog) params.cParams.windowLog = windowLog;
+ params.fParams = fParams;
+ ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ ZSTDcrp_noMemset, zbuff);
+ assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
+ assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
+ assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
+ }
+
+ /* copy tables */
+ { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
+ size_t const hSize = (size_t)1 << cdict->cParams.hashLog;
+ size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
+ assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
+ assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
+ memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
+ }
+ /* Zero the hashTable3, since the cdict never fills it */
+ { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
+ assert(cdict->matchState.hashLog3 == 0);
+ memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+ }
+
+ /* copy dictionary offsets */
+ {
+ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+ ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+ dstMatchState->window = srcMatchState->window;
+ dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+ dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
+ dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+ }
+ cctx->dictID = cdict->dictID;
+
+ /* copy block state */
+ memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+ return 0;
+}
/*! ZSTD_copyCCtx_internal() :
* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -1015,7 +1159,6 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
* @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
const ZSTD_CCtx* srcCCtx,
- unsigned windowLog,
ZSTD_frameParameters fParams,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
@@ -1027,41 +1170,39 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
/* Copy only compression parameters related to tables. */
params.cParams = srcCCtx->appliedParams.cParams;
- if (windowLog) params.cParams.windowLog = windowLog;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
ZSTDcrp_noMemset, zbuff);
+ assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
+ assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
+ assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
+ assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
+ assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
}
/* copy tables */
{ size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
- size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
+ size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
- memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */
+ assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize);
+ memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */
}
/* copy dictionary offsets */
- dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
- dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
- dstCCtx->nextSrc = srcCCtx->nextSrc;
- dstCCtx->base = srcCCtx->base;
- dstCCtx->dictBase = srcCCtx->dictBase;
- dstCCtx->dictLimit = srcCCtx->dictLimit;
- dstCCtx->lowLimit = srcCCtx->lowLimit;
- dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
- dstCCtx->dictID = srcCCtx->dictID;
-
- /* copy entropy tables */
- memcpy(dstCCtx->entropy, srcCCtx->entropy, sizeof(ZSTD_entropyCTables_t));
- /* copy repcodes */
{
- int i;
- for (i = 0; i < ZSTD_REP_NUM; ++i)
- dstCCtx->seqStore.rep[i] = srcCCtx->seqStore.rep[i];
+ ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState;
+ ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
+ dstMatchState->window = srcMatchState->window;
+ dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+ dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
+ dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
+ dstCCtx->dictID = srcCCtx->dictID;
+
+ /* copy block state */
+ memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
return 0;
}
@@ -1080,51 +1221,69 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
- 0 /*windowLog from srcCCtx*/, fParams, pledgedSrcSize,
+ fParams, pledgedSrcSize,
zbuff);
}
+#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
- * reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
+ * reduce table indexes by `reducerValue`, or squash to zero.
+ * PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ * It must be set to a clear 0/1 value, to remove branch during inlining.
+ * Presume table size is a multiple of ZSTD_ROWSIZE
+ * to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+ int const nbRows = (int)size / ZSTD_ROWSIZE;
+ int cellNb = 0;
+ int rowNb;
+ assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
+ assert(size < (1U<<31)); /* can be casted to int */
+ for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+ int column;
+ for (column=0; column<ZSTD_ROWSIZE; column++) {
+ if (preserveMark) {
+ U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+ table[cellNb] += adder;
+ }
+ if (table[cellNb] < reducerValue) table[cellNb] = 0;
+ else table[cellNb] -= reducerValue;
+ cellNb++;
+ } }
+}
+
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
- U32 u;
- for (u=0 ; u < size ; u++) {
- if (table[u] < reducerValue) table[u] = 0;
- else table[u] -= reducerValue;
- }
+ ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}
-/*! ZSTD_ldm_reduceTable() :
- * reduce table indexes by `reducerValue` */
-static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
- U32 const reducerValue)
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
- U32 u;
- for (u = 0; u < size; u++) {
- if (table[u].offset < reducerValue) table[u].offset = 0;
- else table[u].offset -= reducerValue;
- }
+ ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
/*! ZSTD_reduceIndex() :
* rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
{
- { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
- ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
-
- { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((U32)1 << zc->appliedParams.cParams.chainLog);
- ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
+ ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+ { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
+ ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
+ }
- { U32 const h3Size = (zc->hashLog3) ? (U32)1 << zc->hashLog3 : 0;
- ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
+ if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
+ U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
+ if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
+ ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+ else
+ ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+ }
- { if (zc->appliedParams.ldmParams.enableLdm) {
- U32 const ldmHSize = (U32)1 << zc->appliedParams.ldmParams.hashLog;
- ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue);
- }
+ if (ms->hashLog3) {
+ U32 const h3Size = (U32)1 << ms->hashLog3;
+ ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
}
}
@@ -1199,10 +1358,12 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
- ZSTD_strategy strategy,
+static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+ const void* src, size_t srcSize,
+ U32* workspace, const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -1211,34 +1372,51 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
+ DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
+ disableLiteralCompression);
+
+ /* Prepare nextEntropy assuming reusing the existing table */
+ nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode;
+ memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable,
+ sizeof(prevEntropy->hufCTable));
+
+ if (disableLiteralCompression)
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
-# define LITERAL_NOENTROPY 63
- { size_t const minLitSize = entropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
+# define COMPRESS_LITERALS_SIZE_MIN 63
+ { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
- { HUF_repeat repeat = entropy->hufCTable_repeatMode;
+ { HUF_repeat repeat = prevEntropy->hufCTable_repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat)
+ workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat);
- if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
- else { entropy->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */
+ workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2);
+ if (repeat != HUF_repeat_none) {
+ /* reused the existing table */
+ hType = set_repeat;
+ }
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
- entropy->hufCTable_repeatMode = HUF_repeat_none;
+ memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
- entropy->hufCTable_repeatMode = HUF_repeat_none;
+ memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
+ if (hType == set_compressed) {
+ /* using a newly constructed table */
+ nextEntropy->hufCTable_repeatMode = HUF_repeat_check;
+ }
+
/* Build header */
switch(lhSize)
{
@@ -1332,10 +1510,11 @@ symbolEncodingType_e ZSTD_selectEncodingType(
MEM_STATIC
size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
- FSE_CTable* CTable, U32 FSELog, symbolEncodingType_e type,
+ FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
U32* count, U32 max,
BYTE const* codeTable, size_t nbSeq,
S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ FSE_CTable const* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize)
{
BYTE* op = (BYTE*)dst;
@@ -1344,12 +1523,13 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
switch (type) {
case set_rle:
*op = codeTable[0];
- CHECK_F(FSE_buildCTable_rle(CTable, (BYTE)max));
+ CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max));
return 1;
case set_repeat:
+ memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
- CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
+ CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
@@ -1363,7 +1543,7 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return NCountSize;
- CHECK_F(FSE_buildCTable_wksp(CTable, norm, max, tableLog, workspace, workspaceSize));
+ CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
return NCountSize;
}
}
@@ -1371,8 +1551,8 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
}
}
-MEM_STATIC
-size_t ZSTD_encodeSequences(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
@@ -1419,7 +1599,8 @@ size_t ZSTD_encodeSequences(
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
sequences[n].litLength,
sequences[n].matchLength + MINMATCH,
- sequences[n].offset); /* 32b*/ /* 64b*/
+ sequences[n].offset);
+ /* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
@@ -1445,8 +1626,11 @@ size_t ZSTD_encodeSequences(
BIT_flushBits(&blockStream); /* (7)*/
} }
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
FSE_flushCState(&blockStream, &stateMatchLength);
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
FSE_flushCState(&blockStream, &stateOffsetBits);
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
FSE_flushCState(&blockStream, &stateLitLength);
{ size_t const streamSize = BIT_closeCStream(&blockStream);
@@ -1455,16 +1639,77 @@ size_t ZSTD_encodeSequences(
}
}
+static size_t
+ZSTD_encodeSequences_default(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ return ZSTD_encodeSequences_body(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ return ZSTD_encodeSequences_body(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+ }
+#endif
+ (void)bmi2;
+ return ZSTD_encodeSequences_default(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
- ZSTD_entropyCTables_t* entropy,
- ZSTD_compressionParameters const* cParams,
- void* dst, size_t dstCapacity)
+ ZSTD_entropyCTables_t const* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ ZSTD_CCtx_params const* cctxParams,
+ void* dst, size_t dstCapacity, U32* workspace,
+ const int bmi2)
{
- const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
+ const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
U32 count[MaxSeq+1];
- FSE_CTable* CTable_LitLength = entropy->litlengthCTable;
- FSE_CTable* CTable_OffsetBits = entropy->offcodeCTable;
- FSE_CTable* CTable_MatchLength = entropy->matchlengthCTable;
+ FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+ FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
const seqDef* const sequences = seqStorePtr->sequencesStart;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1476,13 +1721,17 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* seqHead;
- ZSTD_STATIC_ASSERT(sizeof(entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
+ ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
size_t const litSize = seqStorePtr->lit - literals;
size_t const cSize = ZSTD_compressLiterals(
- entropy, cParams->strategy, op, dstCapacity, literals, litSize);
+ prevEntropy, nextEntropy,
+ cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
+ op, dstCapacity,
+ literals, litSize,
+ workspace, bmi2);
if (ZSTD_isError(cSize))
return cSize;
assert(cSize <= dstCapacity);
@@ -1497,7 +1746,15 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
- if (nbSeq==0) return op - ostart;
+ if (nbSeq==0) {
+ memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable));
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable));
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable));
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ return op - ostart;
+ }
/* seqHead : flags for FSE encoding type */
seqHead = op++;
@@ -1506,36 +1763,42 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 max = MaxLL;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
DEBUGLOG(5, "Building LL table");
- LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
- entropy->workspace, sizeof(entropy->workspace));
+ prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+ workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
} }
/* build CTable for Offsets */
{ U32 max = MaxOff;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
- Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- entropy->workspace, sizeof(entropy->workspace));
+ prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+ workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
} }
/* build CTable for MatchLengths */
{ U32 max = MaxML;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
DEBUGLOG(5, "Building ML table");
- MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
- entropy->workspace, sizeof(entropy->workspace));
+ prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+ workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
} }
@@ -1548,7 +1811,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq,
- longOffsets);
+ longOffsets, bmi2);
if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
op += bitstreamSize;
}
@@ -1557,48 +1820,40 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
}
MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
- ZSTD_entropyCTables_t* entropy,
- ZSTD_compressionParameters const* cParams,
+ ZSTD_entropyCTables_t const* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ ZSTD_CCtx_params const* cctxParams,
void* dst, size_t dstCapacity,
- size_t srcSize)
+ size_t srcSize, U32* workspace, int bmi2)
{
- size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams,
- dst, dstCapacity);
- /* If the srcSize <= dstCapacity, then there is enough space to write a
- * raw uncompressed block. Since we ran out of space, the block must not
- * be compressible, so fall back to a raw uncompressed block.
+ size_t const cSize = ZSTD_compressSequences_internal(
+ seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
+ workspace, bmi2);
+ /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+ * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/
- int const uncompressibleError = (cSize == ERROR(dstSize_tooSmall)) && (srcSize <= dstCapacity);
- if (ZSTD_isError(cSize) && !uncompressibleError)
- return cSize;
+ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+ return 0; /* block not compressed */
+ if (ZSTD_isError(cSize)) return cSize;
+
+ /* Check compressibility */
+ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */
+ if (cSize >= maxCSize) return 0; /* block not compressed */
+ }
+
/* We check that dictionaries have offset codes available for the first
* block. After the first block, the offcode table might not have large
* enough codes to represent the offsets in the data.
*/
- if (entropy->offcode_repeatMode == FSE_repeat_valid)
- entropy->offcode_repeatMode = FSE_repeat_check;
-
- /* Check compressibility */
- { size_t const minGain = ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */
- size_t const maxCSize = srcSize - minGain;
- if (cSize >= maxCSize || uncompressibleError) {
- entropy->hufCTable_repeatMode = HUF_repeat_none;
- entropy->offcode_repeatMode = FSE_repeat_none;
- entropy->matchlength_repeatMode = FSE_repeat_none;
- entropy->litlength_repeatMode = FSE_repeat_none;
- return 0; /* block not compressed */
- } }
- assert(!ZSTD_isError(cSize));
+ if (nextEntropy->offcode_repeatMode == FSE_repeat_valid)
+ nextEntropy->offcode_repeatMode = FSE_repeat_check;
- /* block is compressed => confirm repcodes in history */
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->rep[i] = seqStorePtr->repToConfirm[i]; }
return cSize;
}
/* ZSTD_selectBlockCompressor() :
* Not static, but internal use only (used by long distance matcher)
* assumption : strat is a valid strategy */
-typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
{
static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
@@ -1632,32 +1887,83 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
ssPtr->longLengthID = 0;
}
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
- DEBUGLOG(5, "ZSTD_compressBlock_internal : dstCapacity = %u", (U32)dstCapacity);
- if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
+ ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+ DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+ (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
+ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+ ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
return 0; /* don't even attempt compression below a certain srcSize */
+ }
ZSTD_resetSeqStore(&(zc->seqStore));
/* limited update after a very long match */
- { const BYTE* const base = zc->base;
+ { const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const U32 current = (U32)(istart-base);
- if (current > zc->nextToUpdate + 384)
- zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));
+ if (current > ms->nextToUpdate + 384)
+ ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
}
- /* find and store sequences */
- { U32 const extDict = zc->lowLimit < zc->dictLimit;
- const ZSTD_blockCompressor blockCompressor =
- zc->appliedParams.ldmParams.enableLdm
- ? (extDict ? ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm)
- : ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
- size_t const lastLLSize = blockCompressor(zc, src, srcSize);
- const BYTE* const anchor = (const BYTE*)src + srcSize - lastLLSize;
- ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize);
+
+ /* select and store sequences */
+ { U32 const extDict = ZSTD_window_hasExtDict(ms->window);
+ size_t lastLLSize;
+ { int i;
+ for (i = 0; i < ZSTD_REP_NUM; ++i)
+ zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+ }
+ if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+ assert(!zc->appliedParams.ldmParams.enableLdm);
+ /* Updates ldmSeqStore.pos */
+ lastLLSize =
+ ZSTD_ldm_blockCompress(&zc->externSeqStore,
+ ms, &zc->seqStore,
+ zc->blockState.nextCBlock->rep,
+ &zc->appliedParams.cParams,
+ src, srcSize, extDict);
+ assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+ } else if (zc->appliedParams.ldmParams.enableLdm) {
+ rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
+
+ ldmSeqStore.seq = zc->ldmSequences;
+ ldmSeqStore.capacity = zc->maxNbLdmSequences;
+ /* Updates ldmSeqStore.size */
+ CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+ &zc->appliedParams.ldmParams,
+ src, srcSize));
+ /* Updates ldmSeqStore.pos */
+ lastLLSize =
+ ZSTD_ldm_blockCompress(&ldmSeqStore,
+ ms, &zc->seqStore,
+ zc->blockState.nextCBlock->rep,
+ &zc->appliedParams.cParams,
+ src, srcSize, extDict);
+ assert(ldmSeqStore.pos == ldmSeqStore.size);
+ } else { /* not long range mode */
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
+ lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
+ }
+ { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+ ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+ } }
+
+ /* encode sequences and literals */
+ { size_t const cSize = ZSTD_compressSequences(&zc->seqStore,
+ &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ dst, dstCapacity,
+ srcSize, zc->entropyWorkspace, zc->bmi2);
+ if (ZSTD_isError(cSize) || cSize == 0) return cSize;
+ /* confirm repcodes and entropy tables */
+ { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+ zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+ zc->blockState.nextCBlock = tmp;
+ }
+ return cSize;
}
- /* encode */
- return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize);
}
@@ -1686,54 +1992,27 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
XXH64_update(&cctx->xxhState, src, srcSize);
while (remaining) {
+ ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
if (remaining < blockSize) blockSize = remaining;
- /* preemptive overflow correction:
- * 1. correction is large enough:
- * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog - blockSize
- * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
- *
- * current - newCurrent
- * > (3<<29 + 1<<windowLog - blockSize) - (1<<windowLog + 1<<chainLog)
- * > (3<<29 - blockSize) - (1<<chainLog)
- * > (3<<29 - blockSize) - (1<<30) (NOTE: chainLog <= 30)
- * > 1<<29 - 1<<17
- *
- * 2. (ip+blockSize - cctx->base) doesn't overflow:
- * In 32 bit mode we limit windowLog to 30 so we don't get
- * differences larger than 1<<31-1.
- * 3. cctx->lowLimit < 1<<32:
- * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
- */
- if (cctx->lowLimit > (3U<<29)) {
- U32 const cycleMask = ((U32)1 << ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy)) - 1;
- U32 const current = (U32)(ip - cctx->base);
- U32 const newCurrent = (current & cycleMask) + ((U32)1 << cctx->appliedParams.cParams.windowLog);
- U32 const correction = current - newCurrent;
+ if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
+ U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
+ U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
- assert(current > newCurrent);
- assert(correction > 1<<28); /* Loose bound, should be about 1<<29 */
+
ZSTD_reduceIndex(cctx, correction);
- cctx->base += correction;
- cctx->dictBase += correction;
- cctx->lowLimit -= correction;
- cctx->dictLimit -= correction;
- if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
- else cctx->nextToUpdate -= correction;
- DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x\n", correction, cctx->lowLimit);
- }
- /* enforce maxDist */
- if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
- U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
- if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
- if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
+ if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+ else ms->nextToUpdate -= correction;
+ ms->loadedDictEnd = 0;
}
+ ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd);
+ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
@@ -1810,16 +2089,44 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
return pos;
}
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+ if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
+ { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
+ MEM_writeLE24(dst, cBlockHeader24);
+ return ZSTD_blockHeaderSize;
+ }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+ if (cctx->stage != ZSTDcs_init)
+ return ERROR(stage_wrong);
+ if (cctx->appliedParams.ldmParams.enableLdm)
+ return ERROR(parameter_unsupported);
+ cctx->externSeqStore.seq = seq;
+ cctx->externSeqStore.size = nbSeq;
+ cctx->externSeqStore.capacity = nbSeq;
+ cctx->externSeqStore.pos = 0;
+ return 0;
+}
+
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
U32 frame, U32 lastFrameChunk)
{
- const BYTE* const ip = (const BYTE*) src;
+ ZSTD_matchState_t* ms = &cctx->blockState.matchState;
size_t fhSize = 0;
- DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u", cctx->stage);
+ DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+ cctx->stage, (U32)srcSize);
if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
if (frame && (cctx->stage==ZSTDcs_init)) {
@@ -1833,26 +2140,11 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
if (!srcSize) return fhSize; /* do not generate an empty block if no input */
- /* Check if blocks follow each other */
- if (src != cctx->nextSrc) {
- /* not contiguous */
- size_t const distanceFromBase = (size_t)(cctx->nextSrc - cctx->base);
- cctx->lowLimit = cctx->dictLimit;
- assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
- cctx->dictLimit = (U32)distanceFromBase;
- cctx->dictBase = cctx->base;
- cctx->base = ip - distanceFromBase;
- cctx->nextToUpdate = cctx->dictLimit;
- if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */
- }
- cctx->nextSrc = ip + srcSize;
-
- /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
- if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
- ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
- U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
- cctx->lowLimit = lowLimitMax;
+ if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+ ms->nextToUpdate = ms->window.dictLimit;
}
+ if (cctx->appliedParams.ldmParams.enableLdm)
+ ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
{ size_t const cSize = frame ?
@@ -1860,6 +2152,14 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
if (ZSTD_isError(cSize)) return cSize;
cctx->consumedSrcSize += srcSize;
+ cctx->producedCSize += (cSize + fhSize);
+ if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
+ if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
+ DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+ (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
+ return ERROR(srcSize_wrong);
+ }
+ }
return cSize + fhSize;
}
}
@@ -1868,14 +2168,15 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
+ DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (U32)srcSize);
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
- ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(cctx->appliedParams, 0, 0);
+ ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+ assert(!ZSTD_checkCParams(cParams));
return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
}
@@ -1889,50 +2190,45 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
/*! ZSTD_loadDictionaryContent() :
* @return : 0, or an error code
*/
-static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
+ ZSTD_compressionParameters const* cParams = &params->cParams;
- /* input becomes current prefix */
- zc->lowLimit = zc->dictLimit;
- zc->dictLimit = (U32)(zc->nextSrc - zc->base);
- zc->dictBase = zc->base;
- zc->base = ip - zc->dictLimit;
- zc->nextToUpdate = zc->dictLimit;
- zc->loadedDictEnd = zc->appliedParams.forceWindow ? 0 : (U32)(iend - zc->base);
+ ZSTD_window_update(&ms->window, src, srcSize);
+ ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
- zc->nextSrc = iend;
if (srcSize <= HASH_READ_SIZE) return 0;
- switch(zc->appliedParams.cParams.strategy)
+ switch(params->cParams.strategy)
{
case ZSTD_fast:
- ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
+ ZSTD_fillHashTable(ms, cParams, iend);
break;
case ZSTD_dfast:
- ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
+ ZSTD_fillDoubleHashTable(ms, cParams, iend);
break;
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
if (srcSize >= HASH_READ_SIZE)
- ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
+ ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE);
break;
- case ZSTD_btlazy2:
+ case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
case ZSTD_btopt:
case ZSTD_btultra:
if (srcSize >= HASH_READ_SIZE)
- ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, (U32)1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
+ ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend);
break;
default:
assert(0); /* not possible : not a valid strategy id */
}
- zc->nextToUpdate = (U32)(iend - zc->base);
+ ms->nextToUpdate = (U32)(iend - ms->window.base);
return 0;
}
@@ -1956,25 +2252,26 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
*/
/*! ZSTD_loadZstdDictionary() :
- * @return : 0, or an error code
+ * @return : dictID, or an error code
* assumptions : magic number supposed already checked
* dictSize supposed > 8
*/
-static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff;
+ size_t dictID;
- ZSTD_STATIC_ASSERT(sizeof(cctx->entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
+ ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
dictPtr += 4; /* skip magic number */
- cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
+ dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
dictPtr += 4;
{ unsigned maxSymbolValue = 255;
- size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)cctx->entropy->hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
+ size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
@@ -1985,7 +2282,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
- CHECK_E( FSE_buildCTable_wksp(cctx->entropy->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
+ CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@@ -1997,7 +2294,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
- CHECK_E( FSE_buildCTable_wksp(cctx->entropy->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
+ CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
@@ -2009,15 +2306,15 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
- CHECK_E( FSE_buildCTable_wksp(cctx->entropy->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
+ CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
- cctx->seqStore.rep[0] = MEM_readLE32(dictPtr+0);
- cctx->seqStore.rep[1] = MEM_readLE32(dictPtr+4);
- cctx->seqStore.rep[2] = MEM_readLE32(dictPtr+8);
+ bs->rep[0] = MEM_readLE32(dictPtr+0);
+ bs->rep[1] = MEM_readLE32(dictPtr+4);
+ bs->rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
@@ -2031,50 +2328,55 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
/* All repCodes must be <= dictContentSize and != 0*/
{ U32 u;
for (u=0; u<3; u++) {
- if (cctx->seqStore.rep[u] == 0) return ERROR(dictionary_corrupted);
- if (cctx->seqStore.rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+ if (bs->rep[u] == 0) return ERROR(dictionary_corrupted);
+ if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
} }
- cctx->entropy->hufCTable_repeatMode = HUF_repeat_valid;
- cctx->entropy->offcode_repeatMode = FSE_repeat_valid;
- cctx->entropy->matchlength_repeatMode = FSE_repeat_valid;
- cctx->entropy->litlength_repeatMode = FSE_repeat_valid;
- return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
+ bs->entropy.hufCTable_repeatMode = HUF_repeat_valid;
+ bs->entropy.offcode_repeatMode = FSE_repeat_valid;
+ bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
+ bs->entropy.litlength_repeatMode = FSE_repeat_valid;
+ CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize));
+ return dictID;
}
}
/** ZSTD_compress_insertDictionary() :
-* @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
+* @return : dictID, or an error code */
+static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms,
+ ZSTD_CCtx_params const* params,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode)
+ ZSTD_dictContentType_e dictContentType,
+ void* workspace)
{
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
if ((dict==NULL) || (dictSize<=8)) return 0;
+ ZSTD_reset_compressedBlockState(bs);
+
/* dict restricted modes */
- if (dictMode==ZSTD_dm_rawContent)
- return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+ if (dictContentType == ZSTD_dct_rawContent)
+ return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
- if (dictMode == ZSTD_dm_auto) {
+ if (dictContentType == ZSTD_dct_auto) {
DEBUGLOG(4, "raw content dictionary detected");
- return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+ return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
}
- if (dictMode == ZSTD_dm_fullDict)
+ if (dictContentType == ZSTD_dct_fullDict)
return ERROR(dictionary_wrong);
assert(0); /* impossible */
}
/* dict as full zstd dictionary */
- return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
+ return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
}
/*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */
size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
@@ -2086,19 +2388,26 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
if (cdict && cdict->dictContentSize>0) {
cctx->requestedParams = params;
- return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
- params.cParams.windowLog, params.fParams, pledgedSrcSize,
- zbuff);
+ return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog,
+ params.fParams, pledgedSrcSize, zbuff);
}
CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_continue, zbuff) );
- return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
+ {
+ size_t const dictID = ZSTD_compress_insertDictionary(
+ cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+ &params, dict, dictSize, dictContentType, cctx->entropyWorkspace);
+ if (ZSTD_isError(dictID)) return dictID;
+ assert(dictID <= (size_t)(U32)-1);
+ cctx->dictID = (U32)dictID;
+ }
+ return 0;
}
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize)
@@ -2107,7 +2416,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
/* compression parameters verification and optimization */
CHECK_F( ZSTD_checkCParams(params.cParams) );
return ZSTD_compressBegin_internal(cctx,
- dict, dictSize, dictMode,
+ dict, dictSize, dictContentType,
cdict,
params, pledgedSrcSize,
ZSTDb_not_buffered);
@@ -2122,18 +2431,18 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
ZSTD_CCtx_params const cctxParams =
ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
return ZSTD_compressBegin_advanced_internal(cctx,
- dict, dictSize, ZSTD_dm_auto,
+ dict, dictSize, ZSTD_dct_auto,
NULL /*cdict*/,
cctxParams, pledgedSrcSize);
}
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_CCtx_params const cctxParams =
ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
- DEBUGLOG(4, "ZSTD_compressBegin_usingDict");
- return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
+ return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
@@ -2152,7 +2461,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
BYTE* op = ostart;
size_t fhSize = 0;
- DEBUGLOG(5, "ZSTD_writeEpilogue");
+ DEBUGLOG(4, "ZSTD_writeEpilogue");
if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
/* special case : empty frame */
@@ -2176,6 +2485,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
+ DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum);
MEM_writeLE32(op, checksum);
op += 4;
}
@@ -2184,7 +2494,6 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
return op-ostart;
}
-
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
@@ -2242,25 +2551,27 @@ size_t ZSTD_compress_advanced_internal(
const void* dict,size_t dictSize,
ZSTD_CCtx_params params)
{
- DEBUGLOG(4, "ZSTD_compress_advanced_internal");
- CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)",
+ (U32)srcSize);
+ CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
params, srcSize, ZSTDb_not_buffered) );
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
- params.fParams.contentSizeFlag = 1;
- DEBUGLOG(4, "ZSTD_compress_usingDict (level=%i, srcSize=%u, dictSize=%u)",
- compressionLevel, (U32)srcSize, (U32)dictSize);
- return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
+ ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ assert(params.fParams.contentSizeFlag == 1);
+ ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0);
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
}
-size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
{
- return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+ DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
+ return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}
size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
@@ -2284,9 +2595,7 @@ size_t ZSTD_estimateCDictSize_advanced(
ZSTD_dictLoadMethod_e dictLoadMethod)
{
DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
- DEBUGLOG(5, "CCtx estimate : %u",
- (U32)ZSTD_estimateCCtxSize_usingCParams(cParams));
- return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_usingCParams(cParams)
+ return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
}
@@ -2300,23 +2609,24 @@ size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
- DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
- return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
+ return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
}
static size_t ZSTD_initCDict_internal(
ZSTD_CDict* cdict,
const void* dictBuffer, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams)
{
- DEBUGLOG(3, "ZSTD_initCDict_internal, mode %u", (U32)dictMode);
+ DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType);
+ assert(!ZSTD_checkCParams(cParams));
+ cdict->cParams = cParams;
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
cdict->dictBuffer = NULL;
cdict->dictContent = dictBuffer;
} else {
- void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
+ void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem);
cdict->dictBuffer = internalBuffer;
cdict->dictContent = internalBuffer;
if (!internalBuffer) return ERROR(memory_allocation);
@@ -2324,13 +2634,31 @@ static size_t ZSTD_initCDict_internal(
}
cdict->dictContentSize = dictSize;
- { ZSTD_CCtx_params cctxParams = cdict->refContext->requestedParams;
- cctxParams.cParams = cParams;
- CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
- cdict->dictContent, dictSize, dictMode,
- NULL,
- cctxParams, ZSTD_CONTENTSIZE_UNKNOWN,
- ZSTDb_not_buffered) );
+ /* Reset the state to no dictionary */
+ ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+ { void* const end = ZSTD_reset_matchState(
+ &cdict->matchState,
+ (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
+ &cParams, ZSTDcrp_continue, /* forCCtx */ 0);
+ assert(end == (char*)cdict->workspace + cdict->workspaceSize);
+ (void)end;
+ }
+ /* (Maybe) load the dictionary
+ * Skips loading the dictionary if it is <= 8 bytes.
+ */
+ { ZSTD_CCtx_params params;
+ memset(&params, 0, sizeof(params));
+ params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+ params.fParams.contentSizeFlag = 1;
+ params.cParams = cParams;
+ { size_t const dictID = ZSTD_compress_insertDictionary(
+ &cdict->cBlockState, &cdict->matchState, &params,
+ cdict->dictContent, cdict->dictContentSize,
+ dictContentType, cdict->workspace);
+ if (ZSTD_isError(dictID)) return dictID;
+ assert(dictID <= (size_t)(U32)-1);
+ cdict->dictID = (U32)dictID;
+ }
}
return 0;
@@ -2338,24 +2666,27 @@ static size_t ZSTD_initCDict_internal(
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
{
- DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode);
+ DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictContentType);
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
{ ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
- ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
+ size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+ void* const workspace = ZSTD_malloc(workspaceSize, customMem);
- if (!cdict || !cctx) {
+ if (!cdict || !workspace) {
ZSTD_free(cdict, customMem);
- ZSTD_freeCCtx(cctx);
+ ZSTD_free(workspace, customMem);
return NULL;
}
- cdict->refContext = cctx;
+ cdict->customMem = customMem;
+ cdict->workspace = workspace;
+ cdict->workspaceSize = workspaceSize;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dictBuffer, dictSize,
- dictLoadMethod, dictMode,
+ dictLoadMethod, dictContentType,
cParams) )) {
ZSTD_freeCDict(cdict);
return NULL;
@@ -2369,7 +2700,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, ZSTD_dm_auto,
+ ZSTD_dlm_byCopy, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
}
@@ -2377,15 +2708,15 @@ ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byRef, ZSTD_dm_auto,
+ ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support free on NULL */
- { ZSTD_customMem const cMem = cdict->refContext->customMem;
- ZSTD_freeCCtx(cdict->refContext);
+ { ZSTD_customMem const cMem = cdict->customMem;
+ ZSTD_free(cdict->workspace, cMem);
ZSTD_free(cdict->dictBuffer, cMem);
ZSTD_free(cdict, cMem);
return 0;
@@ -2405,18 +2736,18 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
* Note : there is no corresponding "free" function.
* Since workspace was allocated externally, it must be freed externally.
*/
-ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
+const ZSTD_CDict* ZSTD_initStaticCDict(
+ void* workspace, size_t workspaceSize,
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams)
{
- size_t const cctxSize = ZSTD_estimateCCtxSize_usingCParams(cParams);
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
- + cctxSize;
+ + HUF_WORKSPACE_SIZE + matchStateSize;
ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
void* ptr;
- DEBUGLOG(4, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
(U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
@@ -2429,19 +2760,22 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
} else {
ptr = cdict+1;
}
- cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
+ cdict->workspace = ptr;
+ cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
- ZSTD_dlm_byRef, dictMode,
+ ZSTD_dlm_byRef, dictContentType,
cParams) ))
return NULL;
return cdict;
}
-ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) {
- return cdict->refContext->appliedParams.cParams;
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+ assert(cdict != NULL);
+ return cdict->cParams;
}
/* ZSTD_compressBegin_usingCDict_advanced() :
@@ -2454,9 +2788,18 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
if (cdict==NULL) return ERROR(dictionary_wrong);
{ ZSTD_CCtx_params params = cctx->requestedParams;
params.cParams = ZSTD_getCParamsFromCDict(cdict);
+ /* Increase window log to fit the entire dictionary and source if the
+ * source size is known. Limit the increase to 19, which is the
+ * window log for compression level 1 with the largest source size.
+ */
+ if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+ U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+ U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+ params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
+ }
params.fParams = fParams;
return ZSTD_compressBegin_internal(cctx,
- NULL, 0, ZSTD_dm_auto,
+ NULL, 0, ZSTD_dct_auto,
cdict,
params, pledgedSrcSize,
ZSTDb_not_buffered);
@@ -2534,29 +2877,30 @@ size_t ZSTD_CStreamOutSize(void)
return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
}
-static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
- const void* const dict, size_t const dictSize, ZSTD_dictMode_e const dictMode,
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
+ const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
const ZSTD_CDict* const cdict,
ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize)
{
- DEBUGLOG(4, "ZSTD_resetCStream_internal");
+ DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)",
+ params.disableLiteralCompression);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
- CHECK_F( ZSTD_compressBegin_internal(zcs,
- dict, dictSize, dictMode,
+ CHECK_F( ZSTD_compressBegin_internal(cctx,
+ dict, dictSize, dictContentType,
cdict,
params, pledgedSrcSize,
ZSTDb_buffered) );
- zcs->inToCompress = 0;
- zcs->inBuffPos = 0;
- zcs->inBuffTarget = zcs->blockSize
- + (zcs->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
- zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
- zcs->streamStage = zcss_load;
- zcs->frameEnded = 0;
+ cctx->inToCompress = 0;
+ cctx->inBuffPos = 0;
+ cctx->inBuffTarget = cctx->blockSize
+ + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
+ cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+ cctx->streamStage = zcss_load;
+ cctx->frameEnded = 0;
return 0; /* ready to go */
}
@@ -2568,8 +2912,8 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize);
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
params.fParams.contentSizeFlag = 1;
- params.cParams = ZSTD_getCParamsFromCCtxParams(params, pledgedSrcSize, 0);
- return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize);
+ params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, 0);
+ return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
}
/*! ZSTD_initCStream_internal() :
@@ -2592,7 +2936,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
}
ZSTD_freeCDict(zcs->cdictLocal);
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, ZSTD_dm_auto,
+ ZSTD_dlm_byCopy, ZSTD_dct_auto,
params.cParams, zcs->customMem);
zcs->cdict = zcs->cdictLocal;
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
@@ -2605,10 +2949,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
zcs->cdict = cdict;
}
- params.compressionLevel = ZSTD_CLEVEL_CUSTOM; /* enforce usage of cParams, instead of a dynamic derivation from cLevel (but does that happen ?) */
- zcs->requestedParams = params;
-
- return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize);
+ return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
}
/* ZSTD_initCStream_usingCDict_advanced() :
@@ -2637,20 +2978,22 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */
}
+
/* ZSTD_initCStream_advanced() :
- * pledgedSrcSize must be correct.
+ * pledgedSrcSize must be exact.
* if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
* dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
- ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u",
(U32)pledgedSrcSize, params.fParams.contentSizeFlag);
CHECK_F( ZSTD_checkCParams(params.cParams) );
if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
- return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize);
+ { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
+ return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize);
+ }
}
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
@@ -2687,7 +3030,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
/** ZSTD_compressStream_generic():
* internal function for all *compressStream*() variants and *compress_generic()
- * non-static, because can be called from zstdmt.c
+ * non-static, because can be called from zstdmt_compress.c
* @return : hint size for next input */
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
ZSTD_outBuffer* output,
@@ -2862,46 +3205,56 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage");
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
params.cParams = ZSTD_getCParamsFromCCtxParams(
- cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
+ &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
#ifdef ZSTD_MULTITHREAD
- if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN)
- params.nbThreads = 1; /* do not invoke multi-threading when src size is too small */
- if (params.nbThreads > 1) {
- if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) {
- DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)",
- params.nbThreads, ZSTDMT_getNbThreads(cctx->mtctx));
+ if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
+ params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
+ }
+ if (params.nbWorkers > 0) {
+ /* mt context creation */
+ if (cctx->mtctx == NULL || (params.nbWorkers != ZSTDMT_getNbWorkers(cctx->mtctx))) {
+ DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u",
+ params.nbWorkers);
+ if (cctx->mtctx != NULL)
+ DEBUGLOG(4, "ZSTD_compress_generic: previous nbWorkers was %u",
+ ZSTDMT_getNbWorkers(cctx->mtctx));
ZSTDMT_freeCCtx(cctx->mtctx);
- cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
+ cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
if (cctx->mtctx == NULL) return ERROR(memory_allocation);
}
- DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads);
+ /* mt compression */
+ DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
CHECK_F( ZSTDMT_initCStream_internal(
cctx->mtctx,
- prefixDict.dict, prefixDict.dictSize, ZSTD_dm_rawContent,
+ prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent,
cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
cctx->streamStage = zcss_load;
- cctx->appliedParams.nbThreads = params.nbThreads;
+ cctx->appliedParams.nbWorkers = params.nbWorkers;
} else
#endif
- { CHECK_F( ZSTD_resetCStream_internal(
- cctx, prefixDict.dict, prefixDict.dictSize,
- prefixDict.dictMode, cctx->cdict, params,
- cctx->pledgedSrcSizePlusOne-1) );
+ { CHECK_F( ZSTD_resetCStream_internal(cctx,
+ prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
+ cctx->cdict,
+ params, cctx->pledgedSrcSizePlusOne-1) );
assert(cctx->streamStage == zcss_load);
- assert(cctx->appliedParams.nbThreads <= 1);
+ assert(cctx->appliedParams.nbWorkers == 0);
} }
/* compression stage */
#ifdef ZSTD_MULTITHREAD
- if (cctx->appliedParams.nbThreads > 1) {
- size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
- if ( ZSTD_isError(flushMin)
- || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
- ZSTD_startNewCompression(cctx);
+ if (cctx->appliedParams.nbWorkers > 0) {
+ if (cctx->cParamsChanged) {
+ ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
+ cctx->cParamsChanged = 0;
}
- return flushMin;
- }
+ { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+ if ( ZSTD_isError(flushMin)
+ || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
+ ZSTD_startNewCompression(cctx);
+ }
+ return flushMin;
+ } }
#endif
CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
DEBUGLOG(5, "completed ZSTD_compress_generic");
@@ -2927,7 +3280,7 @@ size_t ZSTD_compress_generic_simpleArgs (
/*====== Finalize ======*/
/*! ZSTD_flushStream() :
-* @return : amount of data remaining to flush */
+ * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
ZSTD_inBuffer input = { NULL, 0, 0 };
@@ -2959,11 +3312,11 @@ int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - guarantees a monotonically increasing memory budget */
/* W, C, H, S, L, TL, strat */
- { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
- { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
- { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
- { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3 */
- { 20, 17, 18, 1, 5, 16, ZSTD_dfast }, /* level 4 */
+ { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
+ { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */
+ { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */
+ { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */
+ { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */
{ 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */
{ 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
{ 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */
@@ -2972,9 +3325,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
- { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
- { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
- { 22, 21, 22, 4, 5, 16, ZSTD_btlazy2 }, /* level 15 */
+ { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
+ { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
+ { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
{ 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */
{ 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */
{ 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */
@@ -2985,8 +3338,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
- { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
- { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
+ { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
+ { 18, 13, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
{ 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
{ 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
{ 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
@@ -2997,8 +3350,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
- { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
- { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
+ { 18, 18, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 12.*/
+ { 18, 19, 17, 7, 4, 8, ZSTD_btlazy2 }, /* level 13 */
{ 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
{ 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
@@ -3011,9 +3364,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
- { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
- { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
- { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
+ { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* level 0 - not used */
+ { 17, 12, 13, 1, 6, 1, ZSTD_fast }, /* level 1 */
+ { 17, 13, 16, 1, 5, 1, ZSTD_fast }, /* level 2 */
{ 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
{ 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
{ 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
@@ -3037,9 +3390,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
- { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
- { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
- { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
+ { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
+ { 14, 14, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
+ { 14, 14, 14, 1, 4, 1, ZSTD_fast }, /* level 2 */
{ 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
{ 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
{ 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
@@ -3063,47 +3416,22 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
};
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
-/* This function just controls
- * the monotonic memory budget increase of ZSTD_defaultCParameters[0].
- * Run once, on first ZSTD_getCParams() usage, if ZSTD_DEBUG is enabled
- */
-MEM_STATIC void ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget(void)
-{
- int level;
- for (level=1; level<ZSTD_maxCLevel(); level++) {
- ZSTD_compressionParameters const c1 = ZSTD_defaultCParameters[0][level];
- ZSTD_compressionParameters const c2 = ZSTD_defaultCParameters[0][level+1];
- assert(c1.windowLog <= c2.windowLog);
-# define ZSTD_TABLECOST(h,c) ((1<<(h)) + (1<<(c)))
- assert(ZSTD_TABLECOST(c1.hashLog, c1.chainLog) <= ZSTD_TABLECOST(c2.hashLog, c2.chainLog));
- }
-}
-#endif
-
/*! ZSTD_getCParams() :
-* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
+* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
* Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
size_t const addedSize = srcSizeHint ? 0 : 500;
U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
-
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
- static int g_monotonicTest = 1;
- if (g_monotonicTest) {
- ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget();
- g_monotonicTest=0;
- }
-#endif
-
- DEBUGLOG(4, "ZSTD_getCParams: cLevel=%i, srcSize=%u, dictSize=%u => table %u",
- compressionLevel, (U32)srcSizeHint, (U32)dictSize, tableID);
- if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */
- if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
- { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
- return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
+ int row = compressionLevel;
+ DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
+ if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
+ if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
+ if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+ { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+ if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
+ return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
}
@@ -3113,6 +3441,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
ZSTD_parameters params;
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
+ DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
memset(&params, 0, sizeof(params));
params.cParams = cParams;
params.fParams.contentSizeFlag = 1;
diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h
index f104fe981e..81f12ca6df 100644
--- a/thirdparty/zstd/compress/zstd_compress_internal.h
+++ b/thirdparty/zstd/compress/zstd_compress_internal.h
@@ -30,8 +30,14 @@ extern "C" {
/*-*************************************
* Constants
***************************************/
-static const U32 g_searchStrength = 8;
-#define HASH_READ_SIZE 8
+#define kSearchStrength 8
+#define HASH_READ_SIZE 8
+#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
+ It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+ It's not a big deal though : candidate will just be sorted again.
+ Additionnally, candidate position 1 will be lost.
+ But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy */
/*-*************************************
@@ -43,7 +49,7 @@ typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
typedef struct ZSTD_prefixDict_s {
const void* dict;
size_t dictSize;
- ZSTD_dictMode_e dictMode;
+ ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;
typedef struct {
@@ -51,7 +57,6 @@ typedef struct {
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
- U32 workspace[HUF_WORKSPACE_SIZE_U32];
HUF_repeat hufCTable_repeatMode;
FSE_repeat offcode_repeatMode;
FSE_repeat matchlength_repeatMode;
@@ -94,11 +99,43 @@ typedef struct {
} optState_t;
typedef struct {
+ ZSTD_entropyCTables_t entropy;
+ U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {
+ BYTE const* nextSrc; /* next block here to continue on current prefix */
+ BYTE const* base; /* All regular indexes relative to this position */
+ BYTE const* dictBase; /* extDict indexes relative to this position */
+ U32 dictLimit; /* below that point, need extDict */
+ U32 lowLimit; /* below that point, no more data */
+} ZSTD_window_t;
+
+typedef struct {
+ ZSTD_window_t window; /* State for window round buffer management */
+ U32 loadedDictEnd; /* index of end of dictionary */
+ U32 nextToUpdate; /* index from which to continue table update */
+ U32 nextToUpdate3; /* index from which to continue table update */
+ U32 hashLog3; /* dispatch table : larger == faster, more memory */
+ U32* hashTable;
+ U32* hashTable3;
+ U32* chainTable;
+ optState_t opt; /* optimal parser state */
+} ZSTD_matchState_t;
+
+typedef struct {
+ ZSTD_compressedBlockState_t* prevCBlock;
+ ZSTD_compressedBlockState_t* nextCBlock;
+ ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {
U32 offset;
U32 checksum;
} ldmEntry_t;
typedef struct {
+ ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
@@ -111,60 +148,68 @@ typedef struct {
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
U32 minMatchLength; /* Minimum match length */
U32 hashEveryLog; /* Log number of entries to skip */
+ U32 windowLog; /* Window log for the LDM */
} ldmParams_t;
+typedef struct {
+ U32 offset;
+ U32 litLength;
+ U32 matchLength;
+} rawSeq;
+
+typedef struct {
+ rawSeq* seq; /* The start of the sequences */
+ size_t pos; /* The position where reading stopped. <= size. */
+ size_t size; /* The number of sequences. <= capacity. */
+ size_t capacity; /* The capacity of the `seq` pointer */
+} rawSeqStore_t;
+
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
ZSTD_frameParameters fParams;
int compressionLevel;
- U32 forceWindow; /* force back-references to respect limit of
+ int disableLiteralCompression;
+ int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
/* Multithreading: used to pass parameters to mtctx */
- U32 nbThreads;
+ unsigned nbWorkers;
unsigned jobSize;
unsigned overlapSizeLog;
/* Long distance matching parameters */
ldmParams_t ldmParams;
- /* For use with createCCtxParams() and freeCCtxParams() only */
+ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
-
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
struct ZSTD_CCtx_s {
- const BYTE* nextSrc; /* next block here to continue on current prefix */
- const BYTE* base; /* All regular indexes relative to this position */
- const BYTE* dictBase; /* extDict indexes relative to this position */
- U32 dictLimit; /* below that point, need extDict */
- U32 lowLimit; /* below that point, no more data */
- U32 nextToUpdate; /* index from which to continue dictionary update */
- U32 nextToUpdate3; /* index from which to continue dictionary update */
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
- U32 loadedDictEnd; /* index of end of dictionary */
ZSTD_compressionStage_e stage;
- U32 dictID;
+ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
ZSTD_CCtx_params requestedParams;
ZSTD_CCtx_params appliedParams;
+ U32 dictID;
void* workSpace;
size_t workSpaceSize;
size_t blockSize;
- U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
- U64 consumedSrcSize;
+ unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
+ unsigned long long consumedSrcSize;
+ unsigned long long producedCSize;
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
- seqStore_t seqStore; /* sequences storage ptrs */
- optState_t optState;
- ldmState_t ldmState; /* long distance matching state */
- U32* hashTable;
- U32* hashTable3;
- U32* chainTable;
- ZSTD_entropyCTables_t* entropy;
+ seqStore_t seqStore; /* sequences storage ptrs */
+ ldmState_t ldmState; /* long distance matching state */
+ rawSeq* ldmSequences; /* Storage for the ldm output sequences */
+ size_t maxNbLdmSequences;
+ rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
+ ZSTD_blockState_t blockState;
+ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
/* streaming */
char* inBuff;
@@ -191,6 +236,12 @@ struct ZSTD_CCtx_s {
};
+typedef size_t (*ZSTD_blockCompressor) (
+ ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
+
+
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
@@ -359,10 +410,12 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
}
/** ZSTD_count_2segments() :
-* can count match length with `ip` & `match` in 2 different segments.
-* convention : on reaching mEnd, match count continue starting from iStart
-*/
-MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+ * can count match length with `ip` & `match` in 2 different segments.
+ * convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+ const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
size_t const matchLength = ZSTD_count(ip, match, vEnd);
@@ -372,8 +425,8 @@ MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const
/*-*************************************
-* Hashes
-***************************************/
+ * Hashes
+ ***************************************/
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
@@ -411,6 +464,171 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
+/*-*************************************
+* Round buffer management
+***************************************/
+/* Max current allowed */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again */
+#define ZSTD_CHUNKSIZE_MAX \
+ ( ((U32)-1) /* Maximum ending current index */ \
+ - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
+
+/**
+ * ZSTD_window_clear():
+ * Clears the window containing the history by simply setting it to empty.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+ size_t const endT = (size_t)(window->nextSrc - window->base);
+ U32 const end = (U32)endT;
+
+ window->lowLimit = end;
+ window->dictLimit = end;
+}
+
+/**
+ * ZSTD_window_hasExtDict():
+ * Returns non-zero if the window has a non-empty extDict.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+ return window.lowLimit < window.dictLimit;
+}
+
+/**
+ * ZSTD_window_needOverflowCorrection():
+ * Returns non-zero if the indices are getting too large and need overflow
+ * protection.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+ void const* srcEnd)
+{
+ U32 const current = (U32)((BYTE const*)srcEnd - window.base);
+ return current > ZSTD_CURRENT_MAX;
+}
+
+/**
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+ U32 maxDist, void const* src)
+{
+ /* preemptive overflow correction:
+ * 1. correction is large enough:
+ * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+ * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+ *
+ * current - newCurrent
+ * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+ * > (3<<29) - (1<<chainLog)
+ * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
+ * > 1<<29
+ *
+ * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+ * After correction, current is less than (1<<chainLog + 1<<windowLog).
+ * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+ * In 32-bit mode we are safe, because (chainLog <= 29), so
+ * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+ * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+ * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+ */
+ U32 const cycleMask = (1U << cycleLog) - 1;
+ U32 const current = (U32)((BYTE const*)src - window->base);
+ U32 const newCurrent = (current & cycleMask) + maxDist;
+ U32 const correction = current - newCurrent;
+ assert((maxDist & cycleMask) == 0);
+ assert(current > newCurrent);
+ /* Loose bound, should be around 1<<29 (see above) */
+ assert(correction > 1<<28);
+
+ window->base += correction;
+ window->dictBase += correction;
+ window->lowLimit -= correction;
+ window->dictLimit -= correction;
+
+ DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+ window->lowLimit);
+ return correction;
+}
+
+/**
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ * This allows a simple check that index >= lowLimit to see if index is valid.
+ * This must be called before a block compression call, with srcEnd as the block
+ * source end.
+ * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
+ * This is because dictionaries are allowed to be referenced as long as the last
+ * byte of the dictionary is in the window, but once they are out of range,
+ * they cannot be referenced. If loadedDictEndPtr is NULL, we use
+ * loadedDictEnd == 0.
+ */
+MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+ void const* srcEnd, U32 maxDist,
+ U32* loadedDictEndPtr)
+{
+ U32 const current = (U32)((BYTE const*)srcEnd - window->base);
+ U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
+ if (current > maxDist + loadedDictEnd) {
+ U32 const newLowLimit = current - maxDist;
+ if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+ if (window->dictLimit < window->lowLimit) {
+ DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
+ window->lowLimit);
+ window->dictLimit = window->lowLimit;
+ }
+ if (loadedDictEndPtr)
+ *loadedDictEndPtr = 0;
+ }
+}
+
+/**
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+ void const* src, size_t srcSize)
+{
+ BYTE const* const ip = (BYTE const*)src;
+ U32 contiguous = 1;
+ /* Check if blocks follow each other */
+ if (src != window->nextSrc) {
+ /* not contiguous */
+ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+ DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
+ window->dictLimit);
+ window->lowLimit = window->dictLimit;
+ assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
+ window->dictLimit = (U32)distanceFromBase;
+ window->dictBase = window->base;
+ window->base = ip - distanceFromBase;
+ // ms->nextToUpdate = window->dictLimit;
+ if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
+ contiguous = 0;
+ }
+ window->nextSrc = ip + srcSize;
+ /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+ if ( (ip+srcSize > window->dictBase + window->lowLimit)
+ & (ip < window->dictBase + window->dictLimit)) {
+ ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+ U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+ window->lowLimit = lowLimitMax;
+ }
+ return contiguous;
+}
+
#if defined (__cplusplus)
}
#endif
@@ -421,6 +639,13 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
* These prototypes shall only be called from within lib/compress
* ============================================================== */
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
+
/*! ZSTD_initCStream_internal() :
* Private use only. Init streaming operation.
* expects params to be valid.
@@ -446,7 +671,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
* Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize);
@@ -459,4 +684,26 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
const void* dict,size_t dictSize,
ZSTD_CCtx_params params);
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+
#endif /* ZSTD_COMPRESS_H */
diff --git a/thirdparty/zstd/compress/zstd_double_fast.c b/thirdparty/zstd/compress/zstd_double_fast.c
index fee5127c35..86e6b39621 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.c
+++ b/thirdparty/zstd/compress/zstd_double_fast.c
@@ -12,44 +12,58 @@
#include "zstd_double_fast.h"
-void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls)
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end)
{
- U32* const hashLarge = cctx->hashTable;
- U32 const hBitsL = cctx->appliedParams.cParams.hashLog;
- U32* const hashSmall = cctx->chainTable;
- U32 const hBitsS = cctx->appliedParams.cParams.chainLog;
- const BYTE* const base = cctx->base;
- const BYTE* ip = base + cctx->nextToUpdate;
+ U32* const hashLarge = ms->hashTable;
+ U32 const hBitsL = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ U32* const hashSmall = ms->chainTable;
+ U32 const hBitsS = cParams->chainLog;
+ const BYTE* const base = ms->window.base;
+ const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
- const size_t fastHashFillStep = 3;
-
- while(ip <= iend) {
- hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
- hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
- ip += fastHashFillStep;
+ const U32 fastHashFillStep = 3;
+
+ /* Always insert every fastHashFillStep position into the hash tables.
+ * Insert the other positions into the large hash table if their entry
+ * is empty.
+ */
+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+ U32 const current = (U32)(ip - base);
+ U32 i;
+ for (i = 0; i < fastHashFillStep; ++i) {
+ size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
+ size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
+ if (i == 0)
+ hashSmall[smHash] = current + i;
+ if (i == 0 || hashLarge[lgHash] == 0)
+ hashLarge[lgHash] = current + i;
+ }
}
}
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
- const void* src, size_t srcSize,
- const U32 mls)
+size_t ZSTD_compressBlock_doubleFast_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+ U32 const mls /* template */)
{
- U32* const hashLong = cctx->hashTable;
- const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
- U32* const hashSmall = cctx->chainTable;
- const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
- seqStore_t* seqStorePtr = &(cctx->seqStore);
- const BYTE* const base = cctx->base;
+ U32* const hashLong = ms->hashTable;
+ const U32 hBitsL = cParams->hashLog;
+ U32* const hashSmall = ms->chainTable;
+ const U32 hBitsS = cParams->chainLog;
+ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = cctx->dictLimit;
+ const U32 lowestIndex = ms->window.dictLimit;
const BYTE* const lowest = base + lowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
/* init */
@@ -76,7 +90,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
/* favor repcode */
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
U32 offset;
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
@@ -99,14 +113,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
} else {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
}
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@@ -129,61 +143,63 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
- seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+ rep[0] = offset_1 ? offset_1 : offsetSaved;
+ rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_doubleFast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- const U32 mls = ctx->appliedParams.cParams.searchLength;
+ const U32 mls = cParams->searchLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
case 5 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
case 6 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
case 7 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
}
}
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 mls)
+static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+ U32 const mls /* template */)
{
- U32* const hashLong = ctx->hashTable;
- U32 const hBitsL = ctx->appliedParams.cParams.hashLog;
- U32* const hashSmall = ctx->chainTable;
- U32 const hBitsS = ctx->appliedParams.cParams.chainLog;
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
+ U32* const hashLong = ms->hashTable;
+ U32 const hBitsL = cParams->hashLog;
+ U32* const hashSmall = ms->chainTable;
+ U32 const hBitsS = cParams->chainLog;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = ctx->lowLimit;
+ const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 dictLimit = ctx->dictLimit;
+ const U32 dictLimit = ms->window.dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@@ -209,7 +225,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
@@ -220,7 +236,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -245,10 +261,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
}
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }
@@ -272,7 +288,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -283,27 +299,29 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
+ rep[0] = offset_1;
+ rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_doubleFast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- U32 const mls = ctx->appliedParams.cParams.searchLength;
+ U32 const mls = cParams->searchLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
case 5 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
case 6 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
case 7 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
}
}
diff --git a/thirdparty/zstd/compress/zstd_double_fast.h b/thirdparty/zstd/compress/zstd_double_fast.h
index 75e0415809..6d80b2774c 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.h
+++ b/thirdparty/zstd/compress/zstd_double_fast.h
@@ -16,11 +16,18 @@ extern "C" {
#endif
#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end);
+size_t ZSTD_compressBlock_doubleFast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
-void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls);
-size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_fast.c b/thirdparty/zstd/compress/zstd_fast.c
index 7b56c3d6ad..df4d28b340 100644
--- a/thirdparty/zstd/compress/zstd_fast.c
+++ b/thirdparty/zstd/compress/zstd_fast.c
@@ -12,39 +12,48 @@
#include "zstd_fast.h"
-void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end)
{
- U32* const hashTable = zc->hashTable;
- U32 const hBits = zc->appliedParams.cParams.hashLog;
- const BYTE* const base = zc->base;
- const BYTE* ip = base + zc->nextToUpdate;
+ U32* const hashTable = ms->hashTable;
+ U32 const hBits = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ const BYTE* const base = ms->window.base;
+ const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
- const size_t fastHashFillStep = 3;
+ const U32 fastHashFillStep = 3;
- while(ip <= iend) {
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
- ip += fastHashFillStep;
+ /* Always insert every fastHashFillStep position into the hash table.
+ * Insert the other positions if their hash entry is empty.
+ */
+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+ U32 const current = (U32)(ip - base);
+ U32 i;
+ for (i = 0; i < fastHashFillStep; ++i) {
+ size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
+ if (i == 0 || hashTable[hash] == 0)
+ hashTable[hash] = current + i;
+ }
}
}
-
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
- const void* src, size_t srcSize,
- const U32 mls)
+size_t ZSTD_compressBlock_fast_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize,
+ U32 const hlog, U32 const stepSize, U32 const mls)
{
- U32* const hashTable = cctx->hashTable;
- U32 const hBits = cctx->appliedParams.cParams.hashLog;
- seqStore_t* seqStorePtr = &(cctx->seqStore);
- const BYTE* const base = cctx->base;
+ U32* const hashTable = ms->hashTable;
+ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = cctx->dictLimit;
+ const U32 lowestIndex = ms->window.dictLimit;
const BYTE* const lowest = base + lowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
/* init */
@@ -57,7 +66,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
- size_t const h = ZSTD_hashPtr(ip, hBits, mls);
+ size_t const h = ZSTD_hashPtr(ip, hlog, mls);
U32 const current = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
@@ -66,21 +75,21 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
- U32 offset;
- if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ if ( (matchIndex <= lowestIndex)
+ || (MEM_read32(match) != MEM_read32(ip)) ) {
+ assert(stepSize >= 1);
+ ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
- offset = (U32)(ip-match);
- while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
- offset_2 = offset_1;
- offset_1 = offset;
-
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
- }
+ { U32 const offset = (U32)(ip-match);
+ while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+ offset_2 = offset_1;
+ offset_1 = offset;
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ } }
/* match found */
ip += mLength;
@@ -88,8 +97,8 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
if (ip <= ilimit) {
/* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
- hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
+ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
@@ -97,65 +106,67 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
- seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+ rep[0] = offset_1 ? offset_1 : offsetSaved;
+ rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_fast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- const U32 mls = ctx->appliedParams.cParams.searchLength;
+ U32 const hlog = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ U32 const stepSize = cParams->targetLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
case 5 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
case 6 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
case 7 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
}
}
-static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 mls)
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize,
+ U32 const hlog, U32 const stepSize, U32 const mls)
{
- U32* hashTable = ctx->hashTable;
- const U32 hBits = ctx->appliedParams.cParams.hashLog;
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
+ U32* hashTable = ms->hashTable;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = ctx->lowLimit;
+ const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 dictLimit = ctx->dictLimit;
+ const U32 dictLimit = ms->window.dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
- const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+ const size_t h = ZSTD_hashPtr(ip, hlog, mls);
const U32 matchIndex = hashTable[h];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
@@ -171,11 +182,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < lowestIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ assert(stepSize >= 1);
+ ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
@@ -186,7 +198,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
@@ -195,8 +207,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
if (ip <= ilimit) {
/* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
- hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
@@ -207,8 +219,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
continue;
@@ -217,27 +229,31 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
+ rep[0] = offset_1;
+ rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_fast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- U32 const mls = ctx->appliedParams.cParams.searchLength;
+ U32 const hlog = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ U32 const stepSize = cParams->targetLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
case 5 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
case 6 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
case 7 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
}
}
diff --git a/thirdparty/zstd/compress/zstd_fast.h b/thirdparty/zstd/compress/zstd_fast.h
index d8b7771954..f0438ad5b4 100644
--- a/thirdparty/zstd/compress/zstd_fast.h
+++ b/thirdparty/zstd/compress/zstd_fast.h
@@ -16,13 +16,17 @@ extern "C" {
#endif
#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h"
-void ZSTD_fillHashTable(ZSTD_CCtx* zc, const void* end, const U32 mls);
-size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize);
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end);
+size_t ZSTD_compressBlock_fast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c
index 6d4804961d..9f158123f0 100644
--- a/thirdparty/zstd/compress/zstd_lazy.c
+++ b/thirdparty/zstd/compress/zstd_lazy.c
@@ -15,76 +15,90 @@
/*-*************************************
* Binary Tree search
***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
- * ip : assumed <= iend-8 .
- * @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- U32 nbCompares, U32 const mls, U32 const extDict)
+
+void ZSTD_updateDUBT(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iend,
+ U32 mls)
{
- U32* const hashTable = zc->hashTable;
- U32 const hashLog = zc->appliedParams.cParams.hashLog;
- size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
- U32* const bt = zc->chainTable;
- U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
+ U32* const hashTable = ms->hashTable;
+ U32 const hashLog = cParams->hashLog;
+
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
+ U32 const btMask = (1 << btLog) - 1;
+
+ const BYTE* const base = ms->window.base;
+ U32 const target = (U32)(ip - base);
+ U32 idx = ms->nextToUpdate;
+
+ if (idx != target)
+ DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
+ idx, target, ms->window.dictLimit);
+ assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
+ (void)iend;
+
+ assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
+ for ( ; idx < target ; idx++) {
+ size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
+ U32 const matchIndex = hashTable[h];
+
+ U32* const nextCandidatePtr = bt + 2*(idx&btMask);
+ U32* const sortMarkPtr = nextCandidatePtr + 1;
+
+ DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
+ hashTable[h] = idx; /* Update Hash Table */
+ *nextCandidatePtr = matchIndex; /* update BT like a chain */
+ *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
+ }
+ ms->nextToUpdate = target;
+}
+
+
+/** ZSTD_insertDUBT1() :
+ * sort one already inserted but unsorted position
+ * assumption : current >= btlow == (current - btmask)
+ * doesn't fail */
+static void ZSTD_insertDUBT1(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ U32 current, const BYTE* inputEnd,
+ U32 nbCompares, U32 btLow, int extDict)
+{
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
- U32 matchIndex = hashTable[h];
size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
+ const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* match;
- const U32 current = (U32)(ip-base);
- const U32 btLow = btMask >= current ? 0 : current - btMask;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = smallerPtr + 1;
+ U32 matchIndex = *smallerPtr;
U32 dummy32; /* to be nullified at the end */
- U32 const windowLow = zc->lowLimit;
- U32 matchEndIdx = current+8+1;
- size_t bestLength = 8;
-#ifdef ZSTD_C_PREDICT
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
- predictedSmall += (predictedSmall>0);
- predictedLarge += (predictedLarge>0);
-#endif /* ZSTD_C_PREDICT */
-
- DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+ U32 const windowLow = ms->window.lowLimit;
- assert(ip <= iend-8); /* required for h calculation */
- hashTable[h] = current; /* Update Hash Table */
+ DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
+ current, dictLimit, windowLow);
+ assert(current >= btLow);
+ assert(ip < iend); /* condition for ZSTD_count */
while (nbCompares-- && (matchIndex > windowLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
-#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
- const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
- if (matchIndex == predictedSmall) {
- /* no need to check length, result known */
- *smallerPtr = matchIndex;
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- predictedSmall = predictPtr[1] + (predictPtr[1]>0);
- continue;
- }
- if (matchIndex == predictedLarge) {
- *largerPtr = matchIndex;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- predictedLarge = predictPtr[0] + (predictPtr[0]>0);
- continue;
- }
-#endif
-
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
- assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
- match = base + matchIndex;
+ if ( (!extDict)
+ || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
+ || (current < dictLimit) /* both in extDict */) {
+ const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
+ assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
+ || (current < dictLimit) );
+ match = mBase + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
@@ -93,11 +107,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
- if (matchLength > bestLength) {
- bestLength = matchLength;
- if (matchLength > matchEndIdx - matchIndex)
- matchEndIdx = matchIndex + (U32)matchLength;
- }
+ DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+ current, matchIndex, (U32)matchLength);
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -108,6 +119,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
+ matchIndex, btLow, nextPtr[1]);
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
} else {
@@ -115,184 +128,205 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
+ matchIndex, btLow, nextPtr[0]);
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
*smallerPtr = *largerPtr = 0;
- if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
- assert(matchEndIdx > current + 8);
- return matchEndIdx - (current + 8);
}
-FORCE_INLINE_TEMPLATE
-void ZSTD_updateTree_internal(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- const U32 nbCompares, const U32 mls, const U32 extDict)
-{
- const BYTE* const base = zc->base;
- U32 const target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
- DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
- idx, target, extDict);
-
- while(idx < target)
- idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict);
- zc->nextToUpdate = target;
-}
-void ZSTD_updateTree(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- const U32 nbCompares, const U32 mls)
+static size_t ZSTD_DUBT_findBestMatch (
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* const ip, const BYTE* const iend,
+ size_t* offsetPtr,
+ U32 const mls,
+ U32 const extDict)
{
- ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/);
-}
-
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- const U32 nbCompares, const U32 mls)
-{
- ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/);
-}
+ U32* const hashTable = ms->hashTable;
+ U32 const hashLog = cParams->hashLog;
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+ U32 matchIndex = hashTable[h];
+ const BYTE* const base = ms->window.base;
+ U32 const current = (U32)(ip-base);
+ U32 const windowLow = ms->window.lowLimit;
-static size_t ZSTD_insertBtAndFindBestMatch (
- ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- size_t* offsetPtr,
- U32 nbCompares, const U32 mls,
- U32 extDict)
-{
- U32* const hashTable = zc->hashTable;
- U32 const hashLog = zc->appliedParams.cParams.hashLog;
- size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
- U32* const bt = zc->chainTable;
- U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
- U32 matchIndex = hashTable[h];
- size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const prefixStart = base + dictLimit;
- const U32 current = (U32)(ip-base);
- const U32 btLow = btMask >= current ? 0 : current - btMask;
- const U32 windowLow = zc->lowLimit;
- U32* smallerPtr = bt + 2*(current&btMask);
- U32* largerPtr = bt + 2*(current&btMask) + 1;
- U32 matchEndIdx = current+8+1;
- U32 dummy32; /* to be nullified at the end */
- size_t bestLength = 0;
+ U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+ U32 const unsortLimit = MAX(btLow, windowLow);
+
+ U32* nextCandidate = bt + 2*(matchIndex&btMask);
+ U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+ U32 nbCompares = 1U << cParams->searchLog;
+ U32 nbCandidates = nbCompares;
+ U32 previousCandidate = 0;
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
assert(ip <= iend-8); /* required for h calculation */
- hashTable[h] = current; /* Update Hash Table */
- while (nbCompares-- && (matchIndex > windowLow)) {
- U32* const nextPtr = bt + 2*(matchIndex & btMask);
- size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
- const BYTE* match;
+ /* reach end of unsorted candidates list */
+ while ( (matchIndex > unsortLimit)
+ && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
+ && (nbCandidates > 1) ) {
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
+ matchIndex);
+ *unsortedMark = previousCandidate;
+ previousCandidate = matchIndex;
+ matchIndex = *nextCandidate;
+ nextCandidate = bt + 2*(matchIndex&btMask);
+ unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+ nbCandidates --;
+ }
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
- match = base + matchIndex;
- matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
- } else {
- match = dictBase + matchIndex;
- matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
- if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
- }
+ if ( (matchIndex > unsortLimit)
+ && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
+ matchIndex);
+ *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
+ }
+
+ /* batch sort stacked candidates */
+ matchIndex = previousCandidate;
+ while (matchIndex) { /* will end on matchIndex == 0 */
+ U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+ U32 const nextCandidateIdx = *nextCandidateIdxPtr;
+ ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
+ nbCandidates, unsortLimit, extDict);
+ matchIndex = nextCandidateIdx;
+ nbCandidates++;
+ }
- if (matchLength > bestLength) {
- if (matchLength > matchEndIdx - matchIndex)
- matchEndIdx = matchIndex + (U32)matchLength;
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
- if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
- break; /* drop, to guarantee consistency (miss a little bit of compression) */
+ /* find longest match */
+ { size_t commonLengthSmaller=0, commonLengthLarger=0;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ U32* smallerPtr = bt + 2*(current&btMask);
+ U32* largerPtr = bt + 2*(current&btMask) + 1;
+ U32 matchEndIdx = current+8+1;
+ U32 dummy32; /* to be nullified at the end */
+ size_t bestLength = 0;
+
+ matchIndex = hashTable[h];
+ hashTable[h] = current; /* Update Hash Table */
+
+ while (nbCompares-- && (matchIndex > windowLow)) {
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+ const BYTE* match;
+
+ if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+ match = base + matchIndex;
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+ } else {
+ match = dictBase + matchIndex;
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+ if (matchIndex+matchLength >= dictLimit)
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
- }
- if (match[matchLength] < ip[matchLength]) {
- /* match is smaller than current */
- *smallerPtr = matchIndex; /* update smaller idx */
- commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- } else {
- /* match is larger than current */
- *largerPtr = matchIndex;
- commonLengthLarger = matchLength;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- } }
+ if (matchLength > bestLength) {
+ if (matchLength > matchEndIdx - matchIndex)
+ matchEndIdx = matchIndex + (U32)matchLength;
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
+ }
+ }
- *smallerPtr = *largerPtr = 0;
+ if (match[matchLength] < ip[matchLength]) {
+ /* match is smaller than current */
+ *smallerPtr = matchIndex; /* update smaller idx */
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ } else {
+ /* match is larger than current */
+ *largerPtr = matchIndex;
+ commonLengthLarger = matchLength;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ } }
+
+ *smallerPtr = *largerPtr = 0;
- assert(matchEndIdx > current+8);
- zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
- return bestLength;
+ assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
+ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
+ if (bestLength >= MINMATCH) {
+ U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+ current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+ }
+ return bestLength;
+ }
}
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls)
+ const U32 mls /* template */)
{
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
- return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
+ DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
+ return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
}
static size_t ZSTD_BtFindBestMatch_selectMLS (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
+ size_t* offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
- case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
+ case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
case 7 :
- case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+ case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
}
}
/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls)
+ const U32 mls)
{
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
- return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
+ DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
+ return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
}
static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
+ size_t* offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
- case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
+ case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
case 7 :
- case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+ case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
}
}
@@ -305,15 +339,17 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
+static U32 ZSTD_insertAndFindFirstIndex_internal(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, U32 const mls)
{
- U32* const hashTable = zc->hashTable;
- const U32 hashLog = zc->appliedParams.cParams.hashLog;
- U32* const chainTable = zc->chainTable;
- const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
- const BYTE* const base = zc->base;
+ U32* const hashTable = ms->hashTable;
+ const U32 hashLog = cParams->hashLog;
+ U32* const chainTable = ms->chainTable;
+ const U32 chainMask = (1 << cParams->chainLog) - 1;
+ const BYTE* const base = ms->window.base;
const U32 target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
+ U32 idx = ms->nextToUpdate;
while(idx < target) { /* catch up */
size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
@@ -322,35 +358,42 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
idx++;
}
- zc->nextToUpdate = target;
+ ms->nextToUpdate = target;
return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}
+U32 ZSTD_insertAndFindFirstIndex(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip)
+{
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
+}
+
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls, const U32 extDict)
+ const U32 mls, const U32 extDict)
{
- U32* const chainTable = zc->chainTable;
- const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
+ U32* const chainTable = ms->chainTable;
+ const U32 chainSize = (1 << cParams->chainLog);
const U32 chainMask = chainSize-1;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
- const U32 lowLimit = zc->lowLimit;
+ const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 minChain = current > chainSize ? current - chainSize : 0;
- int nbAttempts=maxNbAttempts;
+ U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
/* HC4 match finder */
- U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
+ U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
@@ -381,35 +424,33 @@ size_t ZSTD_HcFindBestMatch_generic (
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
+ size_t* offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
- case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
case 7 :
- case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
}
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
- ZSTD_CCtx* const zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* const offsetPtr,
- U32 const maxNbAttempts, U32 const matchLengthSearch)
+ size_t* const offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
- case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
case 7 :
- case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
}
}
@@ -418,30 +459,29 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
* Common parser - lazy strategy
*********************************/
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+size_t ZSTD_compressBlock_lazy_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
+ const void* src, size_t srcSize,
+ const U32 searchMethod, const U32 depth)
{
- seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base + ctx->dictLimit;
+ const BYTE* const base = ms->window.base + ms->window.dictLimit;
- U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
- U32 const mls = ctx->appliedParams.cParams.searchLength;
-
- typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
- size_t* offsetPtr,
- U32 maxNbAttempts, U32 matchLengthSearch);
+ typedef size_t (*searchMax_f)(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
- U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;
+ U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
/* init */
ip += (ip==base);
- ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ms->nextToUpdate3 = ms->nextToUpdate;
{ U32 const maxRep = (U32)(ip-base);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
@@ -462,13 +502,13 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
@@ -484,7 +524,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
matchLength = mlRep, offset = 0, start = ip;
}
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -503,7 +543,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
matchLength = ml2, offset = 0, start = ip;
}
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -528,7 +568,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -538,73 +578,80 @@ _storeSequence:
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
} }
/* Save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
- seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
+ rep[0] = offset_1 ? offset_1 : savedOffset;
+ rep[1] = offset_2 ? offset_2 : savedOffset;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btlazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
}
-size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
}
-size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
}
-size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_greedy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
}
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
+ const void* src, size_t srcSize,
+ const U32 searchMethod, const U32 depth)
{
- seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base;
- const U32 dictLimit = ctx->dictLimit;
- const U32 lowestIndex = ctx->lowLimit;
+ const BYTE* const base = ms->window.base;
+ const U32 dictLimit = ms->window.dictLimit;
+ const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const prefixStart = base + dictLimit;
- const BYTE* const dictBase = ctx->dictBase;
+ const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const dictStart = dictBase + ctx->lowLimit;
+ const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
- const U32 mls = ctx->appliedParams.cParams.searchLength;
-
- typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
- size_t* offsetPtr,
- U32 maxNbAttempts, U32 matchLengthSearch);
+ typedef size_t (*searchMax_f)(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
- U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];
+ U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */
- ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */
@@ -628,13 +675,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
@@ -661,7 +708,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* search match, depth 1 */
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -691,7 +738,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* search match, depth 2 */
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -713,7 +760,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -728,7 +775,7 @@ _storeSequence:
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -737,29 +784,41 @@ _storeSequence:
} }
/* Save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
+ rep[0] = offset_1;
+ rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_greedy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
}
-size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
}
-size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
}
-size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btlazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
}
diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h
index 74e1fd6970..bda064f199 100644
--- a/thirdparty/zstd/compress/zstd_lazy.h
+++ b/thirdparty/zstd/compress/zstd_lazy.h
@@ -15,22 +15,39 @@
extern "C" {
#endif
-#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
-
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
-void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-
-size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+#include "zstd_compress_internal.h"
+
+U32 ZSTD_insertAndFindFirstIndex(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c
index be50872cf7..bffd8a3dfa 100644
--- a/thirdparty/zstd/compress/zstd_ldm.c
+++ b/thirdparty/zstd/compress/zstd_ldm.c
@@ -17,36 +17,45 @@
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10
-size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+ ZSTD_compressionParameters const* cParams)
{
+ U32 const windowLog = cParams->windowLog;
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
- params->enableLdm = enableLdm>0;
- params->hashLog = 0;
- params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
- params->minMatchLength = LDM_MIN_MATCH_LENGTH;
- params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
- return 0;
-}
-
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
-{
+ DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+ if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+ if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+ if (cParams->strategy >= ZSTD_btopt) {
+ /* Get out of the way of the optimal parser */
+ U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
+ assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
+ assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
+ params->minMatchLength = minMatch;
+ }
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
}
- if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
+ if (params->hashEveryLog == 0) {
params->hashEveryLog =
windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
}
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
- size_t const ldmHSize = ((size_t)1) << hashLog;
- size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+ size_t const ldmHSize = ((size_t)1) << params.hashLog;
+ size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
size_t const ldmBucketSize =
- ((size_t)1) << (hashLog - ldmBucketSizeLog);
- return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+ ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+ size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+ return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+ return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}
/** ZSTD_ldm_getSmallHash() :
@@ -167,6 +176,7 @@ static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
}
U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+ DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
}
@@ -205,21 +215,22 @@ static size_t ZSTD_ldm_countBackwardsMatch(
*
* The tables for the other strategies are filled within their
* block compressors. */
-static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end)
{
const BYTE* const iend = (const BYTE*)end;
- const U32 mls = zc->appliedParams.cParams.searchLength;
- switch(zc->appliedParams.cParams.strategy)
+ switch(cParams->strategy)
{
case ZSTD_fast:
- ZSTD_fillHashTable(zc, iend, mls);
- zc->nextToUpdate = (U32)(iend - zc->base);
+ ZSTD_fillHashTable(ms, cParams, iend);
+ ms->nextToUpdate = (U32)(iend - ms->window.base);
break;
case ZSTD_dfast:
- ZSTD_fillDoubleHashTable(zc, iend, mls);
- zc->nextToUpdate = (U32)(iend - zc->base);
+ ZSTD_fillDoubleHashTable(ms, cParams, iend);
+ ms->nextToUpdate = (U32)(iend - ms->window.base);
break;
case ZSTD_greedy:
@@ -268,69 +279,62 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
* if it is far way
* (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
- U32 const current = (U32)(anchor - cctx->base);
- if (current > cctx->nextToUpdate + 1024) {
- cctx->nextToUpdate =
- current - MIN(512, current - cctx->nextToUpdate - 1024);
+ U32 const current = (U32)(anchor - ms->window.base);
+ if (current > ms->nextToUpdate + 1024) {
+ ms->nextToUpdate =
+ current - MIN(512, current - ms->nextToUpdate - 1024);
}
}
-typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-/* defined in zstd_compress.c */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
- const void* src, size_t srcSize)
+static size_t ZSTD_ldm_generateSequences_internal(
+ ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+ ldmParams_t const* params, void const* src, size_t srcSize)
{
- ldmState_t* const ldmState = &(cctx->ldmState);
- const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
- const U64 hashPower = ldmState->hashPower;
- const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
- const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
- const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
- seqStore_t* const seqStorePtr = &(cctx->seqStore);
- const BYTE* const base = cctx->base;
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const U32 lowestIndex = cctx->dictLimit;
- const BYTE* const lowest = base + lowestIndex;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
- const ZSTD_blockCompressor blockCompressor =
- ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
- U32* const repToConfirm = seqStorePtr->repToConfirm;
- U32 savedRep[ZSTD_REP_NUM];
+ /* LDM parameters */
+ int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+ U32 const minMatchLength = params->minMatchLength;
+ U64 const hashPower = ldmState->hashPower;
+ U32 const hBits = params->hashLog - params->bucketSizeLog;
+ U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+ U32 const hashEveryLog = params->hashEveryLog;
+ U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
+ /* Prefix and extDict parameters */
+ U32 const dictLimit = ldmState->window.dictLimit;
+ U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+ BYTE const* const base = ldmState->window.base;
+ BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+ BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+ BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+ BYTE const* const lowPrefixPtr = base + dictLimit;
+ /* Input bounds */
+ BYTE const* const istart = (BYTE const*)src;
+ BYTE const* const iend = istart + srcSize;
+ BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+ /* Input positions */
+ BYTE const* anchor = istart;
+ BYTE const* ip = istart;
+ /* Rolling hash */
+ BYTE const* lastHashed = NULL;
U64 rollingHash = 0;
- const BYTE* lastHashed = NULL;
- size_t i, lastLiterals;
- /* Save seqStorePtr->rep and copy repToConfirm */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-
- /* Main Search Loop */
- while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+ while (ip <= ilimit) {
size_t mLength;
U32 const current = (U32)(ip - base);
size_t forwardMatchLength = 0, backwardMatchLength = 0;
ldmEntry_t* bestEntry = NULL;
if (ip != istart) {
rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
- lastHashed[ldmParams.minMatchLength],
+ lastHashed[minMatchLength],
hashPower);
} else {
- rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+ rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
}
lastHashed = ip;
/* Do not insert and do not look for a match */
- if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
- ldmTagMask) {
+ if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
ip++;
continue;
}
@@ -340,27 +344,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
ldmEntry_t* const bucket =
ZSTD_ldm_getBucket(ldmState,
ZSTD_ldm_getSmallHash(rollingHash, hBits),
- ldmParams);
+ *params);
ldmEntry_t* cur;
size_t bestMatchLength = 0;
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
- const BYTE* const pMatch = cur->offset + base;
size_t curForwardMatchLength, curBackwardMatchLength,
curTotalMatchLength;
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
continue;
}
-
- curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
- if (curForwardMatchLength < ldmParams.minMatchLength) {
- continue;
+ if (extDict) {
+ BYTE const* const curMatchBase =
+ cur->offset < dictLimit ? dictBase : base;
+ BYTE const* const pMatch = curMatchBase + cur->offset;
+ BYTE const* const matchEnd =
+ cur->offset < dictLimit ? dictEnd : iend;
+ BYTE const* const lowMatchPtr =
+ cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+ curForwardMatchLength = ZSTD_count_2segments(
+ ip, pMatch, iend,
+ matchEnd, lowPrefixPtr);
+ if (curForwardMatchLength < minMatchLength) {
+ continue;
+ }
+ curBackwardMatchLength =
+ ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+ lowMatchPtr);
+ curTotalMatchLength = curForwardMatchLength +
+ curBackwardMatchLength;
+ } else { /* !extDict */
+ BYTE const* const pMatch = base + cur->offset;
+ curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+ if (curForwardMatchLength < minMatchLength) {
+ continue;
+ }
+ curBackwardMatchLength =
+ ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+ lowPrefixPtr);
+ curTotalMatchLength = curForwardMatchLength +
+ curBackwardMatchLength;
}
- curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
- ip, anchor, pMatch, lowest);
- curTotalMatchLength = curForwardMatchLength +
- curBackwardMatchLength;
if (curTotalMatchLength > bestMatchLength) {
bestMatchLength = curTotalMatchLength;
@@ -375,7 +401,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
if (bestEntry == NULL) {
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
hBits, current,
- ldmParams);
+ *params);
ip++;
continue;
}
@@ -384,324 +410,244 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
mLength = forwardMatchLength + backwardMatchLength;
ip -= backwardMatchLength;
- /* Call the block compressor on the remaining literals */
{
+ /* Store the sequence:
+ * ip = current - backwardMatchLength
+ * The match is at (bestEntry->offset - backwardMatchLength)
+ */
U32 const matchIndex = bestEntry->offset;
- const BYTE* const match = base + matchIndex - backwardMatchLength;
- U32 const offset = (U32)(ip - match);
-
- /* Overwrite rep codes */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- /* Fill tables for block compressor */
- ZSTD_ldm_limitTableUpdate(cctx, anchor);
- ZSTD_ldm_fillFastTables(cctx, anchor);
-
- /* Call block compressor and get remaining literals */
- lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
- cctx->nextToUpdate = (U32)(ip - base);
-
- /* Update repToConfirm with the new offset */
- for (i = ZSTD_REP_NUM - 1; i > 0; i--)
- repToConfirm[i] = repToConfirm[i-1];
- repToConfirm[0] = offset;
-
- /* Store the sequence with the leftover literals */
- ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
- offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+ U32 const offset = current - matchIndex;
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+ /* Out of sequence storage */
+ if (rawSeqStore->size == rawSeqStore->capacity)
+ return ERROR(dstSize_tooSmall);
+ seq->litLength = (U32)(ip - anchor);
+ seq->matchLength = (U32)mLength;
+ seq->offset = offset;
+ rawSeqStore->size++;
}
/* Insert the current entry into the hash table */
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
(U32)(lastHashed - base),
- ldmParams);
+ *params);
assert(ip + backwardMatchLength == lastHashed);
/* Fill the hash table from lastHashed+1 to ip+mLength*/
/* Heuristic: don't need to fill the entire table at end of block */
- if (ip + mLength < ilimit) {
+ if (ip + mLength <= ilimit) {
rollingHash = ZSTD_ldm_fillLdmHashTable(
ldmState, rollingHash, lastHashed,
- ip + mLength, base, hBits, ldmParams);
+ ip + mLength, base, hBits, *params);
lastHashed = ip + mLength - 1;
}
ip += mLength;
anchor = ip;
- /* Check immediate repcode */
- while ( (ip < ilimit)
- && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
- && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
-
- size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
- iend) + 4;
- /* Swap repToConfirm[1] <=> repToConfirm[0] */
- {
- U32 const tmpOff = repToConfirm[1];
- repToConfirm[1] = repToConfirm[0];
- repToConfirm[0] = tmpOff;
- }
-
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
-
- /* Fill the hash table from lastHashed+1 to ip+rLength*/
- if (ip + rLength < ilimit) {
- rollingHash = ZSTD_ldm_fillLdmHashTable(
- ldmState, rollingHash, lastHashed,
- ip + rLength, base, hBits, ldmParams);
- lastHashed = ip + rLength - 1;
- }
- ip += rLength;
- anchor = ip;
- }
}
-
- /* Overwrite rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- ZSTD_ldm_limitTableUpdate(cctx, anchor);
- ZSTD_ldm_fillFastTables(cctx, anchor);
-
- lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
- cctx->nextToUpdate = (U32)(iend - base);
-
- /* Restore seqStorePtr->rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = savedRep[i];
-
- /* Return the last literals size */
- return lastLiterals;
+ return iend - anchor;
}
-size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+/*! ZSTD_ldm_reduceTable() :
+ * reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+ U32 const reducerValue)
{
- return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+ U32 u;
+ for (u = 0; u < size; u++) {
+ if (table[u].offset < reducerValue) table[u].offset = 0;
+ else table[u].offset -= reducerValue;
+ }
}
-static size_t ZSTD_compressBlock_ldm_extDict_generic(
- ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_ldm_generateSequences(
+ ldmState_t* ldmState, rawSeqStore_t* sequences,
+ ldmParams_t const* params, void const* src, size_t srcSize)
{
- ldmState_t* const ldmState = &(ctx->ldmState);
- const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
- const U64 hashPower = ldmState->hashPower;
- const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
- const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
- const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
- seqStore_t* const seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const U32 lowestIndex = ctx->lowLimit;
- const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 dictLimit = ctx->dictLimit;
- const BYTE* const lowPrefixPtr = base + dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
- const ZSTD_blockCompressor blockCompressor =
- ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
- U32* const repToConfirm = seqStorePtr->repToConfirm;
- U32 savedRep[ZSTD_REP_NUM];
- U64 rollingHash = 0;
- const BYTE* lastHashed = NULL;
- size_t i, lastLiterals;
-
- /* Save seqStorePtr->rep and copy repToConfirm */
- for (i = 0; i < ZSTD_REP_NUM; i++) {
- savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
- }
-
- /* Search Loop */
- while (ip < ilimit) { /* < instead of <=, because (ip+1) */
- size_t mLength;
- const U32 current = (U32)(ip-base);
- size_t forwardMatchLength = 0, backwardMatchLength = 0;
- ldmEntry_t* bestEntry = NULL;
- if (ip != istart) {
- rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
- lastHashed[ldmParams.minMatchLength],
- hashPower);
+ U32 const maxDist = 1U << params->windowLog;
+ BYTE const* const istart = (BYTE const*)src;
+ BYTE const* const iend = istart + srcSize;
+ size_t const kMaxChunkSize = 1 << 20;
+ size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+ size_t chunk;
+ size_t leftoverSize = 0;
+
+ assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+ /* Check that ZSTD_window_update() has been called for this chunk prior
+ * to passing it to this function.
+ */
+ assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+ /* The input could be very large (in zstdmt), so it must be broken up into
+ * chunks to enforce the maximmum distance and handle overflow correction.
+ */
+ assert(sequences->pos <= sequences->size);
+ assert(sequences->size <= sequences->capacity);
+ for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+ BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+ size_t const remaining = (size_t)(iend - chunkStart);
+ BYTE const *const chunkEnd =
+ (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+ size_t const chunkSize = chunkEnd - chunkStart;
+ size_t newLeftoverSize;
+ size_t const prevSize = sequences->size;
+
+ assert(chunkStart < iend);
+ /* 1. Perform overflow correction if necessary. */
+ if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+ U32 const ldmHSize = 1U << params->hashLog;
+ U32 const correction = ZSTD_window_correctOverflow(
+ &ldmState->window, /* cycleLog */ 0, maxDist, src);
+ ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+ }
+ /* 2. We enforce the maximum offset allowed.
+ *
+ * kMaxChunkSize should be small enough that we don't lose too much of
+ * the window through early invalidation.
+ * TODO: * Test the chunk size.
+ * * Try invalidation after the sequence generation and test the
+ * the offset against maxDist directly.
+ */
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
+ /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+ newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+ ldmState, sequences, params, chunkStart, chunkSize);
+ if (ZSTD_isError(newLeftoverSize))
+ return newLeftoverSize;
+ /* 4. We add the leftover literals from previous iterations to the first
+ * newly generated sequence, or add the `newLeftoverSize` if none are
+ * generated.
+ */
+ /* Prepend the leftover literals from the last call */
+ if (prevSize < sequences->size) {
+ sequences->seq[prevSize].litLength += (U32)leftoverSize;
+ leftoverSize = newLeftoverSize;
} else {
- rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+ assert(newLeftoverSize == chunkSize);
+ leftoverSize += chunkSize;
}
- lastHashed = ip;
+ }
+ return 0;
+}
- if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
- ldmTagMask) {
- /* Don't insert and don't look for a match */
- ip++;
- continue;
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+ while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+ rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+ if (srcSize <= seq->litLength) {
+ /* Skip past srcSize literals */
+ seq->litLength -= (U32)srcSize;
+ return;
}
-
- /* Get the best entry and compute the match lengths */
- {
- ldmEntry_t* const bucket =
- ZSTD_ldm_getBucket(ldmState,
- ZSTD_ldm_getSmallHash(rollingHash, hBits),
- ldmParams);
- ldmEntry_t* cur;
- size_t bestMatchLength = 0;
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
- for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
- const BYTE* const curMatchBase =
- cur->offset < dictLimit ? dictBase : base;
- const BYTE* const pMatch = curMatchBase + cur->offset;
- const BYTE* const matchEnd =
- cur->offset < dictLimit ? dictEnd : iend;
- const BYTE* const lowMatchPtr =
- cur->offset < dictLimit ? dictStart : lowPrefixPtr;
- size_t curForwardMatchLength, curBackwardMatchLength,
- curTotalMatchLength;
-
- if (cur->checksum != checksum || cur->offset <= lowestIndex) {
- continue;
- }
-
- curForwardMatchLength = ZSTD_count_2segments(
- ip, pMatch, iend,
- matchEnd, lowPrefixPtr);
- if (curForwardMatchLength < ldmParams.minMatchLength) {
- continue;
- }
- curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
- ip, anchor, pMatch, lowMatchPtr);
- curTotalMatchLength = curForwardMatchLength +
- curBackwardMatchLength;
-
- if (curTotalMatchLength > bestMatchLength) {
- bestMatchLength = curTotalMatchLength;
- forwardMatchLength = curForwardMatchLength;
- backwardMatchLength = curBackwardMatchLength;
- bestEntry = cur;
+ srcSize -= seq->litLength;
+ seq->litLength = 0;
+ if (srcSize < seq->matchLength) {
+ /* Skip past the first srcSize of the match */
+ seq->matchLength -= (U32)srcSize;
+ if (seq->matchLength < minMatch) {
+ /* The match is too short, omit it */
+ if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+ seq[1].litLength += seq[0].matchLength;
}
+ rawSeqStore->pos++;
}
+ return;
}
+ srcSize -= seq->matchLength;
+ seq->matchLength = 0;
+ rawSeqStore->pos++;
+ }
+}
- /* No match found -- continue searching */
- if (bestEntry == NULL) {
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
- (U32)(lastHashed - base),
- ldmParams);
- ip++;
- continue;
+/**
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+ U32 const remaining, U32 const minMatch)
+{
+ rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+ assert(sequence.offset > 0);
+ /* Likely: No partial sequence */
+ if (remaining >= sequence.litLength + sequence.matchLength) {
+ rawSeqStore->pos++;
+ return sequence;
+ }
+ /* Cut the sequence short (offset == 0 ==> rest is literals). */
+ if (remaining <= sequence.litLength) {
+ sequence.offset = 0;
+ } else if (remaining < sequence.litLength + sequence.matchLength) {
+ sequence.matchLength = remaining - sequence.litLength;
+ if (sequence.matchLength < minMatch) {
+ sequence.offset = 0;
}
+ }
+ /* Skip past `remaining` bytes for the future sequences. */
+ ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+ return sequence;
+}
- /* Match found */
- mLength = forwardMatchLength + backwardMatchLength;
- ip -= backwardMatchLength;
-
- /* Call the block compressor on the remaining literals */
- {
- /* ip = current - backwardMatchLength
- * The match is at (bestEntry->offset - backwardMatchLength) */
- U32 const matchIndex = bestEntry->offset;
- U32 const offset = current - matchIndex;
-
- /* Overwrite rep codes */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- /* Fill the hash table for the block compressor */
- ZSTD_ldm_limitTableUpdate(ctx, anchor);
- ZSTD_ldm_fillFastTables(ctx, anchor);
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+ int const extDict)
+{
+ unsigned const minMatch = cParams->searchLength;
+ ZSTD_blockCompressor const blockCompressor =
+ ZSTD_selectBlockCompressor(cParams->strategy, extDict);
+ BYTE const* const base = ms->window.base;
+ /* Input bounds */
+ BYTE const* const istart = (BYTE const*)src;
+ BYTE const* const iend = istart + srcSize;
+ /* Input positions */
+ BYTE const* ip = istart;
+
+ assert(rawSeqStore->pos <= rawSeqStore->size);
+ assert(rawSeqStore->size <= rawSeqStore->capacity);
+ /* Loop through each sequence and apply the block compressor to the lits */
+ while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+ /* maybeSplitSequence updates rawSeqStore->pos */
+ rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+ (U32)(iend - ip), minMatch);
+ int i;
+ /* End signal */
+ if (sequence.offset == 0)
+ break;
- /* Call block compressor and get remaining literals */
- lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
- ctx->nextToUpdate = (U32)(ip - base);
+ assert(sequence.offset <= (1U << cParams->windowLog));
+ assert(ip + sequence.litLength + sequence.matchLength <= iend);
- /* Update repToConfirm with the new offset */
+ /* Fill tables for block compressor */
+ ZSTD_ldm_limitTableUpdate(ms, ip);
+ ZSTD_ldm_fillFastTables(ms, cParams, ip);
+ /* Run the block compressor */
+ {
+ size_t const newLitLength =
+ blockCompressor(ms, seqStore, rep, cParams, ip,
+ sequence.litLength);
+ ip += sequence.litLength;
+ ms->nextToUpdate = (U32)(ip - base);
+ /* Update the repcodes */
for (i = ZSTD_REP_NUM - 1; i > 0; i--)
- repToConfirm[i] = repToConfirm[i-1];
- repToConfirm[0] = offset;
-
- /* Store the sequence with the leftover literals */
- ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
- offset + ZSTD_REP_MOVE, mLength - MINMATCH);
- }
-
- /* Insert the current entry into the hash table */
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
- (U32)(lastHashed - base),
- ldmParams);
-
- /* Fill the hash table from lastHashed+1 to ip+mLength */
- assert(ip + backwardMatchLength == lastHashed);
- if (ip + mLength < ilimit) {
- rollingHash = ZSTD_ldm_fillLdmHashTable(
- ldmState, rollingHash, lastHashed,
- ip + mLength, base, hBits,
- ldmParams);
- lastHashed = ip + mLength - 1;
- }
- ip += mLength;
- anchor = ip;
-
- /* check immediate repcode */
- while (ip < ilimit) {
- U32 const current2 = (U32)(ip-base);
- U32 const repIndex2 = current2 - repToConfirm[1];
- const BYTE* repMatch2 = repIndex2 < dictLimit ?
- dictBase + repIndex2 : base + repIndex2;
- if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
- (repIndex2 > lowestIndex)) /* intentional overflow */
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
- const BYTE* const repEnd2 = repIndex2 < dictLimit ?
- dictEnd : iend;
- size_t const repLength2 =
- ZSTD_count_2segments(ip+4, repMatch2+4, iend,
- repEnd2, lowPrefixPtr) + 4;
-
- U32 tmpOffset = repToConfirm[1];
- repToConfirm[1] = repToConfirm[0];
- repToConfirm[0] = tmpOffset;
-
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
-
- /* Fill the hash table from lastHashed+1 to ip+repLength2*/
- if (ip + repLength2 < ilimit) {
- rollingHash = ZSTD_ldm_fillLdmHashTable(
- ldmState, rollingHash, lastHashed,
- ip + repLength2, base, hBits,
- ldmParams);
- lastHashed = ip + repLength2 - 1;
- }
- ip += repLength2;
- anchor = ip;
- continue;
- }
- break;
+ rep[i] = rep[i-1];
+ rep[0] = sequence.offset;
+ /* Store the sequence */
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+ sequence.offset + ZSTD_REP_MOVE,
+ sequence.matchLength - MINMATCH);
+ ip += sequence.matchLength;
}
}
-
- /* Overwrite rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- ZSTD_ldm_limitTableUpdate(ctx, anchor);
- ZSTD_ldm_fillFastTables(ctx, anchor);
-
- /* Call the block compressor one last time on the last literals */
- lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
- ctx->nextToUpdate = (U32)(iend - base);
-
- /* Restore seqStorePtr->rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = savedRep[i];
-
- /* Return the last literals size */
- return lastLiterals;
-}
-
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
-{
- return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+ /* Fill the tables for the block compressor */
+ ZSTD_ldm_limitTableUpdate(ms, ip);
+ ZSTD_ldm_fillFastTables(ms, cParams, ip);
+ /* Compress the last literals */
+ {
+ size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
+ ip, iend - ip);
+ ms->nextToUpdate = (U32)(iend - base);
+ return lastLiterals;
+ }
}
diff --git a/thirdparty/zstd/compress/zstd_ldm.h b/thirdparty/zstd/compress/zstd_ldm.h
index 8f12c677aa..0c3789ff13 100644
--- a/thirdparty/zstd/compress/zstd_ldm.h
+++ b/thirdparty/zstd/compress/zstd_ldm.h
@@ -22,32 +22,71 @@ extern "C" {
***************************************/
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
-#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999
-/** ZSTD_compressBlock_ldm_generic() :
+/**
+ * ZSTD_ldm_generateSequences():
*
- * This is a block compressor intended for long distance matching.
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
*
- * The function searches for matches of length at least
- * ldmParams.minMatchLength using a hash table in cctx->ldmState.
- * Matches can be at a distance of up to cParams.windowLog.
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ * sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+ ldmState_t* ldms, rawSeqStore_t* sequences,
+ ldmParams_t const* params, void const* src, size_t srcSize);
+
+/**
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
+ void const* src, size_t srcSize,
+ int const extDict);
+
+/**
+ * ZSTD_ldm_skipSequences():
*
- * Upon finding a match, the unmatched literals are compressed using a
- * ZSTD_blockCompressor (depending on the strategy in the compression
- * parameters), which stores the matched sequences. The "long distance"
- * match is then stored with the remaining literals from the
- * ZSTD_blockCompressor. */
-size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* cctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize);
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+ U32 const minMatch);
-/** ZSTD_ldm_initializeParameters() :
- * Initialize the long distance matching parameters to their default values. */
-size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm);
/** ZSTD_ldm_getTableSize() :
- * Estimate the space needed for long distance matching tables. */
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog);
+ * Estimate the space needed for long distance matching tables or 0 if LDM is
+ * disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/** ZSTD_ldm_getSeqSpace() :
+ * Return an upper bound on the number of sequences that can be produced by
+ * the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
/** ZSTD_ldm_getTableSize() :
* Return prime8bytes^(minMatchLength-1) */
@@ -58,8 +97,12 @@ U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
* windowLog and params->hashLog.
*
* Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
- * params->hashLog if it is not). */
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog);
+ * params->hashLog if it is not).
+ *
+ * Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+ ZSTD_compressionParameters const* cParams);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c
index 7171ff5373..f63f0c5852 100644
--- a/thirdparty/zstd/compress/zstd_opt.c
+++ b/thirdparty/zstd/compress/zstd_opt.c
@@ -10,7 +10,6 @@
#include "zstd_compress_internal.h"
#include "zstd_opt.h"
-#include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
@@ -244,14 +243,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip)
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
{
- U32* const hashTable3 = cctx->hashTable3;
- U32 const hashLog3 = cctx->hashLog3;
- const BYTE* const base = cctx->base;
- U32 idx = cctx->nextToUpdate3;
- U32 const target = cctx->nextToUpdate3 = (U32)(ip - base);
+ U32* const hashTable3 = ms->hashTable3;
+ U32 const hashLog3 = ms->hashLog3;
+ const BYTE* const base = ms->window.base;
+ U32 idx = ms->nextToUpdate3;
+ U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+ assert(hashLog3 > 0);
while(idx < target) {
hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
@@ -265,36 +265,173 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE*
/*-*************************************
* Binary Tree search
***************************************/
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+ * ip : assumed <= iend-8 .
+ * @return : nb of positions added */
+static U32 ZSTD_insertBt1(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* const ip, const BYTE* const iend,
+ U32 const mls, U32 const extDict)
+{
+ U32* const hashTable = ms->hashTable;
+ U32 const hashLog = cParams->hashLog;
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
+ U32 const btMask = (1 << btLog) - 1;
+ U32 matchIndex = hashTable[h];
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const BYTE* match;
+ const U32 current = (U32)(ip-base);
+ const U32 btLow = btMask >= current ? 0 : current - btMask;
+ U32* smallerPtr = bt + 2*(current&btMask);
+ U32* largerPtr = smallerPtr + 1;
+ U32 dummy32; /* to be nullified at the end */
+ U32 const windowLow = ms->window.lowLimit;
+ U32 matchEndIdx = current+8+1;
+ size_t bestLength = 8;
+ U32 nbCompares = 1U << cParams->searchLog;
+#ifdef ZSTD_C_PREDICT
+ U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
+ U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+ predictedSmall += (predictedSmall>0);
+ predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+
+ assert(ip <= iend-8); /* required for h calculation */
+ hashTable[h] = current; /* Update Hash Table */
+
+ while (nbCompares-- && (matchIndex > windowLow)) {
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+ assert(matchIndex < current);
+
+#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
+ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
+ if (matchIndex == predictedSmall) {
+ /* no need to check length, result known */
+ *smallerPtr = matchIndex;
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+ continue;
+ }
+ if (matchIndex == predictedLarge) {
+ *largerPtr = matchIndex;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+ continue;
+ }
+#endif
+
+ if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+ assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
+ match = base + matchIndex;
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+ } else {
+ match = dictBase + matchIndex;
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+ if (matchIndex+matchLength >= dictLimit)
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+ }
+
+ if (matchLength > bestLength) {
+ bestLength = matchLength;
+ if (matchLength > matchEndIdx - matchIndex)
+ matchEndIdx = matchIndex + (U32)matchLength;
+ }
+
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
+ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+ }
+
+ if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
+ /* match is smaller than current */
+ *smallerPtr = matchIndex; /* update smaller idx */
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
+ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
+ } else {
+ /* match is larger than current */
+ *largerPtr = matchIndex;
+ commonLengthLarger = matchLength;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ } }
+
+ *smallerPtr = *largerPtr = 0;
+ if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
+ assert(matchEndIdx > current + 8);
+ return matchEndIdx - (current + 8);
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* const ip, const BYTE* const iend,
+ const U32 mls, const U32 extDict)
+{
+ const BYTE* const base = ms->window.base;
+ U32 const target = (U32)(ip - base);
+ U32 idx = ms->nextToUpdate;
+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
+ idx, target, extDict);
+
+ while(idx < target)
+ idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
+ ms->nextToUpdate = target;
+}
+
+void ZSTD_updateTree(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iend)
+{
+ ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
+}
+
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit, int const extDict,
- U32 nbCompares, U32 const mls, U32 const sufficient_len,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
- ZSTD_match_t* matches, const U32 lengthToBeat)
+ ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
{
- const BYTE* const base = zc->base;
+ U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+ const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
- U32 const hashLog = zc->appliedParams.cParams.hashLog;
+ U32 const hashLog = cParams->hashLog;
U32 const minMatch = (mls==3) ? 3 : 4;
- U32* const hashTable = zc->hashTable;
+ U32* const hashTable = ms->hashTable;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32 matchIndex = hashTable[h];
- U32* const bt = zc->chainTable;
- U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
U32 const btMask= (1U << btLog) - 1;
size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const dictBase = zc->dictBase;
- U32 const dictLimit = zc->dictLimit;
+ const BYTE* const dictBase = ms->window.dictBase;
+ U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask;
- U32 const windowLow = zc->lowLimit;
+ U32 const windowLow = ms->window.lowLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
U32 dummy32; /* to be nullified at the end */
U32 mnum = 0;
+ U32 nbCompares = 1U << cParams->searchLog;
size_t bestLength = lengthToBeat-1;
DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
@@ -335,7 +472,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
- U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
+ U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
if ((matchIndex3 > windowLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
@@ -359,7 +496,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
mnum = 1;
if ( (mlen > sufficient_len) |
(ip+mlen == iLimit) ) { /* best possible length */
- zc->nextToUpdate = current+1; /* skip insertion */
+ ms->nextToUpdate = current+1; /* skip insertion */
return 1;
} } } }
@@ -416,30 +553,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
*smallerPtr = *largerPtr = 0;
assert(matchEndIdx > current+8);
- zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
+ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return mnum;
}
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
- U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, U32 const lengthToBeat)
{
+ U32 const matchLengthSearch = cParams->searchLength;
DEBUGLOG(7, "ZSTD_BtGetAllMatches");
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
- else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
switch(matchLengthSearch)
{
- case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
+ case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
default :
- case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
- case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
+ case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
+ case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
case 7 :
- case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
+ case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
}
}
@@ -527,36 +663,33 @@ static int ZSTD_literalsContribution_cached(
}
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
+size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
const void* src, size_t srcSize,
const int optLevel, const int extDict)
{
- seqStore_t* const seqStorePtr = &(ctx->seqStore);
- optState_t* const optStatePtr = &(ctx->optState);
+ optState_t* const optStatePtr = &ms->opt;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base;
- const BYTE* const prefixStart = base + ctx->dictLimit;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const prefixStart = base + ms->window.dictLimit;
- U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
- U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1);
- U32 const mls = ctx->appliedParams.cParams.searchLength;
- U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
+ U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+ U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
cachedLiteralPrice_t cachedLitPrice;
- U32 rep[ZSTD_REP_NUM];
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
- ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
ip += (ip==prefixStart);
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
/* Match Loop */
@@ -567,7 +700,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
- U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
@@ -653,7 +786,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
- U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
U32 matchNb;
if (!nbMatches) continue;
@@ -749,37 +882,42 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
}
ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
- ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
+ ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
anchor = ip;
} }
ZSTD_setLog2Prices(optStatePtr);
} /* while (ip < ilimit) */
- /* Save reps for next block */
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
-
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btopt(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
}
-size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btultra(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
}
-size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btopt_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
}
-size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btultra_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
}
diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h
index 82e810c293..b8dc389f31 100644
--- a/thirdparty/zstd/compress/zstd_opt.h
+++ b/thirdparty/zstd/compress/zstd_opt.h
@@ -15,13 +15,25 @@
extern "C" {
#endif
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h"
-size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+void ZSTD_updateTree(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iend); /* used in ZSTD_loadDictionaryContent() */
-size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+size_t ZSTD_compressBlock_btopt(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c
index e51edf124f..c7a205d8c7 100644
--- a/thirdparty/zstd/compress/zstdmt_compress.c
+++ b/thirdparty/zstd/compress/zstdmt_compress.c
@@ -10,7 +10,8 @@
/* ====== Tuning parameters ====== */
-#define ZSTDMT_NBTHREADS_MAX 200
+#define ZSTDMT_NBWORKERS_MAX 200
+#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
@@ -22,11 +23,18 @@
/* ====== Dependencies ====== */
#include <string.h> /* memcpy, memset */
+#include <limits.h> /* INT_MAX */
#include "pool.h" /* threadpool */
#include "threading.h" /* mutex */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_ldm.h"
#include "zstdmt_compress.h"
+/* Guards code to support resizing the SeqPool.
+ * We will want to resize the SeqPool to save memory in the future.
+ * Until then, comment the code out since it is unused.
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
/* ====== Debug ====== */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
@@ -81,7 +89,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
typedef struct buffer_s {
void* start;
- size_t size;
+ size_t capacity;
} buffer_t;
static const buffer_t g_nullBuffer = { NULL, 0 };
@@ -95,9 +103,9 @@ typedef struct ZSTDMT_bufferPool_s {
buffer_t bTable[1]; /* variable size */
} ZSTDMT_bufferPool;
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
{
- unsigned const maxNbBuffers = 2*nbThreads + 3;
+ unsigned const maxNbBuffers = 2*nbWorkers + 3;
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
if (bufPool==NULL) return NULL;
@@ -129,17 +137,21 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{
size_t const poolSize = sizeof(*bufPool)
- + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
unsigned u;
size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
for (u=0; u<bufPool->totalBuffers; u++)
- totalBufferSize += bufPool->bTable[u].size;
+ totalBufferSize += bufPool->bTable[u].capacity;
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return poolSize + totalBufferSize;
}
+/* ZSTDMT_setBufferSize() :
+ * all future buffers provided by this buffer pool will have _at least_ this size
+ * note : it's better for all buffers to have same size,
+ * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
{
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
@@ -149,7 +161,9 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
}
/** ZSTDMT_getBuffer() :
- * assumption : bufPool must be valid */
+ * assumption : bufPool must be valid
+ * @return : a buffer, with start pointer and size
+ * note: allocation may fail, in this case, start==NULL and size==0 */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
{
size_t const bSize = bufPool->bufferSize;
@@ -157,12 +171,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */
buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
- size_t const availBufferSize = buf.size;
+ size_t const availBufferSize = buf.capacity;
bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
/* large enough, but not too much */
DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
- bufPool->nbBuffers, (U32)buf.size);
+ bufPool->nbBuffers, (U32)buf.capacity);
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return buf;
}
@@ -176,12 +190,42 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
{ buffer_t buffer;
void* const start = ZSTD_malloc(bSize, bufPool->cMem);
buffer.start = start; /* note : start can be NULL if malloc fails ! */
- buffer.size = (start==NULL) ? 0 : bSize;
- DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+ buffer.capacity = (start==NULL) ? 0 : bSize;
+ if (start==NULL) {
+ DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
+ } else {
+ DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+ }
return buffer;
}
}
+#if ZSTD_RESIZE_SEQPOOL
+/** ZSTDMT_resizeBuffer() :
+ * assumption : bufPool must be valid
+ * @return : a buffer that is at least the buffer pool buffer size.
+ * If a reallocation happens, the data in the input buffer is copied.
+ */
+static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
+{
+ size_t const bSize = bufPool->bufferSize;
+ if (buffer.capacity < bSize) {
+ void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+ buffer_t newBuffer;
+ newBuffer.start = start;
+ newBuffer.capacity = start == NULL ? 0 : bSize;
+ if (start != NULL) {
+ assert(newBuffer.capacity >= buffer.capacity);
+ memcpy(newBuffer.start, buffer.start, buffer.capacity);
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
+ return newBuffer;
+ }
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
+ }
+ return buffer;
+}
+#endif
+
/* store buffer for later re-use, up to pool capacity */
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
{
@@ -191,7 +235,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
if (bufPool->nbBuffers < bufPool->totalBuffers) {
bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
- (U32)buf.size, (U32)(bufPool->nbBuffers-1));
+ (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return;
}
@@ -201,21 +245,73 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
ZSTD_free(buf.start, bufPool->cMem);
}
-/* Sets parameters relevant to the compression job, initializing others to
- * default values. Notably, nbThreads should probably be zero. */
-static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
+
+/* ===== Seq Pool Wrapper ====== */
+
+static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+ return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
{
- ZSTD_CCtx_params jobParams;
- memset(&jobParams, 0, sizeof(jobParams));
+ rawSeqStore_t seq = {NULL, 0, 0, 0};
+ seq.seq = (rawSeq*)buffer.start;
+ seq.capacity = buffer.capacity / sizeof(rawSeq);
+ return seq;
+}
- jobParams.cParams = params.cParams;
- jobParams.fParams = params.fParams;
- jobParams.compressionLevel = params.compressionLevel;
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+ buffer_t buffer;
+ buffer.start = seq.seq;
+ buffer.capacity = seq.capacity * sizeof(rawSeq);
+ return buffer;
+}
- jobParams.ldmParams = params.ldmParams;
- return jobParams;
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+ if (seqPool->bufferSize == 0) {
+ return kNullRawSeqStore;
+ }
+ return bufferToSeq(ZSTDMT_getBuffer(seqPool));
}
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+ ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+ ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ ZSTDMT_setNbSeq(seqPool, 0);
+ return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+ ZSTDMT_freeBufferPool(seqPool);
+}
+
+
+
/* ===== CCtx Pool ===== */
/* a single CCtx Pool can be invoked from multiple threads in parallel */
@@ -238,23 +334,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
}
/* ZSTDMT_createCCtxPool() :
- * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
-static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
ZSTD_customMem cMem)
{
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
- sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
+ sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ assert(nbWorkers > 0);
if (!cctxPool) return NULL;
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
ZSTD_free(cctxPool, cMem);
return NULL;
}
cctxPool->cMem = cMem;
- cctxPool->totalCCtx = nbThreads;
+ cctxPool->totalCCtx = nbWorkers;
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
- DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
+ DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
return cctxPool;
}
@@ -262,15 +359,16 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
{
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
- { unsigned const nbThreads = cctxPool->totalCCtx;
+ { unsigned const nbWorkers = cctxPool->totalCCtx;
size_t const poolSize = sizeof(*cctxPool)
- + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+ + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
unsigned u;
size_t totalCCtxSize = 0;
- for (u=0; u<nbThreads; u++) {
+ for (u=0; u<nbWorkers; u++) {
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
}
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ assert(nbWorkers > 0);
return poolSize + totalCCtxSize;
}
}
@@ -297,111 +395,318 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
if (pool->availCCtx < pool->totalCCtx)
pool->cctx[pool->availCCtx++] = cctx;
else {
- /* pool overflow : should not happen, since totalCCtx==nbThreads */
- DEBUGLOG(5, "CCtx pool overflow : free cctx");
+ /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+ DEBUGLOG(4, "CCtx pool overflow : free cctx");
ZSTD_freeCCtx(cctx);
}
ZSTD_pthread_mutex_unlock(&pool->poolMutex);
}
+/* ==== Serial State ==== */
-/* ===== Thread worker ===== */
+typedef struct {
+ void const* start;
+ size_t size;
+} range_t;
typedef struct {
- buffer_t src;
- const void* srcStart;
- size_t prefixSize;
- size_t srcSize;
- buffer_t dstBuff;
- size_t cSize;
- size_t dstFlushed;
- unsigned firstChunk;
- unsigned lastChunk;
- unsigned jobCompleted;
- unsigned jobScanned;
- ZSTD_pthread_mutex_t* jobCompleted_mutex;
- ZSTD_pthread_cond_t* jobCompleted_cond;
+ /* All variables in the struct are protected by mutex. */
+ ZSTD_pthread_mutex_t mutex;
+ ZSTD_pthread_cond_t cond;
ZSTD_CCtx_params params;
- const ZSTD_CDict* cdict;
- ZSTDMT_CCtxPool* cctxPool;
- ZSTDMT_bufferPool* bufPool;
- unsigned long long fullFrameSize;
+ ldmState_t ldmState;
+ XXH64_state_t xxhState;
+ unsigned nextJobID;
+ /* Protects ldmWindow.
+ * Must be acquired after the main mutex when acquiring both.
+ */
+ ZSTD_pthread_mutex_t ldmWindowMutex;
+ ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is udpated */
+ ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
+{
+ /* Adjust parameters */
+ if (params.ldmParams.enableLdm) {
+ DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+ params.ldmParams.windowLog = params.cParams.windowLog;
+ ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+ assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+ assert(params.ldmParams.hashEveryLog < 32);
+ serialState->ldmState.hashPower =
+ ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
+ } else {
+ memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+ }
+ serialState->nextJobID = 0;
+ if (params.fParams.checksumFlag)
+ XXH64_reset(&serialState->xxhState, 0);
+ if (params.ldmParams.enableLdm) {
+ ZSTD_customMem cMem = params.customMem;
+ unsigned const hashLog = params.ldmParams.hashLog;
+ size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+ unsigned const bucketLog =
+ params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+ size_t const bucketSize = (size_t)1 << bucketLog;
+ unsigned const prevBucketLog =
+ serialState->params.ldmParams.hashLog -
+ serialState->params.ldmParams.bucketSizeLog;
+ /* Size the seq pool tables */
+ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
+ /* Reset the window */
+ ZSTD_window_clear(&serialState->ldmState.window);
+ serialState->ldmWindow = serialState->ldmState.window;
+ /* Resize tables and output space if necessary. */
+ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+ ZSTD_free(serialState->ldmState.hashTable, cMem);
+ serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
+ }
+ if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+ ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
+ }
+ if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+ return 1;
+ /* Zero the tables */
+ memset(serialState->ldmState.hashTable, 0, hashSize);
+ memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+ }
+ serialState->params = params;
+ return 0;
+}
+
+static int ZSTDMT_serialState_init(serialState_t* serialState)
+{
+ int initError = 0;
+ memset(serialState, 0, sizeof(*serialState));
+ initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
+ initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL);
+ return initError;
+}
+
+static void ZSTDMT_serialState_free(serialState_t* serialState)
+{
+ ZSTD_customMem cMem = serialState->params.customMem;
+ ZSTD_pthread_mutex_destroy(&serialState->mutex);
+ ZSTD_pthread_cond_destroy(&serialState->cond);
+ ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
+ ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
+ ZSTD_free(serialState->ldmState.hashTable, cMem);
+ ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+}
+
+static void ZSTDMT_serialState_update(serialState_t* serialState,
+ ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
+ range_t src, unsigned jobID)
+{
+ /* Wait for our turn */
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+ while (serialState->nextJobID < jobID) {
+ ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
+ }
+ /* A future job may error and skip our job */
+ if (serialState->nextJobID == jobID) {
+ /* It is now our turn, do any processing necessary */
+ if (serialState->params.ldmParams.enableLdm) {
+ size_t error;
+ assert(seqStore.seq != NULL && seqStore.pos == 0 &&
+ seqStore.size == 0 && seqStore.capacity > 0);
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+ error = ZSTD_ldm_generateSequences(
+ &serialState->ldmState, &seqStore,
+ &serialState->params.ldmParams, src.start, src.size);
+ /* We provide a large enough buffer to never fail. */
+ assert(!ZSTD_isError(error)); (void)error;
+ /* Update ldmWindow to match the ldmState.window and signal the main
+ * thread if it is waiting for a buffer.
+ */
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+ serialState->ldmWindow = serialState->ldmState.window;
+ ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
+ ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+ }
+ if (serialState->params.fParams.checksumFlag && src.size > 0)
+ XXH64_update(&serialState->xxhState, src.start, src.size);
+ }
+ /* Now it is the next jobs turn */
+ serialState->nextJobID++;
+ ZSTD_pthread_cond_broadcast(&serialState->cond);
+ ZSTD_pthread_mutex_unlock(&serialState->mutex);
+
+ if (seqStore.size > 0) {
+ size_t const err = ZSTD_referenceExternalSequences(
+ jobCCtx, seqStore.seq, seqStore.size);
+ assert(serialState->params.ldmParams.enableLdm);
+ assert(!ZSTD_isError(err));
+ (void)err;
+ }
+}
+
+static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
+ unsigned jobID, size_t cSize)
+{
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+ if (serialState->nextJobID <= jobID) {
+ assert(ZSTD_isError(cSize)); (void)cSize;
+ DEBUGLOG(5, "Skipping past job %u because of error", jobID);
+ serialState->nextJobID = jobID + 1;
+ ZSTD_pthread_cond_broadcast(&serialState->cond);
+
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+ ZSTD_window_clear(&serialState->ldmWindow);
+ ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
+ ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+ }
+ ZSTD_pthread_mutex_unlock(&serialState->mutex);
+
+}
+
+
+/* ------------------------------------------ */
+/* ===== Worker thread ===== */
+/* ------------------------------------------ */
+
+static const range_t kNullRange = { NULL, 0 };
+
+typedef struct {
+ size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
+ size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
+ ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */
+ ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */
+ ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */
+ ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */
+ ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */
+ serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */
+ buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
+ range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */
+ range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */
+ unsigned jobID; /* set by mtctx, then read by worker => no barrier */
+ unsigned firstJob; /* set by mtctx, then read by worker => no barrier */
+ unsigned lastJob; /* set by mtctx, then read by worker => no barrier */
+ ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */
+ const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */
+ unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */
+ size_t dstFlushed; /* used only by mtctx */
+ unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription;
-/* ZSTDMT_compressChunk() : POOL_function type */
-void ZSTDMT_compressChunk(void* jobDescription)
+/* ZSTDMT_compressionJob() is a POOL_function type */
+void ZSTDMT_compressionJob(void* jobDescription)
{
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+ ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
- const void* const src = (const char*)job->srcStart + job->prefixSize;
+ rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
buffer_t dstBuff = job->dstBuff;
- DEBUGLOG(5, "ZSTDMT_compressChunk: job (first:%u) (last:%u) : prefixSize %u, srcSize %u ",
- job->firstChunk, job->lastChunk, (U32)job->prefixSize, (U32)job->srcSize);
+ /* Don't compute the checksum for chunks, since we compute it externally,
+ * but write it in the header.
+ */
+ if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
+ /* Don't run LDM for the chunks, since we handle it externally */
+ jobParams.ldmParams.enableLdm = 0;
+
+ /* ressources */
if (cctx==NULL) {
job->cSize = ERROR(memory_allocation);
goto _endJob;
}
-
- if (dstBuff.start == NULL) {
+ if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
dstBuff = ZSTDMT_getBuffer(job->bufPool);
if (dstBuff.start==NULL) {
job->cSize = ERROR(memory_allocation);
goto _endJob;
}
- job->dstBuff = dstBuff;
- DEBUGLOG(5, "ZSTDMT_compressChunk: received dstBuff of size %u", (U32)dstBuff.size);
+ job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
}
+ /* init */
if (job->cdict) {
- size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dm_auto, job->cdict, job->params, job->fullFrameSize);
- DEBUGLOG(4, "ZSTDMT_compressChunk: init using CDict (windowLog=%u)", job->params.cParams.windowLog);
- assert(job->firstChunk); /* only allowed for first job */
+ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
+ assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
} else { /* srcStart points at reloaded section */
- U64 const pledgedSrcSize = job->firstChunk ? job->fullFrameSize : ZSTD_CONTENTSIZE_UNKNOWN;
- ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
- size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk);
- if (ZSTD_isError(forceWindowError)) {
- DEBUGLOG(5, "ZSTD_CCtxParam_setParameter error : %s ", ZSTD_getErrorName(forceWindowError));
- job->cSize = forceWindowError;
- goto _endJob;
- }
- DEBUGLOG(5, "ZSTDMT_compressChunk: invoking ZSTD_compressBegin_advanced_internal with windowLog = %u ", jobParams.cParams.windowLog);
+ U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
+ { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
+ if (ZSTD_isError(forceWindowError)) {
+ job->cSize = forceWindowError;
+ goto _endJob;
+ } }
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
- job->srcStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
- NULL,
+ job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+ NULL, /*cdict*/
jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) {
- DEBUGLOG(5, "ZSTD_compressBegin_advanced_internal error : %s ", ZSTD_getErrorName(initError));
job->cSize = initError;
goto _endJob;
- } }
- }
- if (!job->firstChunk) { /* flush and overwrite frame header when it's not first job */
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
+ } } }
+
+ /* Perform serial step as early as possible, but after CCtx initialization */
+ ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
+
+ if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
+ size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
+ DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
}
- DEBUGLOG(5, "Compressing into dstBuff of size %u", (U32)dstBuff.size);
- DEBUG_PRINTHEX(6, job->srcStart, 12);
- job->cSize = (job->lastChunk) ?
- ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
- ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
- DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u) ",
- (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
- DEBUGLOG(5, "dstBuff.size : %u ; => %s ", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
+ /* compress */
+ { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
+ int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
+ const BYTE* ip = (const BYTE*) job->src.start;
+ BYTE* const ostart = (BYTE*)dstBuff.start;
+ BYTE* op = ostart;
+ BYTE* oend = op + dstBuff.capacity;
+ int chunkNb;
+ if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */
+ DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
+ assert(job->cSize == 0);
+ for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
+ size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+ if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
+ ip += chunkSize;
+ op += cSize; assert(op < oend);
+ /* stats */
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+ job->cSize += cSize;
+ job->consumed = chunkSize * chunkNb;
+ DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
+ (U32)cSize, (U32)job->cSize);
+ ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */
+ ZSTD_pthread_mutex_unlock(&job->job_mutex);
+ }
+ /* last block */
+ assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
+ if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
+ size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
+ size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
+ size_t const cSize = (job->lastJob) ?
+ ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
+ ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+ if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
+ /* stats */
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+ job->cSize += cSize;
+ ZSTD_pthread_mutex_unlock(&job->job_mutex);
+ } }
_endJob:
+ ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
+ if (job->prefix.size > 0)
+ DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start);
+ DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start);
+ /* release resources */
+ ZSTDMT_releaseSeq(job->seqPool, rawSeqStore);
ZSTDMT_releaseCCtx(job->cctxPool, cctx);
- ZSTDMT_releaseBuffer(job->bufPool, job->src);
- job->src = g_nullBuffer; job->srcStart = NULL;
- ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
- job->jobCompleted = 1;
- job->jobScanned = 0;
- ZSTD_pthread_cond_signal(job->jobCompleted_cond);
- ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+ /* report */
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+ job->consumed = job->src.size;
+ ZSTD_pthread_cond_signal(&job->job_cond);
+ ZSTD_pthread_mutex_unlock(&job->job_mutex);
}
@@ -410,109 +715,141 @@ _endJob:
/* ------------------------------------------ */
typedef struct {
+ range_t prefix; /* read-only non-owned prefix buffer */
buffer_t buffer;
size_t filled;
} inBuff_t;
+typedef struct {
+ BYTE* buffer; /* The round input buffer. All jobs get references
+ * to pieces of the buffer. ZSTDMT_tryGetInputRange()
+ * handles handing out job input buffers, and makes
+ * sure it doesn't overlap with any pieces still in use.
+ */
+ size_t capacity; /* The capacity of buffer. */
+ size_t pos; /* The position of the current inBuff in the round
+ * buffer. Updated past the end if the inBuff once
+ * the inBuff is sent to the worker thread.
+ * pos <= capacity.
+ */
+} roundBuff_t;
+
+static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
+
struct ZSTDMT_CCtx_s {
POOL_ctx* factory;
ZSTDMT_jobDescription* jobs;
ZSTDMT_bufferPool* bufPool;
ZSTDMT_CCtxPool* cctxPool;
- ZSTD_pthread_mutex_t jobCompleted_mutex;
- ZSTD_pthread_cond_t jobCompleted_cond;
+ ZSTDMT_seqPool* seqPool;
ZSTD_CCtx_params params;
size_t targetSectionSize;
- size_t inBuffSize;
- size_t dictSize;
- size_t targetDictSize;
+ size_t targetPrefixSize;
+ roundBuff_t roundBuff;
inBuff_t inBuff;
- XXH64_state_t xxhState;
- unsigned singleThreaded;
+ int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
+ serialState_t serial;
+ unsigned singleBlockingThread;
unsigned jobIDMask;
unsigned doneJobID;
unsigned nextJobID;
unsigned frameEnded;
unsigned allJobsCompleted;
unsigned long long frameContentSize;
+ unsigned long long consumed;
+ unsigned long long produced;
ZSTD_customMem cMem;
ZSTD_CDict* cdictLocal;
const ZSTD_CDict* cdict;
};
-static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
+{
+ U32 jobNb;
+ if (jobTable == NULL) return;
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
+ ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
+ }
+ ZSTD_free(jobTable, cMem);
+}
+
+/* ZSTDMT_allocJobsTable()
+ * allocate and init a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table */
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
{
U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
U32 const nbJobs = 1 << nbJobsLog2;
+ U32 jobNb;
+ ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+ ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+ int initError = 0;
+ if (jobTable==NULL) return NULL;
*nbJobsPtr = nbJobs;
- return (ZSTDMT_jobDescription*) ZSTD_calloc(
- nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
+ }
+ if (initError != 0) {
+ ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
+ return NULL;
+ }
+ return jobTable;
}
-/* ZSTDMT_CCtxParam_setNbThreads():
+/* ZSTDMT_CCtxParam_setNbWorkers():
* Internal use only */
-size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads)
+size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
{
- if (nbThreads > ZSTDMT_NBTHREADS_MAX) nbThreads = ZSTDMT_NBTHREADS_MAX;
- if (nbThreads < 1) nbThreads = 1;
- params->nbThreads = nbThreads;
+ if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
+ params->nbWorkers = nbWorkers;
params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
params->jobSize = 0;
- return nbThreads;
-}
-
-/* ZSTDMT_getNbThreads():
- * @return nb threads currently active in mtctx.
- * mtctx must be valid */
-size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx)
-{
- assert(mtctx != NULL);
- return mtctx->params.nbThreads;
+ return nbWorkers;
}
-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
{
ZSTDMT_CCtx* mtctx;
- U32 nbJobs = nbThreads + 2;
- DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbThreads = %u)", nbThreads);
+ U32 nbJobs = nbWorkers + 2;
+ int initError;
+ DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
- if (nbThreads < 1) return NULL;
- nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
+ if (nbWorkers < 1) return NULL;
+ nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
/* invalid custom allocator */
return NULL;
mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
if (!mtctx) return NULL;
- ZSTDMT_CCtxParam_setNbThreads(&mtctx->params, nbThreads);
+ ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
mtctx->cMem = cMem;
mtctx->allJobsCompleted = 1;
- mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem);
- mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+ mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+ mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
+ assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
mtctx->jobIDMask = nbJobs - 1;
- mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
- mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
- if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
+ mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
+ initError = ZSTDMT_serialState_init(&mtctx->serial);
+ mtctx->roundBuff = kNullRoundBuff;
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
ZSTDMT_freeCCtx(mtctx);
return NULL;
}
- if (ZSTD_pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
- ZSTDMT_freeCCtx(mtctx);
- return NULL;
- }
- if (ZSTD_pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
- ZSTDMT_freeCCtx(mtctx);
- return NULL;
- }
- DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
+ DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
return mtctx;
}
-ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
+ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
{
- return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
+ return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
}
+
/* ZSTDMT_releaseAllJobResources() :
* note : ensure all workers are killed first ! */
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
@@ -523,29 +860,26 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
- DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].src.start);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
- mtctx->jobs[jobID].src = g_nullBuffer;
+ mtctx->jobs[jobID].cSize = 0;
}
memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
- DEBUGLOG(4, "input: release address %08X", (U32)(size_t)mtctx->inBuff.buffer.start);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
mtctx->allJobsCompleted = 1;
}
-static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
{
DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
- while (zcs->doneJobID < zcs->nextJobID) {
- unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
- ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
- while (zcs->jobs[jobID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
- ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+ while (mtctx->doneJobID < mtctx->nextJobID) {
+ unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
+ while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
+ DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */
+ ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
}
- ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
- zcs->doneJobID++;
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+ mtctx->doneJobID++;
}
}
@@ -554,12 +888,14 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
if (mtctx==NULL) return 0; /* compatible with free on NULL */
POOL_free(mtctx->factory); /* stop and free worker threads */
ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */
- ZSTD_free(mtctx->jobs, mtctx->cMem);
+ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
ZSTDMT_freeBufferPool(mtctx->bufPool);
ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+ ZSTDMT_freeSeqPool(mtctx->seqPool);
+ ZSTDMT_serialState_free(&mtctx->serial);
ZSTD_freeCDict(mtctx->cdictLocal);
- ZSTD_pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
- ZSTD_pthread_cond_destroy(&mtctx->jobCompleted_cond);
+ if (mtctx->roundBuff.buffer)
+ ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
ZSTD_free(mtctx, mtctx->cMem);
return 0;
}
@@ -572,7 +908,9 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
+ ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
+ (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+ ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
- + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+ + ZSTDMT_sizeof_seqPool(mtctx->seqPool)
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal)
+ + mtctx->roundBuff.capacity;
}
/* Internal only */
@@ -612,133 +950,224 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
}
}
+/* Sets parameters relevant to the compression job,
+ * initializing others to default values. */
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+{
+ ZSTD_CCtx_params jobParams;
+ memset(&jobParams, 0, sizeof(jobParams));
+
+ jobParams.cParams = params.cParams;
+ jobParams.fParams = params.fParams;
+ jobParams.compressionLevel = params.compressionLevel;
+ jobParams.disableLiteralCompression = params.disableLiteralCompression;
+
+ return jobParams;
+}
+
+/*! ZSTDMT_updateCParams_whileCompressing() :
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
+ * New parameters will be applied to next compression job. */
+void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
+{
+ U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */
+ int const compressionLevel = cctxParams->compressionLevel;
+ DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
+ compressionLevel);
+ mtctx->params.compressionLevel = compressionLevel;
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
+ cParams.windowLog = saved_wlog;
+ mtctx->params.cParams = cParams;
+ }
+}
+
+/* ZSTDMT_getNbWorkers():
+ * @return nb threads currently active in mtctx.
+ * mtctx must be valid */
+unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
+{
+ assert(mtctx != NULL);
+ return mtctx->params.nbWorkers;
+}
+
+/* ZSTDMT_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads.
+ * Note : mutex will be acquired during statistics collection. */
+ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
+{
+ ZSTD_frameProgression fps;
+ DEBUGLOG(6, "ZSTDMT_getFrameProgression");
+ fps.consumed = mtctx->consumed;
+ fps.produced = mtctx->produced;
+ fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
+ { unsigned jobNb;
+ unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
+ DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
+ mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+ for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
+ unsigned const wJobID = jobNb & mtctx->jobIDMask;
+ ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex);
+ { size_t const cResult = mtctx->jobs[wJobID].cSize;
+ size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
+ fps.consumed += mtctx->jobs[wJobID].consumed;
+ fps.ingested += mtctx->jobs[wJobID].src.size;
+ fps.produced += produced;
+ }
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+ }
+ }
+ return fps;
+}
+
+
/* ------------------------------------------ */
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
-static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
- size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
- size_t const chunkMaxSize = chunkSizeTarget << 2;
- size_t const passSizeMax = chunkMaxSize * nbThreads;
- unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
- unsigned const nbChunksLarge = multiplier * nbThreads;
- unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
- unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
- return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+{
+ if (params.ldmParams.enableLdm)
+ return MAX(21, params.cParams.chainLog + 4);
+ return MAX(20, params.cParams.windowLog + 2);
+}
+
+static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
+{
+ unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
+ if (params.ldmParams.enableLdm)
+ return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
+ return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
}
+static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
+ assert(nbWorkers>0);
+ { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
+ size_t const jobMaxSize = jobSizeTarget << 2;
+ size_t const passSizeMax = jobMaxSize * nbWorkers;
+ unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+ unsigned const nbJobsLarge = multiplier * nbWorkers;
+ unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
+ unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
+ return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
+} }
+
+/* ZSTDMT_compress_advanced_internal() :
+ * This is a blocking function : it will only give back control to caller after finishing its compression job.
+ */
static size_t ZSTDMT_compress_advanced_internal(
ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params const params)
+ ZSTD_CCtx_params params)
{
- ZSTD_CCtx_params const jobParams = ZSTDMT_makeJobCCtxParams(params);
- unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
- size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
- unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
- size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
- size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
+ ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
+ size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
+ unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+ size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
+ size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src;
size_t remainingSrcSize = srcSize;
- unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
+ unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */
size_t frameStartPos = 0, dstBufferPos = 0;
- XXH64_state_t xxh64;
- assert(jobParams.nbThreads == 0);
- assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
+ assert(jobParams.nbWorkers == 0);
+ assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
- DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
- nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
- if (nbChunks==1) { /* fallback to single-thread mode */
+ params.jobSize = (U32)avgJobSize;
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
+ nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
+
+ if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
}
- assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
- ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
- XXH64_reset(&xxh64, 0);
- if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
- U32 nbJobs = nbChunks;
- ZSTD_free(mtctx->jobs, mtctx->cMem);
+ assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+ return ERROR(memory_allocation);
+
+ if (nbJobs > mtctx->jobIDMask+1) { /* enlarge job table */
+ U32 jobsTableSize = nbJobs;
+ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
mtctx->jobIDMask = 0;
- mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
+ mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
- mtctx->jobIDMask = nbJobs - 1;
+ assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0)); /* ensure jobsTableSize is a power of 2 */
+ mtctx->jobIDMask = jobsTableSize - 1;
}
{ unsigned u;
- for (u=0; u<nbChunks; u++) {
- size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
- size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
+ for (u=0; u<nbJobs; u++) {
+ size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
+ size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
size_t dictSize = u ? overlapSize : 0;
- mtctx->jobs[u].src = g_nullBuffer;
- mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
- mtctx->jobs[u].prefixSize = dictSize;
- mtctx->jobs[u].srcSize = chunkSize;
+ mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
+ mtctx->jobs[u].prefix.size = dictSize;
+ mtctx->jobs[u].src.start = srcStart + frameStartPos;
+ mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */
+ mtctx->jobs[u].consumed = 0;
+ mtctx->jobs[u].cSize = 0;
mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
mtctx->jobs[u].fullFrameSize = srcSize;
mtctx->jobs[u].params = jobParams;
/* do not calculate checksum within sections, but write it in header for first section */
- if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
mtctx->jobs[u].dstBuff = dstBuffer;
mtctx->jobs[u].cctxPool = mtctx->cctxPool;
mtctx->jobs[u].bufPool = mtctx->bufPool;
- mtctx->jobs[u].firstChunk = (u==0);
- mtctx->jobs[u].lastChunk = (u==nbChunks-1);
- mtctx->jobs[u].jobCompleted = 0;
- mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
- mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
-
- if (params.fParams.checksumFlag) {
- XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
- }
+ mtctx->jobs[u].seqPool = mtctx->seqPool;
+ mtctx->jobs[u].serial = &mtctx->serial;
+ mtctx->jobs[u].jobID = u;
+ mtctx->jobs[u].firstJob = (u==0);
+ mtctx->jobs[u].lastJob = (u==nbJobs-1);
- DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)chunkSize);
- DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
- POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+ DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize);
+ DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
+ POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
- frameStartPos += chunkSize;
+ frameStartPos += jobSize;
dstBufferPos += dstBufferCapacity;
- remainingSrcSize -= chunkSize;
+ remainingSrcSize -= jobSize;
} }
/* collect result */
{ size_t error = 0, dstPos = 0;
- unsigned chunkID;
- for (chunkID=0; chunkID<nbChunks; chunkID++) {
- DEBUGLOG(5, "waiting for chunk %u ", chunkID);
- ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
- while (mtctx->jobs[chunkID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
- ZSTD_pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+ unsigned jobID;
+ for (jobID=0; jobID<nbJobs; jobID++) {
+ DEBUGLOG(5, "waiting for job %u ", jobID);
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
+ while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
+ DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
+ ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
}
- ZSTD_pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
- DEBUGLOG(5, "ready to write chunk %u ", chunkID);
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+ DEBUGLOG(5, "ready to write job %u ", jobID);
- mtctx->jobs[chunkID].srcStart = NULL;
- { size_t const cSize = mtctx->jobs[chunkID].cSize;
+ { size_t const cSize = mtctx->jobs[jobID].cSize;
if (ZSTD_isError(cSize)) error = cSize;
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
- if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
+ if (jobID) { /* note : job 0 is written directly at dst, which is correct position */
if (!error)
- memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
- if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
- DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
+ memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */
+ if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */
+ DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
} }
- mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].cSize = 0;
dstPos += cSize ;
}
- } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
+ } /* for (jobID=0; jobID<nbJobs; jobID++) */
DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
if (params.fParams.checksumFlag) {
- U32 const checksum = (U32)XXH64_digest(&xxh64);
+ U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
if (dstPos + 4 > dstCapacity) {
error = ERROR(dstSize_tooSmall);
} else {
@@ -756,7 +1185,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
- ZSTD_parameters const params,
+ ZSTD_parameters params,
unsigned overlapLog)
{
ZSTD_CCtx_params cctxParams = mtctx->params;
@@ -787,66 +1216,104 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
/* ====================================== */
size_t ZSTDMT_initCStream_internal(
- ZSTDMT_CCtx* zcs,
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+ ZSTDMT_CCtx* mtctx,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize)
{
- DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
+ (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
- assert(zcs->cctxPool->totalCCtx == params.nbThreads);
- zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
-
- if (zcs->singleThreaded) {
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params);
- DEBUGLOG(4, "single thread mode");
- assert(singleThreadParams.nbThreads == 0);
- return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+ assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
+
+ /* init */
+ if (params.jobSize == 0) {
+ params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params);
+ }
+ if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+
+ mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
+ if (mtctx->singleBlockingThread) {
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+ DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
+ assert(singleThreadParams.nbWorkers == 0);
+ return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
dict, dictSize, cdict,
singleThreadParams, pledgedSrcSize);
}
- DEBUGLOG(4, "multi-threading mode (%u threads)", params.nbThreads);
- if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- zcs->allJobsCompleted = 1;
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
+
+ if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ mtctx->allJobsCompleted = 1;
}
- zcs->params = params;
- zcs->frameContentSize = pledgedSrcSize;
+ mtctx->params = params;
+ mtctx->frameContentSize = pledgedSrcSize;
if (dict) {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, dictMode, /* note : a loadPrefix becomes an internal CDict */
- params.cParams, zcs->cMem);
- zcs->cdict = zcs->cdictLocal;
- if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
+ params.cParams, mtctx->cMem);
+ mtctx->cdict = mtctx->cdictLocal;
+ if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
} else {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = NULL;
- zcs->cdict = cdict;
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = NULL;
+ mtctx->cdict = cdict;
}
- assert(params.overlapSizeLog <= 9);
- zcs->targetDictSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog));
- DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetDictSize>>10));
- zcs->targetSectionSize = params.jobSize ? params.jobSize : (size_t)1 << (params.cParams.windowLog + 2);
- if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
- if (zcs->targetSectionSize < zcs->targetDictSize) zcs->targetSectionSize = zcs->targetDictSize; /* job size must be >= overlap size */
- DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(zcs->targetSectionSize>>10), params.jobSize);
- zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
- DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10));
- ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
- zcs->inBuff.buffer = g_nullBuffer;
- zcs->dictSize = 0;
- zcs->doneJobID = 0;
- zcs->nextJobID = 0;
- zcs->frameEnded = 0;
- zcs->allJobsCompleted = 0;
- if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
+ mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
+ DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
+ mtctx->targetSectionSize = params.jobSize;
+ if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
+ if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
+ DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
+ {
+ /* If ldm is enabled we need windowSize space. */
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ /* Two buffers of slack, plus extra space for the overlap
+ * This is the minimum slack that LDM works with. One extra because
+ * flush might waste up to targetSectionSize-1 bytes. Another extra
+ * for the overlap (if > 0), then one to fill which doesn't overlap
+ * with the LDM window.
+ */
+ size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0);
+ size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers;
+ /* Compute the total size, and always have enough slack */
+ size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1);
+ size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers;
+ size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
+ if (mtctx->roundBuff.capacity < capacity) {
+ if (mtctx->roundBuff.buffer)
+ ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
+ mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
+ if (mtctx->roundBuff.buffer == NULL) {
+ mtctx->roundBuff.capacity = 0;
+ return ERROR(memory_allocation);
+ }
+ mtctx->roundBuff.capacity = capacity;
+ }
+ }
+ DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10));
+ mtctx->roundBuff.pos = 0;
+ mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
+ mtctx->inBuff.prefix = kNullRange;
+ mtctx->doneJobID = 0;
+ mtctx->nextJobID = 0;
+ mtctx->frameEnded = 0;
+ mtctx->allJobsCompleted = 0;
+ mtctx->consumed = 0;
+ mtctx->produced = 0;
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+ return ERROR(memory_allocation);
return 0;
}
@@ -855,11 +1322,11 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
ZSTD_parameters params,
unsigned long long pledgedSrcSize)
{
- ZSTD_CCtx_params cctxParams = mtctx->params;
- DEBUGLOG(5, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
+ ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
+ DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
cctxParams.cParams = params.cParams;
cctxParams.fParams = params.fParams;
- return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
cctxParams, pledgedSrcSize);
}
@@ -869,10 +1336,10 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
unsigned long long pledgedSrcSize)
{
ZSTD_CCtx_params cctxParams = mtctx->params;
+ if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
cctxParams.fParams = fParams;
- if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
- return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dm_auto, cdict,
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
cctxParams, pledgedSrcSize);
}
@@ -881,148 +1348,358 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
* pledgedSrcSize can be zero == unknown (for the time being)
* prefer using ZSTD_CONTENTSIZE_UNKNOWN,
* as `0` might mean "empty" in the future */
-size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
+size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
{
if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
- if (zcs->params.nbThreads==1)
- return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, 0, zcs->params,
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
pledgedSrcSize);
}
-size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
- ZSTD_CCtx_params cctxParams = zcs->params;
+size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+ ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
+ DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
cctxParams.cParams = params.cParams;
cctxParams.fParams = params.fParams;
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
-}
-
-
-static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
-{
- unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
-
- DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
- zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
- zcs->jobs[jobID].src = zcs->inBuff.buffer;
- zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
- zcs->jobs[jobID].srcSize = srcSize;
- zcs->jobs[jobID].prefixSize = zcs->dictSize;
- assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
- zcs->jobs[jobID].params = zcs->params;
- /* do not calculate checksum within sections, but write it in header for first section */
- if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
- zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
- zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
- zcs->jobs[jobID].dstBuff = g_nullBuffer;
- zcs->jobs[jobID].cctxPool = zcs->cctxPool;
- zcs->jobs[jobID].bufPool = zcs->bufPool;
- zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
- zcs->jobs[jobID].lastChunk = endFrame;
- zcs->jobs[jobID].jobCompleted = 0;
- zcs->jobs[jobID].dstFlushed = 0;
- zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
- zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
-
- if (zcs->params.fParams.checksumFlag)
- XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
-
- /* get a new buffer for next input */
- if (!endFrame) {
- size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
- if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
- zcs->jobs[jobID].jobCompleted = 1;
- zcs->nextJobID++;
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- return ERROR(memory_allocation);
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+
+/* ZSTDMT_writeLastEmptyBlock()
+ * Write a single empty block with an end-of-frame to finish a frame.
+ * Job must be created from streaming variant.
+ * This function is always successfull if expected conditions are fulfilled.
+ */
+static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
+{
+ assert(job->lastJob == 1);
+ assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */
+ assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */
+ assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
+ job->dstBuff = ZSTDMT_getBuffer(job->bufPool);
+ if (job->dstBuff.start == NULL) {
+ job->cSize = ERROR(memory_allocation);
+ return;
+ }
+ assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */
+ job->src = kNullRange;
+ job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity);
+ assert(!ZSTD_isError(job->cSize));
+ assert(job->consumed == 0);
+}
+
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
+{
+ unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask;
+ int const endFrame = (endOp == ZSTD_e_end);
+
+ if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) {
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
+ assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask));
+ return 0;
+ }
+
+ if (!mtctx->jobReady) {
+ BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start;
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
+ mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size);
+ mtctx->jobs[jobID].src.start = src;
+ mtctx->jobs[jobID].src.size = srcSize;
+ assert(mtctx->inBuff.filled >= srcSize);
+ mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix;
+ mtctx->jobs[jobID].consumed = 0;
+ mtctx->jobs[jobID].cSize = 0;
+ mtctx->jobs[jobID].params = mtctx->params;
+ mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? mtctx->cdict : NULL;
+ mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize;
+ mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
+ mtctx->jobs[jobID].bufPool = mtctx->bufPool;
+ mtctx->jobs[jobID].seqPool = mtctx->seqPool;
+ mtctx->jobs[jobID].serial = &mtctx->serial;
+ mtctx->jobs[jobID].jobID = mtctx->nextJobID;
+ mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
+ mtctx->jobs[jobID].lastJob = endFrame;
+ mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
+ mtctx->jobs[jobID].dstFlushed = 0;
+
+ /* Update the round buffer pos and clear the input buffer to be reset */
+ mtctx->roundBuff.pos += srcSize;
+ mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
+ /* Set the prefix */
+ if (!endFrame) {
+ size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
+ mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
+ mtctx->inBuff.prefix.size = newPrefixSize;
+ } else { /* endFrame==1 => no need for another input buffer */
+ mtctx->inBuff.prefix = kNullRange;
+ mtctx->frameEnded = endFrame;
+ if (mtctx->nextJobID == 0) {
+ /* single job exception : checksum is already calculated directly within worker thread */
+ mtctx->params.fParams.checksumFlag = 0;
+ } }
+
+ if ( (srcSize == 0)
+ && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) {
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
+ assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */
+ ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
+ mtctx->nextJobID++;
+ return 0;
}
- zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
- memmove(zcs->inBuff.buffer.start,
- (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
- zcs->inBuff.filled);
- zcs->dictSize = newDictSize;
- } else { /* if (endFrame==1) */
- zcs->inBuff.buffer = g_nullBuffer;
- zcs->inBuff.filled = 0;
- zcs->dictSize = 0;
- zcs->frameEnded = 1;
- if (zcs->nextJobID == 0) {
- /* single chunk exception : checksum is calculated directly within worker thread */
- zcs->params.fParams.checksumFlag = 0;
- } }
+ }
- DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
- zcs->nextJobID,
- (U32)zcs->jobs[jobID].srcSize,
- zcs->jobs[jobID].lastChunk,
- zcs->doneJobID,
- zcs->doneJobID & zcs->jobIDMask);
- POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
- zcs->nextJobID++;
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))",
+ mtctx->nextJobID,
+ (U32)mtctx->jobs[jobID].src.size,
+ mtctx->jobs[jobID].lastJob,
+ mtctx->nextJobID,
+ jobID);
+ if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) {
+ mtctx->nextJobID++;
+ mtctx->jobReady = 0;
+ } else {
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
+ mtctx->jobReady = 1;
+ }
return 0;
}
-/* ZSTDMT_flushNextJob() :
- * output : will be updated with amount of data flushed .
- * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
- * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
-static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+/*! ZSTDMT_flushProduced() :
+ * `output` : `pos` will be updated with amount of data flushed .
+ * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
+static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
{
- unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
- DEBUGLOG(5, "ZSTDMT_flushNextJob");
- if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */
- ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
- while (zcs->jobs[wJobID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
- if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */
- ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */
+ unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
+ DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
+ blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
+ assert(output->size >= output->pos);
+
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
+ if ( blockToFlush
+ && (mtctx->doneJobID < mtctx->nextJobID) ) {
+ assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
+ while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */
+ if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) {
+ DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size);
+ break;
+ }
+ DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
+ ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */
+ } }
+
+ /* try to flush something */
+ { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */
+ size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */
+ size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+ if (ZSTD_isError(cSize)) {
+ DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
+ mtctx->doneJobID, ZSTD_getErrorName(cSize));
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ return cSize;
+ }
+ /* add frame checksum if necessary (can only happen once) */
+ assert(srcConsumed <= srcSize);
+ if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */
+ && mtctx->jobs[wJobID].frameChecksumNeeded ) {
+ U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
+ DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
+ MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum);
+ cSize += 4;
+ mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */
+ mtctx->jobs[wJobID].frameChecksumNeeded = 0;
+ }
+ if (cSize > 0) { /* compression is ongoing or completed */
+ size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
+ DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
+ (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize);
+ assert(mtctx->doneJobID < mtctx->nextJobID);
+ assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
+ assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
+ memcpy((char*)output->dst + output->pos,
+ (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
+ toFlush);
+ output->pos += toFlush;
+ mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
+
+ if ( (srcConsumed == srcSize) /* job completed */
+ && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */
+ DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
+ mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */
+ mtctx->consumed += srcSize;
+ mtctx->produced += cSize;
+ mtctx->doneJobID++;
+ } }
+
+ /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
+ if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
+ if (srcSize > srcConsumed) return 1; /* current job not completely compressed */
}
- ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
- /* compression job completed : output can be flushed */
- { ZSTDMT_jobDescription job = zcs->jobs[wJobID];
- if (!job.jobScanned) {
- if (ZSTD_isError(job.cSize)) {
- DEBUGLOG(5, "job %u : compression error detected : %s",
- zcs->doneJobID, ZSTD_getErrorName(job.cSize));
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- return job.cSize;
+ if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */
+ if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */
+ if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */
+ mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */
+ if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */
+ return 0; /* internal buffers fully flushed */
+}
+
+/**
+ * Returns the range of data used by the earliest job that is not yet complete.
+ * If the data of the first job is broken up into two segments, we cover both
+ * sections.
+ */
+static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
+{
+ unsigned const firstJobID = mtctx->doneJobID;
+ unsigned const lastJobID = mtctx->nextJobID;
+ unsigned jobID;
+
+ for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
+ unsigned const wJobID = jobID & mtctx->jobIDMask;
+ size_t consumed;
+
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
+ consumed = mtctx->jobs[wJobID].consumed;
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+
+ if (consumed < mtctx->jobs[wJobID].src.size) {
+ range_t range = mtctx->jobs[wJobID].prefix;
+ if (range.size == 0) {
+ /* Empty prefix */
+ range = mtctx->jobs[wJobID].src;
}
- DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
- if (zcs->params.fParams.checksumFlag) {
- if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
- U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
- DEBUGLOG(5, "writing checksum : %08X \n", checksum);
- MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
- job.cSize += 4;
- zcs->jobs[wJobID].cSize += 4;
- } }
- zcs->jobs[wJobID].jobScanned = 1;
+ /* Job source in multiple segments not supported yet */
+ assert(range.start <= mtctx->jobs[wJobID].src.start);
+ return range;
}
- { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
- DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
- memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
- output->pos += toWrite;
- job.dstFlushed += toWrite;
+ }
+ return kNullRange;
+}
+
+/**
+ * Returns non-zero iff buffer and range overlap.
+ */
+static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
+{
+ BYTE const* const bufferStart = (BYTE const*)buffer.start;
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+ BYTE const* const rangeStart = (BYTE const*)range.start;
+ BYTE const* const rangeEnd = rangeStart + range.size;
+
+ if (rangeStart == NULL || bufferStart == NULL)
+ return 0;
+ /* Empty ranges cannot overlap */
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+ return 0;
+
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
+}
+
+static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
+{
+ range_t extDict;
+ range_t prefix;
+
+ extDict.start = window.dictBase + window.lowLimit;
+ extDict.size = window.dictLimit - window.lowLimit;
+
+ prefix.start = window.base + window.dictLimit;
+ prefix.size = window.nextSrc - (window.base + window.dictLimit);
+ DEBUGLOG(5, "extDict [0x%zx, 0x%zx)",
+ (size_t)extDict.start,
+ (size_t)extDict.start + extDict.size);
+ DEBUGLOG(5, "prefix [0x%zx, 0x%zx)",
+ (size_t)prefix.start,
+ (size_t)prefix.start + prefix.size);
+
+ return ZSTDMT_isOverlapped(buffer, extDict)
+ || ZSTDMT_isOverlapped(buffer, prefix);
+}
+
+static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
+{
+ if (mtctx->params.ldmParams.enableLdm) {
+ ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
+ DEBUGLOG(5, "source [0x%zx, 0x%zx)",
+ (size_t)buffer.start,
+ (size_t)buffer.start + buffer.capacity);
+ ZSTD_PTHREAD_MUTEX_LOCK(mutex);
+ while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
+ DEBUGLOG(6, "Waiting for LDM to finish...");
+ ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
}
- if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
- ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
- zcs->jobs[wJobID].dstBuff = g_nullBuffer;
- zcs->jobs[wJobID].jobCompleted = 0;
- zcs->doneJobID++;
- } else {
- zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
+ DEBUGLOG(6, "Done waiting for LDM to finish");
+ ZSTD_pthread_mutex_unlock(mutex);
+ }
+}
+
+/**
+ * Attempts to set the inBuff to the next section to fill.
+ * If any part of the new section is still in use we give up.
+ * Returns non-zero if the buffer is filled.
+ */
+static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
+{
+ range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
+ size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
+ size_t const target = mtctx->targetSectionSize;
+ buffer_t buffer;
+
+ assert(mtctx->inBuff.buffer.start == NULL);
+ assert(mtctx->roundBuff.capacity >= target);
+
+ if (spaceLeft < target) {
+ /* ZSTD_invalidateRepCodes() doesn't work for extDict variants.
+ * Simply copy the prefix to the beginning in that case.
+ */
+ BYTE* const start = (BYTE*)mtctx->roundBuff.buffer;
+ size_t const prefixSize = mtctx->inBuff.prefix.size;
+
+ buffer.start = start;
+ buffer.capacity = prefixSize;
+ if (ZSTDMT_isOverlapped(buffer, inUse)) {
+ DEBUGLOG(6, "Waiting for buffer...");
+ return 0;
}
- /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
- if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
- if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */
- zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */
- return 0; /* everything flushed */
-} }
+ ZSTDMT_waitForLdmComplete(mtctx, buffer);
+ memmove(start, mtctx->inBuff.prefix.start, prefixSize);
+ mtctx->inBuff.prefix.start = start;
+ mtctx->roundBuff.pos = prefixSize;
+ }
+ buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
+ buffer.capacity = target;
+
+ if (ZSTDMT_isOverlapped(buffer, inUse)) {
+ DEBUGLOG(6, "Waiting for buffer...");
+ return 0;
+ }
+ assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
+
+ ZSTDMT_waitForLdmComplete(mtctx, buffer);
+
+ DEBUGLOG(5, "Using prefix range [%zx, %zx)",
+ (size_t)mtctx->inBuff.prefix.start,
+ (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size);
+ DEBUGLOG(5, "Using source range [%zx, %zx)",
+ (size_t)buffer.start,
+ (size_t)buffer.start + buffer.capacity);
+
+
+ mtctx->inBuff.buffer = buffer;
+ mtctx->inBuff.filled = 0;
+ assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity);
+ return 1;
+}
/** ZSTDMT_compressStream_generic() :
@@ -1034,13 +1711,13 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp)
{
- size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
unsigned forwardInputProgress = 0;
- DEBUGLOG(5, "ZSTDMT_compressStream_generic ");
+ DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
+ (U32)endOp, (U32)(input->size - input->pos));
assert(output->pos <= output->size);
assert(input->pos <= input->size);
- if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */
+ if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
}
@@ -1050,10 +1727,11 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
}
/* single-pass shortcut (note : synchronous-mode) */
- if ( (mtctx->nextJobID == 0) /* just started */
- && (mtctx->inBuff.filled == 0) /* nothing buffered */
- && (endOp == ZSTD_e_end) /* end order */
- && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
+ if ( (mtctx->nextJobID == 0) /* just started */
+ && (mtctx->inBuff.filled == 0) /* nothing buffered */
+ && (!mtctx->jobReady) /* no job already created */
+ && (endOp == ZSTD_e_end) /* end order */
+ && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
(char*)output->dst + output->pos, output->size - output->pos,
(const char*)input->src + input->pos, input->size - input->pos,
@@ -1061,89 +1739,93 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
if (ZSTD_isError(cSize)) return cSize;
input->pos = input->size;
output->pos += cSize;
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); /* was allocated in initStream */
mtctx->allJobsCompleted = 1;
mtctx->frameEnded = 1;
return 0;
}
/* fill input buffer */
- if (input->size > input->pos) { /* support NULL input */
+ if ( (!mtctx->jobReady)
+ && (input->size > input->pos) ) { /* support NULL input */
if (mtctx->inBuff.buffer.start == NULL) {
- mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool); /* note : may fail, in which case, no forward input progress */
- mtctx->inBuff.filled = 0;
+ assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */
+ if (!ZSTDMT_tryGetInputRange(mtctx)) {
+ /* It is only possible for this operation to fail if there are
+ * still compression jobs ongoing.
+ */
+ assert(mtctx->doneJobID != mtctx->nextJobID);
+ }
}
- if (mtctx->inBuff.buffer.start) {
- size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
- DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+ if (mtctx->inBuff.buffer.start != NULL) {
+ size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
+ assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
+ DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
+ (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
input->pos += toLoad;
mtctx->inBuff.filled += toLoad;
forwardInputProgress = toLoad>0;
- } }
+ }
+ if ((input->pos < input->size) && (endOp == ZSTD_e_end))
+ endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
+ }
- if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
- && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */
- CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
+ if ( (mtctx->jobReady)
+ || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */
+ || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */
+ || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
+ size_t const jobSize = mtctx->inBuff.filled;
+ assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
+ CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
}
/* check for potential compressed data ready to be flushed */
- CHECK_F( ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress /* blockToFlush */) ); /* block if there was no forward input progress */
-
- if (input->pos < input->size) /* input not consumed : do not flush yet */
- endOp = ZSTD_e_continue;
-
- switch(endOp)
- {
- case ZSTD_e_flush:
- return ZSTDMT_flushStream(mtctx, output);
- case ZSTD_e_end:
- return ZSTDMT_endStream(mtctx, output);
- case ZSTD_e_continue:
- return 1;
- default:
- return ERROR(GENERIC); /* invalid endDirective */
+ { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
+ if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */
+ return remainingToFlush;
}
}
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
- CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
+ CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
/* recommended next input size : fill current input buffer */
- return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+ return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
}
-static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame)
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
{
- size_t const srcSize = mtctx->inBuff.filled - mtctx->dictSize;
+ size_t const srcSize = mtctx->inBuff.filled;
DEBUGLOG(5, "ZSTDMT_flushStream_internal");
- if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded))
- && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {
- DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
+ if ( mtctx->jobReady /* one job ready for a worker to pick up */
+ || (srcSize > 0) /* still some data within input buffer */
+ || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
+ DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
+ (U32)srcSize, (U32)endFrame);
CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
}
/* check if there is any data available to flush */
- return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */);
+ return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
}
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
{
DEBUGLOG(5, "ZSTDMT_flushStream");
- if (mtctx->singleThreaded)
+ if (mtctx->singleBlockingThread)
return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
- return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
+ return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
}
size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
{
DEBUGLOG(4, "ZSTDMT_endStream");
- if (mtctx->singleThreaded)
+ if (mtctx->singleBlockingThread)
return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
- return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
+ return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
}
diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h
index d12f0adb8d..f79e3b4418 100644
--- a/thirdparty/zstd/compress/zstdmt_compress.h
+++ b/thirdparty/zstd/compress/zstdmt_compress.h
@@ -30,15 +30,15 @@
/* === Memory management === */
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
-ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
-ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
ZSTD_customMem cMem);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
-/* === Simple buffer-to-butter one-pass function === */
+/* === Simple one-pass compression function === */
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
@@ -50,7 +50,7 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
/* === Streaming functions === */
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it may change in the future, to mean "empty" */
+ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
@@ -68,7 +68,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
- ZSTD_parameters const params,
+ ZSTD_parameters params,
unsigned overlapLog);
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
@@ -85,7 +85,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
typedef enum {
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
- ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+ ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
} ZSTDMT_parameter;
/* ZSTDMT_setMTCtxParameter() :
@@ -97,30 +97,46 @@ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
/*! ZSTDMT_compressStream_generic() :
- * Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
* depending on flush directive.
* @return : minimum amount of data still to be flushed
* 0 if fully flushed
- * or an error code */
+ * or an error code
+ * note : needs to be init using any ZSTD_initCStream*() variant */
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
-/* === Private definitions; never ever use directly === */
+/* ========================================================
+ * === Private interface, for use by ZSTD_compress.c ===
+ * === Not exposed in libzstd. Never invoke directly ===
+ * ======================================================== */
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
-/* ZSTDMT_CCtxParam_setNbThreads()
- * Set nbThreads, and clamp it correctly,
- * also reset jobSize and overlapLog */
-size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads);
+/* ZSTDMT_CCtxParam_setNbWorkers()
+ * Set nbWorkers, and clamp it.
+ * Also reset jobSize and overlapLog */
+size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
-/* ZSTDMT_getNbThreads():
+/*! ZSTDMT_updateCParams_whileCompressing() :
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
+ * New parameters will be applied to next compression job. */
+void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
+
+/* ZSTDMT_getNbWorkers():
* @return nb threads currently active in mtctx.
* mtctx must be valid */
-size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
+unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
+
+/* ZSTDMT_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads.
+ */
+ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
+
/*! ZSTDMT_initCStream_internal() :
* Private use only. Init streaming operation.
@@ -128,7 +144,7 @@ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
* must receive dict, or cdict, or none, but not both.
* @return : 0, or an error code */
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c
index 79ded96bf6..73f5c46c06 100644
--- a/thirdparty/zstd/decompress/huf_decompress.c
+++ b/thirdparty/zstd/decompress/huf_decompress.c
@@ -49,18 +49,19 @@
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
/* **************************************************************
* Byte alignment for workSpace management
****************************************************************/
-#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
/*-***************************/
/* generic DTableDesc */
/*-***************************/
-
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
@@ -74,7 +75,6 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
/*-***************************/
/* single-symbol decoding */
/*-***************************/
-
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
@@ -94,10 +94,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
- if ((spaceUsed32 << 2) > wkspSize)
- return ERROR(tableLog_tooLarge);
- workSpace = (U32 *)workSpace + spaceUsed32;
- wkspSize -= (spaceUsed32 << 2);
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
@@ -144,8 +141,10 @@ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
workSpace, sizeof(workSpace));
}
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
BYTE const c = dt[val].byte;
@@ -156,7 +155,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
*ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
@@ -164,30 +163,33 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
if (MEM_64bits()) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
}
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
- /* no more data to retrieve from bitstream, hence no need to reload */
+ /* no more data to retrieve from bitstream, no need to reload */
while (p < pEnd)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
return pEnd-pStart;
}
-static size_t HUF_decompress1X2_usingDTable_internal(
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
@@ -200,58 +202,17 @@ static size_t HUF_decompress1X2_usingDTable_internal(
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode; }
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
- /* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
return dstSize;
}
-size_t HUF_decompress1X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- DTableDesc dtd = HUF_getDTableDesc(DTable);
- if (dtd.tableType != 0) return ERROR(GENERIC);
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- void* workSpace, size_t wkspSize)
-{
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize; cSrcSize -= hSize;
-
- return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-
-size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize)
-{
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
- workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
-
-static size_t HUF_decompress4X2_usingDTable_internal(
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
@@ -286,23 +247,19 @@ static size_t HUF_decompress4X2_usingDTable_internal(
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
- U32 endSignal;
+ U32 endSignal = BIT_DStream_unfinished;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode; }
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
- /* 16-32 symbols per loop (4-8 symbols per stream) */
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
+ while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -319,10 +276,15 @@ static size_t HUF_decompress4X2_usingDTable_internal(
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ BIT_reloadDStream(&bitD1);
+ BIT_reloadDStream(&bitD2);
+ BIT_reloadDStream(&bitD3);
+ BIT_reloadDStream(&bitD4);
}
/* check corruption */
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
@@ -335,8 +297,8 @@ static size_t HUF_decompress4X2_usingDTable_internal(
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
/* decoded size */
return dstSize;
@@ -344,30 +306,309 @@ static size_t HUF_decompress4X2_usingDTable_internal(
}
-size_t HUF_decompress4X2_usingDTable(
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+ } }
+ return 1;
+}
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+ const HUF_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to end : up to 2 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X4_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BIT_DStream_t bitD;
+
+ /* Init */
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ /* decode */
+ { BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
+ const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
+ }
+
+ /* check */
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+}
+
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X4_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+ const void *cSrc,
+ size_t cSrcSize,
+ const HUF_DTable *DTable);
+#if DYNAMIC_BMI2
+
+#define X(fn) \
+ \
+ static size_t fn##_default( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ if (bmi2) { \
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#else
+
+#define X(fn) \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ (void)bmi2; \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#endif
+
+X(HUF_decompress1X2_usingDTable_internal)
+X(HUF_decompress4X2_usingDTable_internal)
+X(HUF_decompress1X4_usingDTable_internal)
+X(HUF_decompress4X4_usingDTable_internal)
+
+#undef X
+
+
+size_t HUF_decompress1X2_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 0) return ERROR(GENERIC);
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
-
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{
const BYTE* ip = (const BYTE*) cSrc;
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}
@@ -387,8 +628,6 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
/* *************************/
/* double-symbols decoding */
/* *************************/
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* HUF_fillDTableX4Level2() :
@@ -508,10 +747,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
- if ((spaceUsed32 << 2) > wkspSize)
- return ERROR(tableLog_tooLarge);
- workSpace = (U32 *)workSpace + spaceUsed32;
- wkspSize -= (spaceUsed32 << 2);
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
rankStart = rankStart0 + 1;
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
@@ -588,95 +824,6 @@ size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
workSpace, sizeof(workSpace));
}
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BIT_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
- else {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
- BIT_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
- } }
- return 1;
-}
-
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to end : up to 2 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-
-static size_t HUF_decompress1X4_usingDTable_internal(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- BIT_DStream_t bitD;
-
- /* Init */
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
- }
-
- /* decode */
- { BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
- HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
- }
-
- /* check */
- if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
-}
-
size_t HUF_decompress1X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -684,7 +831,7 @@ size_t HUF_decompress1X4_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
@@ -699,7 +846,7 @@ size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}
@@ -717,99 +864,6 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
-static size_t HUF_decompress4X4_usingDTable_internal(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- { const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable+1;
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- size_t const length1 = MEM_readLE16(istart);
- size_t const length2 = MEM_readLE16(istart+2);
- size_t const length3 = MEM_readLE16(istart+4);
- size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- size_t const segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
- U32 const dtLog = dtd.tableLog;
-
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode; }
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endCheck) return ERROR(corruption_detected); }
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
size_t HUF_decompress4X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -817,13 +871,12 @@ size_t HUF_decompress4X4_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
-
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
- void* workSpace, size_t wkspSize)
+ void* workSpace, size_t wkspSize, int bmi2)
{
const BYTE* ip = (const BYTE*) cSrc;
@@ -833,7 +886,14 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
@@ -861,8 +921,8 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -870,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
@@ -898,21 +958,22 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
};
/** HUF_selectDecoder() :
-* Tells which decoder is likely to decode faster,
-* based on a set of pre-determined metrics.
-* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-* Assumption : 0 < cSrcSize, dstSize <= 128 KB */
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
+ assert(dstSize > 0);
+ assert(dstSize <= 128 KB);
/* decoder timing evaluation */
- U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
- U32 const D256 = (U32)(dstSize >> 8);
- U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
- U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
- DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
-
- return DTime1 < DTime0;
-}
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
+ U32 const D256 = (U32)(dstSize >> 8);
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
+ return DTime1 < DTime0;
+} }
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
@@ -994,3 +1055,42 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ }
+}
diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c
index a59d944112..3ec6a1cb32 100644
--- a/thirdparty/zstd/decompress/zstd_decompress.c
+++ b/thirdparty/zstd/decompress/zstd_decompress.c
@@ -14,8 +14,9 @@
*****************************************************************/
/*!
* HEAPMODE :
- * Select how default decompression function ZSTD_decompress() will allocate memory,
- * in memory stack (0), or in memory heap (1, requires malloc())
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
*/
#ifndef ZSTD_HEAPMODE
# define ZSTD_HEAPMODE 1
@@ -23,17 +24,18 @@
/*!
* LEGACY_SUPPORT :
-* if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
+* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
*/
#ifndef ZSTD_LEGACY_SUPPORT
# define ZSTD_LEGACY_SUPPORT 0
#endif
/*!
-* MAXWINDOWSIZE_DEFAULT :
-* maximum window size accepted by DStream, by default.
-* Frames requiring more memory will be rejected.
-*/
+ * MAXWINDOWSIZE_DEFAULT :
+ * maximum window size accepted by DStream __by default__.
+ * Frames requiring more memory will be rejected.
+ * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
#endif
@@ -43,6 +45,7 @@
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
+#include "cpu.h"
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
@@ -80,10 +83,25 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
typedef enum { zdss_init=0, zdss_loadHeader,
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef struct {
+ U32 fastMode;
+ U32 tableLog;
+} ZSTD_seqSymbol_header;
+
+typedef struct {
+ U16 nextState;
+ BYTE nbAdditionalBits;
+ BYTE nbBits;
+ U32 baseValue;
+} ZSTD_seqSymbol;
+
+#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
+
typedef struct {
- FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
+ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
+ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
U32 rep[ZSTD_REP_NUM];
@@ -91,9 +109,9 @@ typedef struct {
struct ZSTD_DCtx_s
{
- const FSE_DTable* LLTptr;
- const FSE_DTable* MLTptr;
- const FSE_DTable* OFTptr;
+ const ZSTD_seqSymbol* LLTptr;
+ const ZSTD_seqSymbol* MLTptr;
+ const ZSTD_seqSymbol* OFTptr;
const HUF_DTable* HUFptr;
ZSTD_entropyDTables_t entropy;
const void* previousDstEnd; /* detect continuity */
@@ -116,6 +134,7 @@ struct ZSTD_DCtx_s
size_t litSize;
size_t rleSize;
size_t staticSize;
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
/* streaming */
ZSTD_DDict* ddictLocal;
@@ -173,6 +192,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->inBuffSize = 0;
dctx->outBuffSize = 0;
dctx->streamStage = zdss_init;
+ dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
}
ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
@@ -204,6 +224,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
ZSTD_DCtx* ZSTD_createDCtx(void)
{
+ DEBUGLOG(3, "ZSTD_createDCtx");
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
}
@@ -234,8 +255,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
/*-*************************************************************
-* Decompression section
-***************************************************************/
+ * Frame header decoding
+ ***************************************************************/
/*! ZSTD_isFrame() :
* Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -257,7 +278,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
/** ZSTD_frameHeaderSize_internal() :
* srcSize must be large enough to reach header size fields.
- * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
* @return : size of the Frame Header
* or an error code, which can be tested with ZSTD_isError() */
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
@@ -480,6 +501,10 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
}
+/*-*************************************************************
+ * Block decoding
+ ***************************************************************/
+
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -566,13 +591,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (HUF_isError((litEncType==set_repeat) ?
( singleStream ?
- HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
- HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
+ HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
+ HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
( singleStream ?
- HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
- HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
+ HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
+ HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
return ERROR(corruption_detected);
dctx->litPtr = dctx->litBuffer;
@@ -647,115 +672,268 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
}
}
-
-typedef union {
- FSE_decode_t realData;
- U32 alignedBy4;
-} FSE_decode_t4;
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
/* Default FSE distribution table for Literal Lengths */
-static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
- { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
- /* base, symbol, bits */
- { { 0, 0, 4 } }, { { 16, 0, 4 } }, { { 32, 1, 5 } }, { { 0, 3, 5 } },
- { { 0, 4, 5 } }, { { 0, 6, 5 } }, { { 0, 7, 5 } }, { { 0, 9, 5 } },
- { { 0, 10, 5 } }, { { 0, 12, 5 } }, { { 0, 14, 6 } }, { { 0, 16, 5 } },
- { { 0, 18, 5 } }, { { 0, 19, 5 } }, { { 0, 21, 5 } }, { { 0, 22, 5 } },
- { { 0, 24, 5 } }, { { 32, 25, 5 } }, { { 0, 26, 5 } }, { { 0, 27, 6 } },
- { { 0, 29, 6 } }, { { 0, 31, 6 } }, { { 32, 0, 4 } }, { { 0, 1, 4 } },
- { { 0, 2, 5 } }, { { 32, 4, 5 } }, { { 0, 5, 5 } }, { { 32, 7, 5 } },
- { { 0, 8, 5 } }, { { 32, 10, 5 } }, { { 0, 11, 5 } }, { { 0, 13, 6 } },
- { { 32, 16, 5 } }, { { 0, 17, 5 } }, { { 32, 19, 5 } }, { { 0, 20, 5 } },
- { { 32, 22, 5 } }, { { 0, 23, 5 } }, { { 0, 25, 4 } }, { { 16, 25, 4 } },
- { { 32, 26, 5 } }, { { 0, 28, 6 } }, { { 0, 30, 6 } }, { { 48, 0, 4 } },
- { { 16, 1, 4 } }, { { 32, 2, 5 } }, { { 32, 3, 5 } }, { { 32, 5, 5 } },
- { { 32, 6, 5 } }, { { 32, 8, 5 } }, { { 32, 9, 5 } }, { { 32, 11, 5 } },
- { { 32, 12, 5 } }, { { 0, 15, 6 } }, { { 32, 17, 5 } }, { { 32, 18, 5 } },
- { { 32, 20, 5 } }, { { 32, 21, 5 } }, { { 32, 23, 5 } }, { { 32, 24, 5 } },
- { { 0, 35, 6 } }, { { 0, 34, 6 } }, { { 0, 33, 6 } }, { { 0, 32, 6 } },
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
}; /* LL_defaultDTable */
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
+}; /* OF_defaultDTable */
+
+
/* Default FSE distribution table for Match Lengths */
-static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
- { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
- /* base, symbol, bits */
- { { 0, 0, 6 } }, { { 0, 1, 4 } }, { { 32, 2, 5 } }, { { 0, 3, 5 } },
- { { 0, 5, 5 } }, { { 0, 6, 5 } }, { { 0, 8, 5 } }, { { 0, 10, 6 } },
- { { 0, 13, 6 } }, { { 0, 16, 6 } }, { { 0, 19, 6 } }, { { 0, 22, 6 } },
- { { 0, 25, 6 } }, { { 0, 28, 6 } }, { { 0, 31, 6 } }, { { 0, 33, 6 } },
- { { 0, 35, 6 } }, { { 0, 37, 6 } }, { { 0, 39, 6 } }, { { 0, 41, 6 } },
- { { 0, 43, 6 } }, { { 0, 45, 6 } }, { { 16, 1, 4 } }, { { 0, 2, 4 } },
- { { 32, 3, 5 } }, { { 0, 4, 5 } }, { { 32, 6, 5 } }, { { 0, 7, 5 } },
- { { 0, 9, 6 } }, { { 0, 12, 6 } }, { { 0, 15, 6 } }, { { 0, 18, 6 } },
- { { 0, 21, 6 } }, { { 0, 24, 6 } }, { { 0, 27, 6 } }, { { 0, 30, 6 } },
- { { 0, 32, 6 } }, { { 0, 34, 6 } }, { { 0, 36, 6 } }, { { 0, 38, 6 } },
- { { 0, 40, 6 } }, { { 0, 42, 6 } }, { { 0, 44, 6 } }, { { 32, 1, 4 } },
- { { 48, 1, 4 } }, { { 16, 2, 4 } }, { { 32, 4, 5 } }, { { 32, 5, 5 } },
- { { 32, 7, 5 } }, { { 32, 8, 5 } }, { { 0, 11, 6 } }, { { 0, 14, 6 } },
- { { 0, 17, 6 } }, { { 0, 20, 6 } }, { { 0, 23, 6 } }, { { 0, 26, 6 } },
- { { 0, 29, 6 } }, { { 0, 52, 6 } }, { { 0, 51, 6 } }, { { 0, 50, 6 } },
- { { 0, 49, 6 } }, { { 0, 48, 6 } }, { { 0, 47, 6 } }, { { 0, 46, 6 } },
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
}; /* ML_defaultDTable */
-/* Default FSE distribution table for Offset Codes */
-static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
- { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
- /* base, symbol, bits */
- { { 0, 0, 5 } }, { { 0, 6, 4 } },
- { { 0, 9, 5 } }, { { 0, 15, 5 } },
- { { 0, 21, 5 } }, { { 0, 3, 5 } },
- { { 0, 7, 4 } }, { { 0, 12, 5 } },
- { { 0, 18, 5 } }, { { 0, 23, 5 } },
- { { 0, 5, 5 } }, { { 0, 8, 4 } },
- { { 0, 14, 5 } }, { { 0, 20, 5 } },
- { { 0, 2, 5 } }, { { 16, 7, 4 } },
- { { 0, 11, 5 } }, { { 0, 17, 5 } },
- { { 0, 22, 5 } }, { { 0, 4, 5 } },
- { { 16, 8, 4 } }, { { 0, 13, 5 } },
- { { 0, 19, 5 } }, { { 0, 1, 5 } },
- { { 16, 6, 4 } }, { { 0, 10, 5 } },
- { { 0, 16, 5 } }, { { 0, 28, 5 } },
- { { 0, 27, 5 } }, { { 0, 26, 5 } },
- { { 0, 25, 5 } }, { { 0, 24, 5 } },
-}; /* OF_defaultDTable */
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+{
+ void* ptr = dt;
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+ ZSTD_seqSymbol* const cell = dt + 1;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->nbBits = 0;
+ cell->nextState = 0;
+ assert(nbAddBits < 255);
+ cell->nbAdditionalBits = (BYTE)nbAddBits;
+ cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off) */
+static void
+ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U32* nbAdditionalBits,
+ unsigned tableLog)
+{
+ ZSTD_seqSymbol* const tableDecode = dt+1;
+ U16 symbolNext[MaxSeq+1];
+
+ U32 const maxSV1 = maxSymbolValue + 1;
+ U32 const tableSize = 1 << tableLog;
+ U32 highThreshold = tableSize-1;
+
+ /* Sanity Checks */
+ assert(maxSymbolValue <= MaxSeq);
+ assert(tableLog <= MaxFSELog);
+
+ /* Init, lay down lowprob symbols */
+ { ZSTD_seqSymbol_header DTableH;
+ DTableH.tableLog = tableLog;
+ DTableH.fastMode = 1;
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
+ U32 s;
+ for (s=0; s<maxSV1; s++) {
+ if (normalizedCounter[s]==-1) {
+ tableDecode[highThreshold--].baseValue = s;
+ symbolNext[s] = 1;
+ } else {
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+ symbolNext[s] = normalizedCounter[s];
+ } } }
+ memcpy(dt, &DTableH, sizeof(DTableH));
+ }
+
+ /* Spread symbols */
+ { U32 const tableMask = tableSize-1;
+ U32 const step = FSE_TABLESTEP(tableSize);
+ U32 s, position = 0;
+ for (s=0; s<maxSV1; s++) {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++) {
+ tableDecode[position].baseValue = s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ } }
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+ }
+
+ /* Build Decoding table */
+ { U32 u;
+ for (u=0; u<tableSize; u++) {
+ U32 const symbol = tableDecode[u].baseValue;
+ U32 const nextState = symbolNext[symbol]++;
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+ assert(nbAdditionalBits[symbol] < 255);
+ tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+ tableDecode[u].baseValue = baseValue[symbol];
+ } }
+}
+
/*! ZSTD_buildSeqTable() :
* @return : nb bytes read from src,
- * or an error code if it fails, testable with ZSTD_isError()
- */
-static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTablePtr,
+ * or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
symbolEncodingType_e type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
- const FSE_decode_t4* defaultTable, U32 flagRepeatTable)
+ const U32* baseValue, const U32* nbAdditionalBits,
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
{
- const void* const tmpPtr = defaultTable; /* bypass strict aliasing */
switch(type)
{
case set_rle :
if (!srcSize) return ERROR(srcSize_wrong);
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
- FSE_buildDTable_rle(DTableSpace, *(const BYTE*)src);
+ { U32 const symbol = *(const BYTE*)src;
+ U32 const baseline = baseValue[symbol];
+ U32 const nbBits = nbAdditionalBits[symbol];
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+ }
*DTablePtr = DTableSpace;
return 1;
case set_basic :
- *DTablePtr = (const FSE_DTable*)tmpPtr;
+ *DTablePtr = defaultTable;
return 0;
case set_repeat:
if (!flagRepeatTable) return ERROR(corruption_detected);
return 0;
- default : /* impossible */
case set_compressed :
{ U32 tableLog;
S16 norm[MaxSeq+1];
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
if (FSE_isError(headerSize)) return ERROR(corruption_detected);
if (tableLog > maxLog) return ERROR(corruption_detected);
- FSE_buildDTable(DTableSpace, norm, max, tableLog);
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
*DTablePtr = DTableSpace;
return headerSize;
- } }
+ }
+ default : /* impossible */
+ assert(0);
+ return ERROR(GENERIC);
+ }
}
+static const U32 LL_base[MaxLL+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 18, 20, 22, 24, 28, 32, 40,
+ 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+ 0x2000, 0x4000, 0x8000, 0x10000 };
+
+static const U32 OF_base[MaxOff+1] = {
+ 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
+ 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
+ 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+ 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static const U32 OF_bits[MaxOff+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31 };
+
+static const U32 ML_base[MaxML+1] = {
+ 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 39, 41, 43, 47, 51, 59,
+ 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+ 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize)
{
@@ -792,19 +970,27 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
/* Build DTables */
{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
LLtype, MaxLL, LLFSELog,
- ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
+ ip, iend-ip,
+ LL_base, LL_bits,
+ LL_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
ip += llhSize;
}
+
{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
OFtype, MaxOff, OffFSELog,
- ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
+ ip, iend-ip,
+ OF_base, OF_bits,
+ OF_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
ip += ofhSize;
}
+
{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
MLtype, MaxML, MLFSELog,
- ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
+ ip, iend-ip,
+ ML_base, ML_bits,
+ ML_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
ip += mlhSize;
}
@@ -822,10 +1008,15 @@ typedef struct {
} seq_t;
typedef struct {
+ size_t state;
+ const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
BIT_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
+ ZSTD_fseState stateLL;
+ ZSTD_fseState stateOffb;
+ ZSTD_fseState stateML;
size_t prevOffset[ZSTD_REP_NUM];
const BYTE* prefixStart;
const BYTE* dictEnd;
@@ -880,118 +1071,6 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
}
-typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-
-/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
- * bits before reloading. This value is the maximum number of bytes we read
- * after reloading when we are decoding long offets.
- */
-#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
- (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
- ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
- : 0)
-
-static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-{
- seq_t seq;
-
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
-
- U32 const llBits = LL_bits[llCode];
- U32 const mlBits = ML_bits[mlCode];
- U32 const ofBits = ofCode;
- U32 const totalBits = llBits+mlBits+ofBits;
-
- static const U32 LL_base[MaxLL+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
- 0x2000, 0x4000, 0x8000, 0x10000 };
-
- static const U32 ML_base[MaxML+1] = {
- 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26,
- 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 37, 39, 41, 43, 47, 51, 59,
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
- static const U32 OF_base[MaxOff+1] = {
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
-
- /* sequence */
- { size_t offset;
- if (!ofCode)
- offset = 0;
- else {
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
- assert(ofBits <= MaxOff);
- if (MEM_32bits() && longOffsets) {
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
- } else {
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
- }
- }
-
- if (ofCode <= 1) {
- offset += (llCode==0);
- if (offset) {
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
- } else {
- offset = seqState->prevOffset[0];
- }
- } else {
- seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset;
- }
- seq.offset = offset;
- }
-
- seq.matchLength = ML_base[mlCode]
- + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
- BIT_reloadDStream(&seqState->DStream);
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_reloadDStream(&seqState->DStream);
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
- seq.litLength = LL_base[llCode]
- + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
- if (MEM_32bits())
- BIT_reloadDStream(&seqState->DStream);
-
- DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-
- /* ANS state update */
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
-
- return seq;
-}
-
-
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
@@ -1073,10 +1152,199 @@ size_t ZSTD_execSequence(BYTE* op,
}
-static size_t ZSTD_decompressSequences(
- ZSTD_DCtx* dctx,
+HINT_INLINE
+size_t ZSTD_execSequenceLong(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = sequence.match;
+
+ /* check */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
+ if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
+
+ /* copy Literals */
+ ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
+ if (sequence.litLength > 8)
+ ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = iLitEnd; /* update for next sequence */
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+ /* offset beyond prefix */
+ if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
+ if (match + sequence.matchLength <= dictEnd) {
+ memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = prefixStart;
+ if (op > oend_w || sequence.matchLength < MINMATCH) {
+ U32 i;
+ for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+ return sequenceLength;
+ }
+ } }
+ assert(op <= oend_w);
+ assert(sequence.matchLength >= MINMATCH);
+
+ /* match within prefix */
+ if (sequence.offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ int const sub2 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= sub2;
+ } else {
+ ZSTD_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-(16-MINMATCH)) {
+ if (op < oend_w) {
+ ZSTD_wildcopy(op, match, oend_w - op);
+ match += oend_w - op;
+ op = oend_w;
+ }
+ while (op < oMatchEnd) *op++ = *match++;
+ } else {
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ return sequenceLength;
+}
+
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+ const void* ptr = dt;
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+ (U32)DStatePtr->state, DTableH->tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
+{
+ ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bytes we read
+ * after reloading when we are decoding long offets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
+ : 0)
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+{
+ seq_t seq;
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
+ U32 const totalBits = llBits+mlBits+ofBits;
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
+
+ /* sequence */
+ { size_t offset;
+ if (!ofBits)
+ offset = 0;
+ else {
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+ assert(ofBits <= MaxOff);
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+ U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+ BIT_reloadDStream(&seqState->DStream);
+ if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+ assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
+ } else {
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+ }
+ }
+
+ if (ofBits <= 1) {
+ offset += (llBase==0);
+ if (offset) {
+ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset = temp;
+ } else { /* offset == 0 */
+ offset = seqState->prevOffset[0];
+ }
+ } else {
+ seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset;
+ }
+ seq.offset = offset;
+ }
+
+ seq.matchLength = mlBase
+ + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+ BIT_reloadDStream(&seqState->DStream);
+ if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+ BIT_reloadDStream(&seqState->DStream);
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+ seq.litLength = llBase
+ + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
+ if (MEM_32bits())
+ BIT_reloadDStream(&seqState->DStream);
+
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+ /* ANS state update */
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+
+ return seq;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
@@ -1089,26 +1357,17 @@ static size_t ZSTD_decompressSequences(
const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- int nbSeq;
DEBUGLOG(5, "ZSTD_decompressSequences");
- /* Build Decoding Tables */
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
- DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
- (U32)seqHSize, nbSeq);
- if (ZSTD_isError(seqHSize)) return seqHSize;
- ip += seqHSize;
- }
-
/* Regen sequences */
if (nbSeq) {
seqState_t seqState;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
@@ -1120,7 +1379,7 @@ static size_t ZSTD_decompressSequences(
} }
/* check if reached exact end */
- DEBUGLOG(5, "after decode loop, remaining nbSeq : %i", nbSeq);
+ DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
if (nbSeq) return ERROR(corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
@@ -1136,46 +1395,32 @@ static size_t ZSTD_decompressSequences(
return op-ostart;
}
-
-HINT_INLINE
-seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
{
- seq_t seq;
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
- U32 const llBits = LL_bits[llCode];
- U32 const mlBits = ML_bits[mlCode];
- U32 const ofBits = ofCode;
- U32 const totalBits = llBits+mlBits+ofBits;
- static const U32 LL_base[MaxLL+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
- 0x2000, 0x4000, 0x8000, 0x10000 };
-
- static const U32 ML_base[MaxML+1] = {
- 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26,
- 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 37, 39, 41, 43, 47, 51, 59,
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
- static const U32 OF_base[MaxOff+1] = {
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
+{
+ seq_t seq;
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
+ U32 const totalBits = llBits+mlBits+ofBits;
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
/* sequence */
{ size_t offset;
- if (!ofCode)
+ if (!ofBits)
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
@@ -1183,17 +1428,17 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets) {
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
- if (ofCode <= 1) {
- offset += (llCode==0);
+ if (ofBits <= 1) {
+ offset += (llBase==0);
if (offset) {
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
@@ -1211,7 +1456,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
seq.offset = offset;
}
- seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
+ seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
@@ -1219,7 +1464,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
- seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
+ seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
@@ -1231,98 +1476,19 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
}
/* ANS state update */
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
return seq;
}
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
- BYTE* const oLitEnd = op + sequence.litLength;
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
- const BYTE* match = sequence.match;
-
- /* check */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
- /* copy Literals */
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
- if (sequence.litLength > 8)
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = iLitEnd; /* update for next sequence */
-
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
- /* offset beyond prefix */
- if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- { size_t const length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
- } }
- assert(op <= oend_w);
- assert(sequence.matchLength >= MINMATCH);
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequencesLong(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
@@ -1335,13 +1501,6 @@ static size_t ZSTD_decompressSequencesLong(
const BYTE* const prefixStart = (const BYTE*) (dctx->base);
const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- int nbSeq;
-
- /* Build Decoding Tables */
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
- if (ZSTD_isError(seqHSize)) return seqHSize;
- ip += seqHSize;
- }
/* Regen sequences */
if (nbSeq) {
@@ -1358,18 +1517,18 @@ static size_t ZSTD_decompressSequencesLong(
seqState.pos = (size_t)(op-prefixStart);
seqState.dictEnd = dictEnd;
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */
- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
}
if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
@@ -1389,6 +1548,9 @@ static size_t ZSTD_decompressSequencesLong(
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+#undef STORED_SEQS
+#undef STOSEQ_MASK
+#undef ADVANCED_SEQS
}
/* last literal segment */
@@ -1401,6 +1563,96 @@ static size_t ZSTD_decompressSequencesLong(
return op-ostart;
}
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+#endif
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+ ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
+ const void *seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset);
+
+static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+ if (dctx->bmi2) {
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+#endif
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+ if (dctx->bmi2) {
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+#endif
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ * compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+ const void* ptr = offTable;
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+ const ZSTD_seqSymbol* table = offTable + 1;
+ U32 const max = 1 << tableLog;
+ U32 u, total = 0;
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+ assert(max <= (1 << OffFSELog)); /* max not too large */
+ for (u=0; u<max; u++) {
+ if (table[u].nbAdditionalBits > 22) total += 1;
+ }
+
+ assert(tableLog <= OffFSELog);
+ total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
+
+ return total;
+}
+
static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
@@ -1410,13 +1662,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
* We don't expect that to be the case in 64-bit mode.
- * If we are in block mode we don't know the window size, so we have to be
- * conservative.
+ * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
*/
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
- /* windowSize could be any value at this point, since it is only validated
- * in the streaming API.
- */
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
@@ -1428,9 +1676,24 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
ip += litCSize;
srcSize -= litCSize;
}
- if (frame && dctx->fParams.windowSize > (1<<23))
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
+
+ /* Build Decoding Tables */
+ { int nbSeq;
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+ if (ZSTD_isError(seqHSize)) return seqHSize;
+ ip += seqHSize;
+ srcSize -= seqHSize;
+
+ if ( (!frame || dctx->fParams.windowSize > (1<<24))
+ && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
+ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+ if (shareLongOffsets >= minShare)
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ }
+
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ }
}
@@ -1758,7 +2021,7 @@ static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skip
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
- DEBUGLOG(5, "ZSTD_decompressContinue");
+ DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
/* Sanity check */
if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
@@ -1819,12 +2082,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decompressLastBlock:
case ZSTDds_decompressBlock:
- DEBUGLOG(5, "case ZSTDds_decompressBlock");
+ DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
{ size_t rSize;
switch(dctx->bType)
{
case bt_compressed:
- DEBUGLOG(5, "case bt_compressed");
+ DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
break;
case bt_raw :
@@ -1838,12 +2101,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
return ERROR(corruption_detected);
}
if (ZSTD_isError(rSize)) return rSize;
- DEBUGLOG(5, "decoded size from block : %u", (U32)rSize);
+ DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
- DEBUGLOG(4, "decoded size from frame : %u", (U32)dctx->decodedSize);
+ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
if (dctx->decodedSize != dctx->fParams.frameContentSize) {
return ERROR(corruption_detected);
@@ -1867,7 +2130,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
assert(srcSize == 4); /* guaranteed by dctx->expected */
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
U32 const check32 = MEM_readLE32(src);
- DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32);
+ DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
if (check32 != h32) return ERROR(checksum_wrong);
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
@@ -1925,8 +2188,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
U32 offcodeMaxValue = MaxOff, offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+ if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
- CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
+ ZSTD_buildFSETable(entropy->OFTable,
+ offcodeNCount, offcodeMaxValue,
+ OF_base, OF_bits,
+ offcodeLog);
dictPtr += offcodeHeaderSize;
}
@@ -1934,8 +2201,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
- CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
+ ZSTD_buildFSETable(entropy->MLTable,
+ matchlengthNCount, matchlengthMaxValue,
+ ML_base, ML_bits,
+ matchlengthLog);
dictPtr += matchlengthHeaderSize;
}
@@ -1943,8 +2214,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
- CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
+ ZSTD_buildFSETable(entropy->LLTable,
+ litlengthNCount, litlengthMaxValue,
+ LL_base, LL_bits,
+ litlengthLog);
dictPtr += litlengthHeaderSize;
}
@@ -2062,13 +2337,23 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddi
return 0;
}
-static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
+static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType)
{
ddict->dictID = 0;
ddict->entropyPresent = 0;
- if (ddict->dictSize < 8) return 0;
+ if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+ if (ddict->dictSize < 8) {
+ if (dictContentType == ZSTD_dct_fullDict)
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
+ return 0; /* pure content mode */
+ }
{ U32 const magic = MEM_readLE32(ddict->dictContent);
- if (magic != ZSTD_MAGIC_DICTIONARY) return 0; /* pure content mode */
+ if (magic != ZSTD_MAGIC_DICTIONARY) {
+ if (dictContentType == ZSTD_dct_fullDict)
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
+ return 0; /* pure content mode */
+ }
}
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize);
@@ -2079,7 +2364,10 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
}
-static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
{
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
ddict->dictBuffer = NULL;
@@ -2095,12 +2383,15 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
- CHECK_F( ZSTD_loadEntropy_inDDict(ddict) );
+ CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) );
return 0;
}
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_customMem customMem)
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_customMem customMem)
{
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
@@ -2108,7 +2399,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
if (!ddict) return NULL;
ddict->cMem = customMem;
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod) )) {
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) {
ZSTD_freeDDict(ddict);
return NULL;
}
@@ -2124,7 +2415,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
- return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, allocator);
+ return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
}
/*! ZSTD_createDDict_byReference() :
@@ -2134,13 +2425,15 @@ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
- return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, allocator);
+ return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
}
-ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
- const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod)
+const ZSTD_DDict* ZSTD_initStaticDDict(
+ void* workspace, size_t workspaceSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
{
size_t const neededSpace =
sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
@@ -2153,7 +2446,7 @@ ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
memcpy(ddict+1, dict, dictSize); /* local copy */
dict = ddict+1;
}
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef) ))
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) ))
return NULL;
return ddict;
}
@@ -2247,6 +2540,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
ZSTD_DStream* ZSTD_createDStream(void)
{
+ DEBUGLOG(3, "ZSTD_createDStream");
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
}
@@ -2271,58 +2565,99 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds)
size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
-size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
- zds->streamStage = zdss_loadHeader;
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
- ZSTD_freeDDict(zds->ddictLocal);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
+ ZSTD_freeDDict(dctx->ddictLocal);
if (dict && dictSize >= 8) {
- zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
- if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
- } else zds->ddictLocal = NULL;
- zds->ddict = zds->ddictLocal;
- zds->legacyVersion = 0;
- zds->hostageByte = 0;
+ dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+ if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
+ } else {
+ dctx->ddictLocal = NULL;
+ }
+ dctx->ddict = dctx->ddictLocal;
+ return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+ return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+ DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+ zds->streamStage = zdss_init;
+ CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
return ZSTD_frameHeaderSize_prefix;
}
/* note : this variant can't fail */
size_t ZSTD_initDStream(ZSTD_DStream* zds)
{
+ DEBUGLOG(4, "ZSTD_initDStream");
return ZSTD_initDStream_usingDict(zds, NULL, 0);
}
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
+ dctx->ddict = ddict;
+ return 0;
+}
+
/* ZSTD_initDStream_usingDDict() :
* ddict will just be referenced, and must outlive decompression session
* this function cannot fail */
-size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
- size_t const initResult = ZSTD_initDStream(zds);
- zds->ddict = ddict;
+ size_t const initResult = ZSTD_initDStream(dctx);
+ dctx->ddict = ddict;
return initResult;
}
-size_t ZSTD_resetDStream(ZSTD_DStream* zds)
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
+ * this function cannot fail */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
- zds->streamStage = zdss_loadHeader;
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
- zds->legacyVersion = 0;
- zds->hostageByte = 0;
+ DEBUGLOG(4, "ZSTD_resetDStream");
+ dctx->streamStage = zdss_loadHeader;
+ dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
+ dctx->legacyVersion = 0;
+ dctx->hostageByte = 0;
return ZSTD_frameHeaderSize_prefix;
}
-size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
+size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
ZSTD_DStreamParameter_e paramType, unsigned paramValue)
{
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
- if ((unsigned)zds->streamStage > (unsigned)zdss_loadHeader)
- return ERROR(stage_wrong);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
switch(paramType)
{
default : return ERROR(parameter_unsupported);
case DStream_p_maxWindowSize :
DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10);
- zds->maxWindowSize = paramValue ? paramValue : (U32)(-1);
+ dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1);
break;
}
return 0;
@@ -2330,9 +2665,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
{
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
- return ERROR(stage_wrong);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
dctx->maxWindowSize = maxWindowSize;
return 0;
}
@@ -2340,17 +2673,15 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
{
DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format);
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
- return ERROR(stage_wrong);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
dctx->format = format;
return 0;
}
-size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
{
- return ZSTD_sizeof_DCtx(zds);
+ return ZSTD_sizeof_DCtx(dctx);
}
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
@@ -2417,23 +2748,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
}
DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
- if (zds->legacyVersion) {
- /* legacy support is incompatible with static dctx */
- if (zds->staticSize) return ERROR(memory_allocation);
- return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
- }
-#endif
-
while (someMoreWork) {
switch(zds->streamStage)
{
case zdss_init :
+ DEBUGLOG(5, "stage zdss_init => transparent reset ");
ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
/* fall-through */
case zdss_loadHeader :
DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+ if (zds->legacyVersion) {
+ /* legacy support is incompatible with static dctx */
+ if (zds->staticSize) return ERROR(memory_allocation);
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
+ if (hint==0) zds->streamStage = zdss_init;
+ return hint;
+ } }
+#endif
{ size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
DEBUGLOG(5, "header size : %u", (U32)hSize);
if (ZSTD_isError(hSize)) {
@@ -2442,14 +2775,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (legacyVersion) {
const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
+ DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
/* legacy support is incompatible with static dctx */
if (zds->staticSize) return ERROR(memory_allocation);
CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
zds->previousLegacyVersion, legacyVersion,
dict, dictSize));
zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
- return ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
- }
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
+ if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
+ return hint;
+ } }
#endif
return hSize; /* error */
}
@@ -2559,6 +2895,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (ip==iend) { someMoreWork = 0; break; } /* no more input */
zds->streamStage = zdss_load;
/* fall-through */
+
case zdss_load:
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
size_t const toLoad = neededInSize - zds->inPos;
@@ -2585,6 +2922,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
zds->streamStage = zdss_flush;
/* fall-through */
+
case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
@@ -2631,8 +2969,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
return 1;
} /* nextSrcSizeHint==0 */
nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
- if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */
- nextSrcSizeHint -= zds->inPos; /* already loaded*/
+ assert(zds->inPos <= nextSrcSizeHint);
+ nextSrcSizeHint -= zds->inPos; /* part already loaded*/
return nextSrcSizeHint;
}
}
diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h
index 9ac0a73dce..6405da602e 100644
--- a/thirdparty/zstd/zstd.h
+++ b/thirdparty/zstd/zstd.h
@@ -45,11 +45,11 @@ extern "C" {
Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
Compression can be done in:
- a single step (described as Simple API)
- - a single step, reusing a context (described as Explicit memory management)
+ - a single step, reusing a context (described as Explicit context)
- unbounded multiple steps (described as Streaming compression)
The compression ratio achievable on small data can be highly improved using a dictionary in:
- a single step (described as Simple dictionary API)
- - a single step, reusing a dictionary (described as Fast dictionary API)
+ - a single step, reusing a dictionary (described as Bulk-processing dictionary API)
Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
Advanced experimental APIs shall never be used with a dynamic library.
@@ -59,7 +59,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 3
-#define ZSTD_VERSION_RELEASE 3
+#define ZSTD_VERSION_RELEASE 4
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
@@ -68,7 +68,7 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll versio
#define ZSTD_QUOTE(str) #str
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
-ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0 */
+ZSTDLIB_API const char* ZSTD_versionString(void); /* added in v1.3.0 */
/***************************************
@@ -92,7 +92,7 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
-/*! ZSTD_getFrameContentSize() : v1.3.0
+/*! ZSTD_getFrameContentSize() : added in v1.3.0
* `src` should point to the start of a ZSTD encoded frame.
* `srcSize` must be at least as large as the frame header.
* hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
@@ -120,26 +120,24 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
/*! ZSTD_getDecompressedSize() :
* NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
- * Both functions work the same way,
- * but ZSTD_getDecompressedSize() blends
- * "empty", "unknown" and "error" results in the same return value (0),
- * while ZSTD_getFrameContentSize() distinguishes them.
- *
- * 'src' is the start of a zstd compressed frame.
- * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
+ * Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ * "empty", "unknown" and "error" results to the same return value (0),
+ * while ZSTD_getFrameContentSize() gives them separate return values.
+ * `src` is the start of a zstd compressed frame.
+ * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
/*====== Helper functions ======*/
#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
-ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
/***************************************
-* Explicit memory management
+* Explicit context
***************************************/
/*= Compression context
* When compressing many times,
@@ -345,7 +343,7 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* *******************************************************************************/
typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
- /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
+ /* For compatibility with versions <= v1.2.0, continue to consider them separated. */
/*===== ZSTD_DStream management functions =====*/
ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
@@ -375,23 +373,24 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
/* --- Constants ---*/
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
-#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */
+#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* >= v0.7.0 */
#define ZSTD_WINDOWLOG_MAX_32 30
#define ZSTD_WINDOWLOG_MAX_64 31
#define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX MIN(ZSTD_WINDOWLOG_MAX, 30)
+#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
#define ZSTD_HASHLOG_MIN 6
-#define ZSTD_CHAINLOG_MAX MIN(ZSTD_WINDOWLOG_MAX+1, 30)
+#define ZSTD_CHAINLOG_MAX_32 29
+#define ZSTD_CHAINLOG_MAX_64 30
+#define ZSTD_CHAINLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
#define ZSTD_HASHLOG3_MAX 17
#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
#define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_TARGETLENGTH_MIN 4 /* only useful for btopt */
-#define ZSTD_TARGETLENGTH_MAX 999 /* only useful for btopt */
+#define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */
#define ZSTD_LDM_MINMATCH_MIN 4
#define ZSTD_LDM_MINMATCH_MAX 4096
#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
@@ -432,12 +431,17 @@ typedef struct {
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
-/*--- Custom memory allocation functions ---*/
-typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
-typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
-typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
-/* use this constant to defer to stdlib's functions */
-static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };
+typedef enum {
+ ZSTD_dct_auto=0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+ ZSTD_dct_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+ ZSTD_dct_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+ ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
+ ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
/***************************************
@@ -483,12 +487,12 @@ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/***************************************
-* Context memory usage
+* Memory management
***************************************/
/*! ZSTD_sizeof_*() :
* These functions give the current memory usage of selected object.
- * Object memory usage can evolve when re-used multiple times. */
+ * Object memory usage can evolve when re-used. */
ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
@@ -503,8 +507,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* It will also consider src size to be arbitrarily "large", which is worst case.
* If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1.
- * Note : CCtx estimation is only correct for single-threaded compression */
+ * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbWorkers is >= 1.
+ * Note : CCtx size estimation is only correct for single-threaded compression. */
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
@@ -515,8 +519,8 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
* It will also consider src size to be arbitrarily "large", which is worst case.
* If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1.
- * Note : CStream estimation is only correct for single-threaded compression.
+ * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbWorkers is >= 1.
+ * Note : CStream size estimation is only correct for single-threaded compression.
* ZSTD_DStream memory budget depends on window Size.
* This information can be passed manually, using ZSTD_estimateDStreamSize,
* or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
@@ -529,84 +533,92 @@ ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_para
ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
-typedef enum {
- ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
- ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
-} ZSTD_dictLoadMethod_e;
-
/*! ZSTD_estimate?DictSize() :
* ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
- * ZSTD_estimateCStreamSize_advanced_usingCParams() makes it possible to control precisely compression parameters, like ZSTD_createCDict_advanced().
- * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller
+ * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
*/
ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
-
-/***************************************
-* Advanced compression functions
-***************************************/
-/*! ZSTD_createCCtx_advanced() :
- * Create a ZSTD compression context using external alloc and free functions */
-ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
-
-/*! ZSTD_initStaticCCtx() : initialize a fixed-size zstd compression context
- * workspace: The memory area to emplace the context into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive context usage.
- * workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize()
- * to determine how large workspace must be to support scenario.
- * @return : pointer to ZSTD_CCtx* (same address as workspace, but different type),
- * or NULL if error (typically size too small)
- * Note : zstd will never resize nor malloc() when using a static cctx.
- * If it needs more memory than available, it will simply error out.
+/*! ZSTD_initStatic*() :
+ * Initialize an object using a pre-allocated fixed-size buffer.
+ * workspace: The memory area to emplace the object into.
+ * Provided pointer *must be 8-bytes aligned*.
+ * Buffer must outlive object.
+ * workspaceSize: Use ZSTD_estimate*Size() to determine
+ * how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ * or NULL if error (size too small, incorrect alignment, etc.)
+ * Note : zstd will never resize nor malloc() when using a static buffer.
+ * If the object requires more memory than available,
+ * zstd will just error out (typically ZSTD_error_memory_allocation).
* Note 2 : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally too.
- * Limitation 1 : currently not compatible with internal CDict creation, such as
- * ZSTD_CCtx_loadDictionary() or ZSTD_initCStream_usingDict().
- * Limitation 2 : currently not compatible with multi-threading
+ * Since workspace is allocated externally, it must be freed externally too.
+ * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ * into its associated cParams.
+ * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ * Limitation 2 : static cctx currently not compatible with multi-threading.
+ * Limitation 3 : static dctx is incompatible with legacy support.
*/
-ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
+ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
-/*! ZSTD_createCDict_byReference() :
- * Create a digested dictionary for compression
- * Dictionary content is simply referenced, and therefore stays in dictBuffer.
- * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+ void* workspace, size_t workspaceSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+ void* workspace, size_t workspaceSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType);
+
+/*! Custom memory allocation :
+ * These prototypes make it possible to pass your own allocation/free functions.
+ * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ * All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-typedef enum { ZSTD_dm_auto=0, /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
- ZSTD_dm_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
- ZSTD_dm_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */
-} ZSTD_dictMode_e;
-/*! ZSTD_createCDict_advanced() :
- * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams,
ZSTD_customMem customMem);
-/*! ZSTD_initStaticCDict() :
- * Generate a digested dictionary in provided memory area.
- * workspace: The memory area to emplace the dictionary into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive dictionary usage.
- * workspaceSize: Use ZSTD_estimateCDictSize()
- * to determine how large workspace must be.
- * cParams : use ZSTD_getCParams() to transform a compression level
- * into its relevants cParams.
- * @return : pointer to ZSTD_CDict* (same address as workspace, but different type),
- * or NULL if error (typically, size too small).
- * Note : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally.
- */
-ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict(
- void* workspace, size_t workspaceSize,
- const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode,
- ZSTD_compressionParameters cParams);
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_customMem customMem);
+
+
+
+/***************************************
+* Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ * Create a digested dictionary for compression
+ * Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
@@ -652,28 +664,6 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
* Note 3 : Skippable Frame Identifiers are considered valid. */
ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
-/*! ZSTD_createDCtx_advanced() :
- * Create a ZSTD decompression context using external alloc and free functions */
-ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
-
-/*! ZSTD_initStaticDCtx() : initialize a fixed-size zstd decompression context
- * workspace: The memory area to emplace the context into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive context usage.
- * workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize()
- * to determine how large workspace must be to support scenario.
- * @return : pointer to ZSTD_DCtx* (same address as workspace, but different type),
- * or NULL if error (typically size too small)
- * Note : zstd will never resize nor malloc() when using a static dctx.
- * If it needs more memory than available, it will simply error out.
- * Note 2 : static dctx is incompatible with legacy support
- * Note 3 : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally.
- * Limitation : currently not compatible with internal DDict creation,
- * such as ZSTD_initDStream_usingDict().
- */
-ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
-
/*! ZSTD_createDDict_byReference() :
* Create a digested dictionary, ready to start decompression operation without startup delay.
* Dictionary content is referenced, and therefore stays in dictBuffer.
@@ -681,26 +671,6 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize
* it must remain read accessible throughout the lifetime of DDict */
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
-/*! ZSTD_createDDict_advanced() :
- * Create a ZSTD_DDict using external alloc and free, optionally by reference */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_customMem customMem);
-
-/*! ZSTD_initStaticDDict() :
- * Generate a digested dictionary in provided memory area.
- * workspace: The memory area to emplace the dictionary into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive dictionary usage.
- * workspaceSize: Use ZSTD_estimateDDictSize()
- * to determine how large workspace must be.
- * @return : pointer to ZSTD_DDict*, or NULL if error (size too small)
- * Note : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally.
- */
-ZSTDLIB_API ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
- const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod);
/*! ZSTD_getDictID_fromDict() :
* Provides the dictID stored within dictionary.
@@ -732,8 +702,6 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
********************************************************************/
/*===== Advanced Streaming compression functions =====*/
-ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/
ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
@@ -748,14 +716,28 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const
* If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
* If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
- * but it may change to mean "empty" in some future version, so prefer using macro ZSTD_CONTENTSIZE_UNKNOWN.
+ * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+typedef struct {
+ unsigned long long ingested;
+ unsigned long long consumed;
+ unsigned long long produced;
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been ingested (read from input)
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Therefore, (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Can report progression inside worker threads (multi-threading and non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+
+
/*===== Advanced Streaming decompression functions =====*/
-ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); /* obsolete : this API will be removed in a future version */
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */
@@ -924,10 +906,8 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
* and then applied on all subsequent compression jobs.
* When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT.
*
- * This API is intended to replace all others experimental API.
- * It can basically do all other use cases, and even new ones.
- * In constrast with _advanced() variants, it stands a reasonable chance to become "stable",
- * after a good testing period.
+ * This API is intended to replace all others advanced / experimental API entry points.
+ * But it stands a reasonable chance to become "stable", after a reasonable testing period.
*/
/* note on naming convention :
@@ -944,12 +924,12 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
* All enum will be pinned to explicit values before reaching "stable API" status */
typedef enum {
- /* Question : should we have a format ZSTD_f_auto ?
- * For the time being, it would mean exactly the same as ZSTD_f_zstd1.
- * But, in the future, should several formats be supported,
+ /* Opened question : should we have a format ZSTD_f_auto ?
+ * Today, it would mean exactly the same as ZSTD_f_zstd1.
+ * But, in the future, should several formats become supported,
* on the compression side, it would mean "default format".
- * On the decompression side, it would mean "multi format",
- * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames".
+ * On the decompression side, it would mean "automatic format detection",
+ * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
* Since meaning is a little different, another option could be to define different enums for compression and decompression.
* This question could be kept for later, when there are actually multiple formats to support,
* but there is also the question of pinning enum values, and pinning value `0` is especially important */
@@ -967,42 +947,76 @@ typedef enum {
/* compression parameters */
ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
* Default level is ZSTD_CLEVEL_DEFAULT==3.
- * Special: value 0 means "do not change cLevel". */
+ * Special: value 0 means "do not change cLevel".
+ * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
+ * Note 2 : setting a level sets all default values of other compression parameters.
+ * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
ZSTD_p_windowLog, /* Maximum allowed back-reference distance, expressed as power of 2.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
- * Special: value 0 means "do not change windowLog".
+ * Special: value 0 means "use default windowLog".
* Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
- * requires setting the maximum window size at least as large during decompression. */
+ * requires explicitly allowing such window size during decompression stage. */
ZSTD_p_hashLog, /* Size of the probe table, as a power of 2.
* Resulting table size is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
* Larger tables improve compression ratio of strategies <= dFast,
* and improve speed of strategies > dFast.
- * Special: value 0 means "do not change hashLog". */
+ * Special: value 0 means "use default hashLog". */
ZSTD_p_chainLog, /* Size of the full-search table, as a power of 2.
* Resulting table size is (1 << (chainLog+2)).
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy.
- * Special: value 0 means "do not change chainLog". */
+ * Special: value 0 means "use default chainLog". */
ZSTD_p_searchLog, /* Number of search attempts, as a power of 2.
* More attempts result in better and slower compression.
* This parameter is useless when using "fast" and "dFast" strategies.
- * Special: value 0 means "do not change searchLog". */
+ * Special: value 0 means "use default searchLog". */
ZSTD_p_minMatch, /* Minimum size of searched matches (note : repCode matches can be smaller).
* Larger values make faster compression and decompression, but decrease ratio.
* Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
* Note that currently, for all strategies < btopt, effective minimum is 4.
- * Note that currently, for all strategies > fast, effective maximum is 6.
- * Special: value 0 means "do not change minMatchLength". */
- ZSTD_p_targetLength, /* Only useful for strategies >= btopt.
- * Length of Match considered "good enough" to stop search.
- * Larger values make compression stronger and slower.
- * Special: value 0 means "do not change targetLength". */
+ * , for all strategies > fast, effective maximum is 6.
+ * Special: value 0 means "use default minMatchLength". */
+ ZSTD_p_targetLength, /* Impact of this field depends on strategy.
+ * For strategies btopt & btultra:
+ * Length of Match considered "good enough" to stop search.
+ * Larger values make compression stronger, and slower.
+ * For strategy fast:
+ * Distance between match sampling.
+ * Larger values make compression faster, and weaker.
+ * Special: value 0 means "use default targetLength". */
ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
* Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
- * Special: value 0 means "do not change strategy". */
+ * Special: value 0 means "use default strategy". */
+
+ ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching.
+ * This parameter is designed to improve compression ratio
+ * for large inputs, by finding large matches at long distance.
+ * It increases memory usage and window size.
+ * Note: enabling this parameter increases ZSTD_p_windowLog to 128 MB
+ * except when expressly set to a different value. */
+ ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2.
+ * Larger values increase memory usage and compression ratio,
+ * but decrease compression speed.
+ * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+ * default: windowlog - 7.
+ * Special: value 0 means "automatically determine hashlog". */
+ ZSTD_p_ldmMinMatch, /* Minimum match size for long distance matcher.
+ * Larger/too small values usually decrease compression ratio.
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+ * Special: value 0 means "use default value" (default: 64). */
+ ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
+ * Larger values improve collision resolution but decrease compression speed.
+ * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX .
+ * Special: value 0 means "use default value" (default: 3). */
+ ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table.
+ * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+ * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+ * Larger values improve compression speed.
+ * Deviating far from default value will likely result in a compression ratio decrease.
+ * Special: value 0 means "automatically determine hashEveryLog". */
/* frame parameters */
ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
@@ -1012,58 +1026,45 @@ typedef enum {
ZSTD_p_dictIDFlag, /* When applicable, dictionary's ID is written into frame header (default:1) */
/* multi-threading parameters */
- ZSTD_p_nbThreads=400, /* Select how many threads a compression job can spawn (default:1)
- * More threads improve speed, but also increase memory usage.
- * Can only receive a value > 1 if ZSTD_MULTITHREAD is enabled.
- * Special: value 0 means "do not change nbThreads" */
- ZSTD_p_jobSize, /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode.
- * Each compression job is completed in parallel, so indirectly controls the nb of active threads.
+ /* These parameters are only useful if multi-threading is enabled (ZSTD_MULTITHREAD).
+ * They return an error otherwise. */
+ ZSTD_p_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
+ * When nbWorkers >= 1, triggers asynchronous mode :
+ * ZSTD_compress_generic() consumes some input, flush some output if possible, and immediately gives back control to caller,
+ * while compression work is performed in parallel, within worker threads.
+ * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
+ * More workers improve speed, but also increase memory usage.
+ * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
+ ZSTD_p_jobSize, /* Size of a compression job. This value is enforced only in non-blocking mode.
+ * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
- * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+ * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced */
ZSTD_p_overlapSizeLog, /* Size of previous input reloaded at the beginning of each job.
* 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
- /* advanced parameters - may not remain available after API update */
+ /* =================================================================== */
+ /* experimental parameters - no stability guaranteed */
+ /* =================================================================== */
+
+ ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default.
+ * disabling it improves speed and decreases compression ratio by a large amount.
+ * note : this setting is automatically updated when changing compression level.
+ * positive compression levels set ZSTD_p_compressLiterals to 1.
+ * negative compression levels set ZSTD_p_compressLiterals to 0. */
+
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
- ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching.
- * This parameter is designed to improve the compression
- * ratio for large inputs with long distance matches.
- * This increases the memory usage as well as window size.
- * Note: setting this parameter sets all the LDM parameters
- * as well as ZSTD_p_windowLog. It should be set after
- * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and
- * other LDM parameters. Setting the compression level
- * after this parameter overrides the window log, though LDM
- * will remain enabled until explicitly disabled. */
- ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2.
- * Larger values increase memory usage and compression ratio, but decrease
- * compression speed.
- * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
- * (default: windowlog - 7). */
- ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher.
- * Larger/too small values usually decrease compression ratio.
- * Must be clamped between ZSTD_LDM_MINMATCH_MIN
- * and ZSTD_LDM_MINMATCH_MAX (default: 64). */
- ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
- * Larger values usually improve collision resolution but may decrease
- * compression speed.
- * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). */
- ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table.
- * The default is MAX(0, (windowLog - ldmHashLog)) to
- * optimize hash table usage.
- * Larger values improve compression speed. Deviating far from the
- * default value will likely result in a decrease in compression ratio.
- * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */
} ZSTD_cParameter;
/*! ZSTD_CCtx_setParameter() :
* Set one compression parameter, selected by enum ZSTD_cParameter.
+ * Setting a parameter is generally only possible during frame initialization (before starting compression),
+ * except for a few exceptions which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
* Note : when `value` is an enum, cast it to unsigned for proper type checking.
- * @result : informational value (typically, the one being set, possibly corrected),
+ * @result : informational value (typically, value being set clamped correctly),
* or an error code (which can be tested with ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
@@ -1079,26 +1080,24 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
/*! ZSTD_CCtx_loadDictionary() :
- * Create an internal CDict from dict buffer.
- * Decompression will have to use same buffer.
+ * Create an internal CDict from `dict` buffer.
+ * Decompression will have to use same dictionary.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
- * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
- * meaning "return to no-dictionary mode".
- * Note 1 : `dict` content will be copied internally. Use
- * ZSTD_CCtx_loadDictionary_byReference() to reference dictionary
- * content instead. The dictionary buffer must then outlive its
- * users.
+ * Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary,
+ * meaning "return to no-dictionary mode".
+ * Note 1 : Dictionary will be used for all future compression jobs.
+ * To return to "no-dictionary" situation, load a NULL dictionary
* Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
* For this reason, compression parameters cannot be changed anymore after loading a dictionary.
- * It's also a CPU-heavy operation, with non-negligible impact on latency.
- * Note 3 : Dictionary will be used for all future compression jobs.
- * To return to "no-dictionary" situation, load a NULL dictionary
- * Note 5 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary
- * content will be interpreted.
- */
+ * It's also a CPU consuming operation, with non-negligible impact on latency.
+ * Note 3 :`dict` content will be copied internally.
+ * Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead.
+ * In such a case, dictionary buffer must outlive its users.
+ * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ * to precisely select how dictionary content must be interpreted. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode);
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
/*! ZSTD_CCtx_refCDict() :
@@ -1110,8 +1109,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void
* Special : adding a NULL CDict means "return to no-dictionary mode".
* Note 1 : Currently, only one dictionary can be managed.
* Adding a new dictionary effectively "discards" any previous one.
- * Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
- */
+ * Note 2 : CDict is just referenced, its lifetime must outlive CCtx. */
ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
/*! ZSTD_CCtx_refPrefix() :
@@ -1121,20 +1119,29 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
* If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
- * Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
* Note 1 : Prefix buffer is referenced. It must outlive compression job.
* Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
- * It's a CPU-heavy operation, with non-negligible impact on latency.
- * Note 3 : By default, the prefix is treated as raw content
- * (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter
- * dictMode. */
+ * It's a CPU consuming operation, with non-negligible impact on latency.
+ * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
-ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode);
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_reset() :
+ * Return a CCtx to clean state.
+ * Useful after an error, or to interrupt an ongoing compression job and start a new one.
+ * Any internal data not yet flushed is cancelled.
+ * Dictionary (if any) is dropped.
+ * All parameters are back to default values.
+ * It's possible to modify compression parameters after a reset.
+ */
+ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
typedef enum {
- ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
+ ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal conditions */
ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
ZSTD_e_end /* flush any remaining data and close current frame. Any additional data starts a new frame. */
} ZSTD_EndDirective;
@@ -1150,10 +1157,11 @@ typedef enum {
* and then immediately returns, just indicating that there is some data remaining to be flushed.
* The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
* - Exception : in multi-threading mode, if the first call requests a ZSTD_e_end directive, it is blocking : it will complete compression before giving back control to caller.
- * - @return provides the minimum amount of data remaining to be flushed from internal buffers
+ * - @return provides a minimum amount of data remaining to be flushed from internal buffers
* or an error code, which can be tested using ZSTD_isError().
* if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
- * This is useful to determine if a ZSTD_e_flush or ZSTD_e_end directive is completed.
+ * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
* - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
* only ZSTD_e_end or ZSTD_e_flush operations are allowed.
* Before starting a new compression job, or changing compression parameters,
@@ -1164,16 +1172,6 @@ ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
-/*! ZSTD_CCtx_reset() :
- * Return a CCtx to clean state.
- * Useful after an error, or to interrupt an ongoing compression job and start a new one.
- * Any internal data not yet flushed is cancelled.
- * Dictionary (if any) is dropped.
- * All parameters are back to default values.
- * It's possible to modify compression parameters after a reset.
- */
-ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */
-
/*! ZSTD_compress_generic_simpleArgs() :
* Same as ZSTD_compress_generic(),
@@ -1207,25 +1205,26 @@ ZSTDLIB_API size_t ZSTD_compress_generic_simpleArgs (
* for static allocation for single-threaded compression.
*/
ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
-/*! ZSTD_resetCCtxParams() :
- * Reset params to default, with the default compression level.
+
+/*! ZSTD_CCtxParams_reset() :
+ * Reset params to default values.
*/
-ZSTDLIB_API size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
-/*! ZSTD_initCCtxParams() :
+/*! ZSTD_CCtxParams_init() :
* Initializes the compression parameters of cctxParams according to
* compression level. All other parameters are reset to their default values.
*/
-ZSTDLIB_API size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
-/*! ZSTD_initCCtxParams_advanced() :
+/*! ZSTD_CCtxParams_init_advanced() :
* Initializes the compression and frame parameters of cctxParams according to
* params. All other parameters are reset to their default values.
*/
-ZSTDLIB_API size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
-ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
/*! ZSTD_CCtxParam_setParameter() :
* Similar to ZSTD_CCtx_setParameter.
@@ -1238,9 +1237,10 @@ ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cP
/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
* Apply a set of ZSTD_CCtx_params to the compression context.
- * This must be done before the dictionary is loaded.
- * The pledgedSrcSize is treated as unknown.
- * Multithreading parameters are applied only if nbThreads > 1.
+ * This can be done even after compression is started,
+ * if nbWorkers==0, this will have no impact until a new compression is started.
+ * if nbWorkers>=1, new parameters will be picked up at next job,
+ * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
*/
ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
@@ -1267,9 +1267,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
* Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select
* how dictionary content will be interpreted and loaded.
*/
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); /* not implemented */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
/*! ZSTD_DCtx_refDDict() :
@@ -1281,7 +1281,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void
* Special : adding a NULL DDict means "return to no-dictionary mode".
* Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
*/
-ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); /* not implemented */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
/*! ZSTD_DCtx_refPrefix() :
@@ -1295,8 +1295,8 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
* Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode.
* Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
*/
-ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); /* not implemented */
-ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); /* not implemented */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
/*! ZSTD_DCtx_setMaxWindowSize() :
@@ -1389,7 +1389,7 @@ ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression */
+ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */