summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.appveyor.yml3
-rw-r--r--core/io/multiplayer_api.cpp30
-rw-r--r--core/io/multiplayer_api.h30
-rw-r--r--core/math/math_2d.h9
-rw-r--r--core/math/matrix3.cpp13
-rw-r--r--core/math/matrix3.h2
-rw-r--r--core/math/quat.cpp16
-rw-r--r--core/math/quat.h24
-rw-r--r--core/math/vector3.h10
-rw-r--r--core/pool_allocator.cpp2
-rw-r--r--core/variant_call.cpp18
-rw-r--r--doc/classes/Basis.xml18
-rw-r--r--doc/classes/EditorInterface.xml2
-rw-r--r--doc/classes/EditorSettings.xml24
-rw-r--r--doc/classes/Image.xml2
-rw-r--r--doc/classes/ImageTexture.xml4
-rw-r--r--doc/classes/Quat.xml41
-rw-r--r--doc/classes/Vector2.xml12
-rw-r--r--doc/classes/Vector3.xml12
-rw-r--r--editor/editor_inspector.cpp30
-rw-r--r--editor/editor_inspector.h30
-rw-r--r--editor/editor_properties.cpp30
-rw-r--r--editor/editor_properties.h30
-rw-r--r--editor/editor_settings.cpp5
-rw-r--r--editor/editor_settings.h2
-rw-r--r--editor/editor_spin_slider.cpp30
-rw-r--r--editor/editor_spin_slider.h30
-rw-r--r--editor/import/resource_importer_bitmask.cpp30
-rw-r--r--editor/import/resource_importer_bitmask.h30
-rw-r--r--editor/plugins/canvas_item_editor_plugin.cpp128
-rw-r--r--editor/plugins/canvas_item_editor_plugin.h2
-rw-r--r--editor/plugins/script_editor_plugin.cpp3
-rw-r--r--editor/plugins/skeleton_2d_editor_plugin.cpp30
-rw-r--r--editor/plugins/skeleton_2d_editor_plugin.h30
-rw-r--r--editor/plugins/sprite_editor_plugin.cpp30
-rw-r--r--editor/plugins/sprite_editor_plugin.h30
-rw-r--r--editor/scene_tree_dock.cpp14
-rw-r--r--editor/scene_tree_dock.h1
-rw-r--r--main/timer_sync.cpp30
-rw-r--r--main/timer_sync.h30
-rw-r--r--modules/csg/csg.cpp30
-rw-r--r--modules/csg/csg.h30
-rw-r--r--modules/csg/csg_gizmos.cpp30
-rw-r--r--modules/csg/csg_gizmos.h30
-rw-r--r--modules/csg/csg_shape.cpp30
-rw-r--r--modules/csg/csg_shape.h30
-rw-r--r--modules/mono/glue/builtin_types_glue.h30
-rw-r--r--modules/mono/glue/cs_files/GodotTaskScheduler.cs2
-rw-r--r--platform/javascript/SCsub6
-rw-r--r--platform/javascript/detect.py17
-rw-r--r--scene/2d/mesh_instance_2d.cpp30
-rw-r--r--scene/2d/mesh_instance_2d.h30
-rw-r--r--scene/2d/skeleton_2d.cpp30
-rw-r--r--scene/2d/skeleton_2d.h30
-rw-r--r--scene/2d/tile_map.cpp3
-rw-r--r--scene/resources/texture.cpp9
-rw-r--r--scene/resources/texture.h2
-rw-r--r--servers/physics/collision_solver_sat.cpp18
-rw-r--r--thirdparty/README.md2
-rw-r--r--thirdparty/zstd/common/bitstream.h2
-rw-r--r--thirdparty/zstd/common/compiler.h25
-rw-r--r--thirdparty/zstd/common/cpu.h216
-rw-r--r--thirdparty/zstd/common/error_private.c1
-rw-r--r--thirdparty/zstd/common/fse.h2
-rw-r--r--thirdparty/zstd/common/fse_decompress.c4
-rw-r--r--thirdparty/zstd/common/huf.h203
-rw-r--r--thirdparty/zstd/common/pool.c63
-rw-r--r--thirdparty/zstd/common/pool.h27
-rw-r--r--thirdparty/zstd/common/threading.h24
-rw-r--r--thirdparty/zstd/common/zstd_errors.h23
-rw-r--r--thirdparty/zstd/common/zstd_internal.h9
-rw-r--r--thirdparty/zstd/compress/fse_compress.c30
-rw-r--r--thirdparty/zstd/compress/huf_compress.c222
-rw-r--r--thirdparty/zstd/compress/zstd_compress.c1617
-rw-r--r--thirdparty/zstd/compress/zstd_compress_internal.h315
-rw-r--r--thirdparty/zstd/compress/zstd_double_fast.c142
-rw-r--r--thirdparty/zstd/compress/zstd_double_fast.h15
-rw-r--r--thirdparty/zstd/compress/zstd_fast.c158
-rw-r--r--thirdparty/zstd/compress/zstd_fast.h16
-rw-r--r--thirdparty/zstd/compress/zstd_lazy.c607
-rw-r--r--thirdparty/zstd/compress/zstd_lazy.h49
-rw-r--r--thirdparty/zstd/compress/zstd_ldm.c664
-rw-r--r--thirdparty/zstd/compress/zstd_ldm.h85
-rw-r--r--thirdparty/zstd/compress/zstd_opt.c250
-rw-r--r--thirdparty/zstd/compress/zstd_opt.h22
-rw-r--r--thirdparty/zstd/compress/zstdmt_compress.c1580
-rw-r--r--thirdparty/zstd/compress/zstdmt_compress.h48
-rw-r--r--thirdparty/zstd/decompress/huf_decompress.c680
-rw-r--r--thirdparty/zstd/decompress/zstd_decompress.c1216
-rw-r--r--thirdparty/zstd/zstd.h508
90 files changed, 6747 insertions, 3312 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 708961fd92..7691f65d6a 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -17,7 +17,8 @@ cache:
install:
- SET "PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- - pip install --egg scons # it will fail on AppVeyor without --egg flag
+ - pip install -U wheel # needed for pip install scons to work, otherwise a flag is missing
+ - pip install scons
- if defined VS call "%VS%" %ARCH% # if defined - so we can also use mingw
before_build:
diff --git a/core/io/multiplayer_api.cpp b/core/io/multiplayer_api.cpp
index 456d29520f..b0f2ca754d 100644
--- a/core/io/multiplayer_api.cpp
+++ b/core/io/multiplayer_api.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* multiplayer_api.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "core/io/multiplayer_api.h"
#include "core/io/marshalls.h"
#include "scene/main/node.h"
diff --git a/core/io/multiplayer_api.h b/core/io/multiplayer_api.h
index 25f445004d..64f59d32d8 100644
--- a/core/io/multiplayer_api.h
+++ b/core/io/multiplayer_api.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* multiplayer_api.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef MULTIPLAYER_PROTOCOL_H
#define MULTIPLAYER_PROTOCOL_H
diff --git a/core/math/math_2d.h b/core/math/math_2d.h
index 611d47e3ff..25c39e5d7a 100644
--- a/core/math/math_2d.h
+++ b/core/math/math_2d.h
@@ -112,6 +112,7 @@ struct Vector2 {
_FORCE_INLINE_ static Vector2 linear_interpolate(const Vector2 &p_a, const Vector2 &p_b, real_t p_t);
_FORCE_INLINE_ Vector2 linear_interpolate(const Vector2 &p_b, real_t p_t) const;
+ _FORCE_INLINE_ Vector2 slerp(const Vector2 &p_b, real_t p_t) const;
Vector2 cubic_interpolate(const Vector2 &p_b, const Vector2 &p_pre_a, const Vector2 &p_post_b, real_t p_t) const;
Vector2 slide(const Vector2 &p_normal) const;
@@ -263,6 +264,14 @@ Vector2 Vector2::linear_interpolate(const Vector2 &p_b, real_t p_t) const {
return res;
}
+Vector2 Vector2::slerp(const Vector2 &p_b, real_t p_t) const {
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND_V(is_normalized() == false, Vector2());
+#endif
+ real_t theta = angle_to(p_b);
+ return rotated(theta * p_t);
+}
+
Vector2 Vector2::linear_interpolate(const Vector2 &p_a, const Vector2 &p_b, real_t p_t) {
Vector2 res = p_a;
diff --git a/core/math/matrix3.cpp b/core/math/matrix3.cpp
index b0b05d1ec8..8ee8ccb457 100644
--- a/core/math/matrix3.cpp
+++ b/core/math/matrix3.cpp
@@ -826,3 +826,16 @@ void Basis::set_diagonal(const Vector3 p_diag) {
elements[2][1] = 0;
elements[2][2] = p_diag.z;
}
+
+Basis Basis::slerp(const Basis &target, const real_t &t) const {
+ // TODO: implement this directly without using quaternions to make it more efficient
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND_V(is_rotation() == false, Basis());
+ ERR_FAIL_COND_V(target.is_rotation() == false, Basis());
+#endif
+
+ Quat from(*this);
+ Quat to(target);
+
+ return Basis(from.slerp(to, t));
+}
diff --git a/core/math/matrix3.h b/core/math/matrix3.h
index fd383fc673..63d4f5d79d 100644
--- a/core/math/matrix3.h
+++ b/core/math/matrix3.h
@@ -155,6 +155,8 @@ public:
bool is_diagonal() const;
bool is_rotation() const;
+ Basis slerp(const Basis &target, const real_t &t) const;
+
operator String() const;
/* create / set */
diff --git a/core/math/quat.cpp b/core/math/quat.cpp
index 4f61401ac7..b938fc3cfd 100644
--- a/core/math/quat.cpp
+++ b/core/math/quat.cpp
@@ -98,6 +98,9 @@ void Quat::set_euler_yxz(const Vector3 &p_euler) {
// and similar for other axes.
// This implementation uses YXZ convention (Z is the first rotation).
Vector3 Quat::get_euler_yxz() const {
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND_V(is_normalized() == false, Vector3(0, 0, 0));
+#endif
Basis m(*this);
return m.get_euler_yxz();
}
@@ -135,11 +138,17 @@ bool Quat::is_normalized() const {
}
Quat Quat::inverse() const {
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND_V(is_normalized() == false, Quat(0, 0, 0, 0));
+#endif
return Quat(-x, -y, -z, w);
}
Quat Quat::slerp(const Quat &q, const real_t &t) const {
-
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND_V(is_normalized() == false, Quat(0, 0, 0, 0));
+ ERR_FAIL_COND_V(q.is_normalized() == false, Quat(0, 0, 0, 0));
+#endif
Quat to1;
real_t omega, cosom, sinom, scale0, scale1;
@@ -215,7 +224,10 @@ Quat::operator String() const {
return String::num(x) + ", " + String::num(y) + ", " + String::num(z) + ", " + String::num(w);
}
-Quat::Quat(const Vector3 &axis, const real_t &angle) {
+void Quat::set_axis_angle(const Vector3 &axis, const real_t &angle) {
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND(axis.is_normalized() == false);
+#endif
real_t d = axis.length();
if (d == 0)
set(0, 0, 0, 0);
diff --git a/core/math/quat.h b/core/math/quat.h
index ebc924504b..3e1344a913 100644
--- a/core/math/quat.h
+++ b/core/math/quat.h
@@ -64,11 +64,13 @@ public:
Quat slerpni(const Quat &q, const real_t &t) const;
Quat cubic_slerp(const Quat &q, const Quat &prep, const Quat &postq, const real_t &t) const;
+ void set_axis_angle(const Vector3 &axis, const real_t &angle);
_FORCE_INLINE_ void get_axis_angle(Vector3 &r_axis, real_t &r_angle) const {
r_angle = 2 * Math::acos(w);
- r_axis.x = x / Math::sqrt(1 - w * w);
- r_axis.y = y / Math::sqrt(1 - w * w);
- r_axis.z = z / Math::sqrt(1 - w * w);
+ real_t r = ((real_t)1) / Math::sqrt(1 - w * w);
+ r_axis.x = x * r;
+ r_axis.y = y * r;
+ r_axis.z = z * r;
}
void operator*=(const Quat &q);
@@ -83,9 +85,9 @@ public:
_FORCE_INLINE_ Vector3 xform(const Vector3 &v) const {
- Quat q = *this * v;
- q *= this->inverse();
- return Vector3(q.x, q.y, q.z);
+ Vector3 u(x, y, z);
+ Vector3 uv = u.cross(v);
+ return v + ((uv * w) + u.cross(uv)) * ((real_t)2);
}
_FORCE_INLINE_ void operator+=(const Quat &q);
@@ -115,7 +117,15 @@ public:
z = p_z;
w = p_w;
}
- Quat(const Vector3 &axis, const real_t &angle);
+ Quat(const Vector3 &axis, const real_t &angle) { set_axis_angle(axis, angle); }
+
+ Quat(const Vector3 &euler) { set_euler(euler); }
+ Quat(const Quat &q) {
+ x = q.x;
+ y = q.y;
+ z = q.z;
+ w = q.w;
+ }
Quat(const Vector3 &v0, const Vector3 &v1) // shortest arc
{
diff --git a/core/math/vector3.h b/core/math/vector3.h
index 3bbfd7627c..433adf09ee 100644
--- a/core/math/vector3.h
+++ b/core/math/vector3.h
@@ -91,6 +91,7 @@ struct Vector3 {
/* Static Methods between 2 vector3s */
_FORCE_INLINE_ Vector3 linear_interpolate(const Vector3 &p_b, real_t p_t) const;
+ _FORCE_INLINE_ Vector3 slerp(const Vector3 &p_b, real_t p_t) const;
Vector3 cubic_interpolate(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, real_t p_t) const;
Vector3 cubic_interpolaten(const Vector3 &p_b, const Vector3 &p_pre_a, const Vector3 &p_post_b, real_t p_t) const;
@@ -218,6 +219,15 @@ Vector3 Vector3::linear_interpolate(const Vector3 &p_b, real_t p_t) const {
z + (p_t * (p_b.z - z)));
}
+Vector3 Vector3::slerp(const Vector3 &p_b, real_t p_t) const {
+#ifdef MATH_CHECKS
+ ERR_FAIL_COND_V(is_normalized() == false, Vector3());
+#endif
+
+ real_t theta = angle_to(p_b);
+ return rotated(cross(p_b), theta * p_t);
+}
+
real_t Vector3::distance_to(const Vector3 &p_b) const {
return (p_b - *this).length();
diff --git a/core/pool_allocator.cpp b/core/pool_allocator.cpp
index 017586b92a..8952314212 100644
--- a/core/pool_allocator.cpp
+++ b/core/pool_allocator.cpp
@@ -359,7 +359,7 @@ Error PoolAllocator::resize(ID p_mem, int p_new_size) {
//p_new_size = align(p_new_size)
int _free = free_mem; // - static_area_size;
- if ((_free + aligned(e->len)) - alloc_size < 0) {
+ if (uint32_t(_free + aligned(e->len)) < alloc_size) {
mt_unlock();
ERR_FAIL_V(ERR_OUT_OF_MEMORY);
};
diff --git a/core/variant_call.cpp b/core/variant_call.cpp
index bd1cde5a82..4e883d496f 100644
--- a/core/variant_call.cpp
+++ b/core/variant_call.cpp
@@ -340,6 +340,7 @@ struct _VariantCall {
VCALL_LOCALMEM1R(Vector2, angle_to);
VCALL_LOCALMEM1R(Vector2, angle_to_point);
VCALL_LOCALMEM2R(Vector2, linear_interpolate);
+ VCALL_LOCALMEM2R(Vector2, slerp);
VCALL_LOCALMEM4R(Vector2, cubic_interpolate);
VCALL_LOCALMEM1R(Vector2, rotated);
VCALL_LOCALMEM0R(Vector2, tangent);
@@ -380,6 +381,7 @@ struct _VariantCall {
VCALL_LOCALMEM1R(Vector3, snapped);
VCALL_LOCALMEM2R(Vector3, rotated);
VCALL_LOCALMEM2R(Vector3, linear_interpolate);
+ VCALL_LOCALMEM2R(Vector3, slerp);
VCALL_LOCALMEM4R(Vector3, cubic_interpolate);
VCALL_LOCALMEM1R(Vector3, dot);
VCALL_LOCALMEM1R(Vector3, cross);
@@ -439,6 +441,9 @@ struct _VariantCall {
VCALL_LOCALMEM2R(Quat, slerp);
VCALL_LOCALMEM2R(Quat, slerpni);
VCALL_LOCALMEM4R(Quat, cubic_slerp);
+ VCALL_LOCALMEM0R(Quat, get_euler);
+ VCALL_LOCALMEM1(Quat, set_euler);
+ VCALL_LOCALMEM2(Quat, set_axis_angle);
VCALL_LOCALMEM0R(Color, to_rgba32);
VCALL_LOCALMEM0R(Color, to_argb32);
@@ -876,6 +881,11 @@ struct _VariantCall {
r_ret = Quat(((Vector3)(*p_args[0])), ((float)(*p_args[1])));
}
+ static void Quat_init3(Variant &r_ret, const Variant **p_args) {
+
+ r_ret = Quat(((Vector3)(*p_args[0])));
+ }
+
static void Color_init1(Variant &r_ret, const Variant **p_args) {
r_ret = Color(*p_args[0], *p_args[1], *p_args[2], *p_args[3]);
@@ -1150,7 +1160,7 @@ Variant Variant::construct(const Variant::Type p_type, const Variant **p_args, i
case RECT2: return (Rect2(*p_args[0]));
case VECTOR3: return (Vector3(*p_args[0]));
case PLANE: return (Plane(*p_args[0]));
- case QUAT: return (Quat(*p_args[0]));
+ case QUAT: return (p_args[0]->operator Quat());
case AABB:
return (::AABB(*p_args[0])); // 10
case BASIS: return (Basis(p_args[0]->operator Basis()));
@@ -1518,6 +1528,7 @@ void register_variant_methods() {
ADDFUNC1R(VECTOR2, REAL, Vector2, angle_to, VECTOR2, "to", varray());
ADDFUNC1R(VECTOR2, REAL, Vector2, angle_to_point, VECTOR2, "to", varray());
ADDFUNC2R(VECTOR2, VECTOR2, Vector2, linear_interpolate, VECTOR2, "b", REAL, "t", varray());
+ ADDFUNC2R(VECTOR2, VECTOR2, Vector2, slerp, VECTOR2, "b", REAL, "t", varray());
ADDFUNC4R(VECTOR2, VECTOR2, Vector2, cubic_interpolate, VECTOR2, "b", VECTOR2, "pre_a", VECTOR2, "post_b", REAL, "t", varray());
ADDFUNC1R(VECTOR2, VECTOR2, Vector2, rotated, REAL, "phi", varray());
ADDFUNC0R(VECTOR2, VECTOR2, Vector2, tangent, varray());
@@ -1557,6 +1568,7 @@ void register_variant_methods() {
ADDFUNC1R(VECTOR3, VECTOR3, Vector3, snapped, VECTOR3, "by", varray());
ADDFUNC2R(VECTOR3, VECTOR3, Vector3, rotated, VECTOR3, "axis", REAL, "phi", varray());
ADDFUNC2R(VECTOR3, VECTOR3, Vector3, linear_interpolate, VECTOR3, "b", REAL, "t", varray());
+ ADDFUNC2R(VECTOR3, VECTOR3, Vector3, slerp, VECTOR3, "b", REAL, "t", varray());
ADDFUNC4R(VECTOR3, VECTOR3, Vector3, cubic_interpolate, VECTOR3, "b", VECTOR3, "pre_a", VECTOR3, "post_b", REAL, "t", varray());
ADDFUNC1R(VECTOR3, REAL, Vector3, dot, VECTOR3, "b", varray());
ADDFUNC1R(VECTOR3, VECTOR3, Vector3, cross, VECTOR3, "b", varray());
@@ -1594,6 +1606,9 @@ void register_variant_methods() {
ADDFUNC2R(QUAT, QUAT, Quat, slerp, QUAT, "b", REAL, "t", varray());
ADDFUNC2R(QUAT, QUAT, Quat, slerpni, QUAT, "b", REAL, "t", varray());
ADDFUNC4R(QUAT, QUAT, Quat, cubic_slerp, QUAT, "b", QUAT, "pre_a", QUAT, "post_b", REAL, "t", varray());
+ ADDFUNC0R(QUAT, VECTOR3, Quat, get_euler, varray());
+ ADDFUNC1(QUAT, NIL, Quat, set_euler, VECTOR3, "euler", varray());
+ ADDFUNC2(QUAT, NIL, Quat, set_axis_angle, VECTOR3, "axis", REAL, "angle", varray());
ADDFUNC0R(COLOR, INT, Color, to_rgba32, varray());
ADDFUNC0R(COLOR, INT, Color, to_argb32, varray());
@@ -1816,6 +1831,7 @@ void register_variant_methods() {
_VariantCall::add_constructor(_VariantCall::Quat_init1, Variant::QUAT, "x", Variant::REAL, "y", Variant::REAL, "z", Variant::REAL, "w", Variant::REAL);
_VariantCall::add_constructor(_VariantCall::Quat_init2, Variant::QUAT, "axis", Variant::VECTOR3, "angle", Variant::REAL);
+ _VariantCall::add_constructor(_VariantCall::Quat_init3, Variant::QUAT, "euler", Variant::VECTOR3);
_VariantCall::add_constructor(_VariantCall::Color_init1, Variant::COLOR, "r", Variant::REAL, "g", Variant::REAL, "b", Variant::REAL, "a", Variant::REAL);
_VariantCall::add_constructor(_VariantCall::Color_init2, Variant::COLOR, "r", Variant::REAL, "g", Variant::REAL, "b", Variant::REAL);
diff --git a/doc/classes/Basis.xml b/doc/classes/Basis.xml
index 554ed99964..b9dc763820 100644
--- a/doc/classes/Basis.xml
+++ b/doc/classes/Basis.xml
@@ -4,11 +4,12 @@
3x3 matrix datatype.
</brief_description>
<description>
- 3x3 matrix used for 3D rotation and scale. Contains 3 vector fields x,y and z as its columns, which can be interpreted as the local basis vectors of a transformation. Can also be accessed as array of 3D vectors. These vectors are orthogonal to each other, but are not necessarily normalized. Almost always used as orthogonal basis for a [Transform].
+ 3x3 matrix used for 3D rotation and scale. Contains 3 vector fields x,y and z as its columns, which can be interpreted as the local basis vectors of a transformation. Can also be accessed as array of 3D vectors. These vectors are orthogonal to each other, but are not necessarily normalized (due to scaling). Almost always used as orthogonal basis for a [Transform].
For such use, it is composed of a scaling and a rotation matrix, in that order (M = R.S).
</description>
<tutorials>
http://docs.godotengine.org/en/latest/tutorials/3d/using_transforms.html
+ http://docs.godotengine.org/en/latest/tutorials/math/rotations.html
</tutorials>
<demos>
</demos>
@@ -105,7 +106,7 @@
<argument index="1" name="phi" type="float">
</argument>
<description>
- Introduce an additional rotation around the given axis by phi (radians). Only relevant when the matrix is being used as a part of [Transform]. The axis must be a normalized vector.
+ Introduce an additional rotation around the given axis by phi (radians). The axis must be a normalized vector.
</description>
</method>
<method name="scaled">
@@ -114,7 +115,18 @@
<argument index="0" name="scale" type="Vector3">
</argument>
<description>
- Introduce an additional scaling specified by the given 3D scaling factor. Only relevant when the matrix is being used as a part of [Transform].
+ Introduce an additional scaling specified by the given 3D scaling factor.
+ </description>
+ </method>
+ <method name="slerp">
+ <return type="Basis">
+ </return>
+ <argument index="0" name="b" type="Basis">
+ </argument>
+ <argument index="1" name="t" type="float">
+ </argument>
+ <description>
+ Assuming that the matrix is a proper rotation matrix, slerp performs a spherical-linear interpolation with another rotation matrix.
</description>
</method>
<method name="tdotx">
diff --git a/doc/classes/EditorInterface.xml b/doc/classes/EditorInterface.xml
index 006a7ca690..19bd7e6d52 100644
--- a/doc/classes/EditorInterface.xml
+++ b/doc/classes/EditorInterface.xml
@@ -4,7 +4,7 @@
Editor interface and main components.
</brief_description>
<description>
- Editor interface. Allows saving and (re-)loading scenes, rendering mesh previews, inspecting and editing resources and objects and provides access to [EditorSettings], [EditorFileSystem], [EditorResourcePreview]\ er, [ScriptEditor], the editor viewport, as well as information about scenes. Also see [EditorPlugin] and [EditorScript].
+ Editor interface. Allows saving and (re-)loading scenes, rendering mesh previews, inspecting and editing resources and objects and provides access to [EditorSettings], [EditorFileSystem], [EditorResourcePreview], [ScriptEditor], the editor viewport, as well as information about scenes. Also see [EditorPlugin] and [EditorScript].
</description>
<tutorials>
</tutorials>
diff --git a/doc/classes/EditorSettings.xml b/doc/classes/EditorSettings.xml
index cdd9560eda..5325a67de3 100644
--- a/doc/classes/EditorSettings.xml
+++ b/doc/classes/EditorSettings.xml
@@ -55,7 +55,19 @@
Get the list of favorite directories for this project.
</description>
</method>
- <method name="get_project_settings_dir" qualifiers="const">
+ <method name="get_project_metadata" qualifiers="const">
+ <return type="Variant">
+ </return>
+ <argument index="0" name="section" type="String">
+ </argument>
+ <argument index="1" name="key" type="String">
+ </argument>
+ <argument index="2" name="default" type="Variant" default="null">
+ </argument>
+ <description>
+ </description>
+ </method>
+ <method name="get_project_settings_dir">
<return type="String">
</return>
<description>
@@ -131,6 +143,16 @@
<description>
</description>
</method>
+ <method name="set_project_metadata">
+ <argument index="0" name="section" type="String">
+ </argument>
+ <argument index="1" name="key" type="String">
+ </argument>
+ <argument index="2" name="data" type="Variant">
+ </argument>
+ <description>
+ </description>
+ </method>
<method name="set_recent_dirs">
<return type="void">
</return>
diff --git a/doc/classes/Image.xml b/doc/classes/Image.xml
index ca2d519e8a..760b0c6bdc 100644
--- a/doc/classes/Image.xml
+++ b/doc/classes/Image.xml
@@ -338,6 +338,7 @@
<argument index="0" name="buffer" type="PoolByteArray">
</argument>
<description>
+ Loads an image from the binary contents of a JPEG file.
</description>
</method>
<method name="load_png_from_buffer">
@@ -346,6 +347,7 @@
<argument index="0" name="buffer" type="PoolByteArray">
</argument>
<description>
+ Loads an image from the binary contents of a PNG file.
</description>
</method>
<method name="lock">
diff --git a/doc/classes/ImageTexture.xml b/doc/classes/ImageTexture.xml
index 9a5937299c..0bff3317db 100644
--- a/doc/classes/ImageTexture.xml
+++ b/doc/classes/ImageTexture.xml
@@ -47,12 +47,12 @@
</description>
</method>
<method name="load">
- <return type="void">
+ <return type="int" enum="Error">
</return>
<argument index="0" name="path" type="String">
</argument>
<description>
- Load an [code]ImageTexture[/code].
+ Load an [code]ImageTexture[/code] from a file path.
</description>
</method>
<method name="set_data">
diff --git a/doc/classes/Quat.xml b/doc/classes/Quat.xml
index 33f2b9758b..4121790881 100644
--- a/doc/classes/Quat.xml
+++ b/doc/classes/Quat.xml
@@ -4,13 +4,14 @@
Quaternion.
</brief_description>
<description>
- A 4-dimensional vector representing a rotation.
- The vector represents a 4 dimensional complex number where multiplication of the basis elements is not commutative (multiplying i with j gives a different result than multiplying j with i).
- Multiplying quaternions reproduces rotation sequences. However quaternions need to be often renormalized, or else they suffer from precision issues.
- It can be used to perform SLERP (spherical-linear interpolation) between two rotations.
+ A unit quaternion used for representing 3D rotations.
+ It is similar to [Basis], which implements matrix representation of rotations, and can be parametrized using both an axis-angle pair or Euler angles. But due to its compactness and the way it is stored in memory, certain operations (obtaining axis-angle and performing SLERP, in particular) are more efficient and robust against floating point errors.
+
+ Quaternions need to be (re)normalized.
</description>
<tutorials>
http://docs.godotengine.org/en/latest/tutorials/3d/using_transforms.html#interpolating-with-quaternions
+ http://docs.godotengine.org/en/latest/tutorials/math/rotations.html
</tutorials>
<demos>
</demos>
@@ -44,6 +45,15 @@
<method name="Quat">
<return type="Quat">
</return>
+ <argument index="0" name="euler" type="Vector3">
+ </argument>
+ <description>
+ Returns a quaternion that will perform a rotation specified by Euler angles (in the YXZ convention: first Z, then X, and Y last), given in the vector format as (X-angle, Y-angle, Z-angle).
+ </description>
+ </method>
+ <method name="Quat">
+ <return type="Quat">
+ </return>
<argument index="0" name="from" type="Basis">
</argument>
<description>
@@ -74,6 +84,13 @@
Returns the dot product of two quaternions.
</description>
</method>
+ <method name="get_euler">
+ <return type="Vector3">
+ </return>
+ <description>
+ Return Euler angles (in the YXZ convention: first Z, then X, and Y last) corresponding to the rotation represented by the unit quaternion. Returned vector contains the rotation angles in the format (X-angle, Y-angle, Z-angle).
+ </description>
+ </method>
<method name="inverse">
<return type="Quat">
</return>
@@ -109,6 +126,22 @@
Returns a copy of the quaternion, normalized to unit length.
</description>
</method>
+ <method name="set_axis_angle">
+ <argument index="0" name="axis" type="Vector3">
+ </argument>
+ <argument index="1" name="phi" type="float">
+ </argument>
+ <description>
+ Set the quaternion to a rotation which rotates around axis by the specified angle, in radians. The axis must be a normalized vector.
+ </description>
+ </method>
+ <method name="set_euler">
+ <argument index="0" name="euler" type="Vector3">
+ </argument>
+ <description>
+ Set the quaternion to a rotation specified by Euler angles (in the YXZ convention: first Z, then X, and Y last), given in the vector format as (X-angle, Y-angle, Z-angle).
+ </description>
+ </method>
<method name="slerp">
<return type="Quat">
</return>
diff --git a/doc/classes/Vector2.xml b/doc/classes/Vector2.xml
index ec92dcf900..e3fbaff62d 100644
--- a/doc/classes/Vector2.xml
+++ b/doc/classes/Vector2.xml
@@ -208,6 +208,18 @@
<description>
</description>
</method>
+ <method name="slerp">
+ <return type="Vector2">
+ </return>
+ <argument index="0" name="b" type="Vector2">
+ </argument>
+ <argument index="1" name="t" type="float">
+ </argument>
+ <description>
+ Returns the result of SLERP between this vector and "b", by amount "t". "t" should be a float of 0.0-1.0, a percentage of how far along the interpolation is.
+ Both vectors need to be normalized.
+ </description>
+ </method>
<method name="slide">
<return type="Vector2">
</return>
diff --git a/doc/classes/Vector3.xml b/doc/classes/Vector3.xml
index a5fc62b6f0..84c16c9fc5 100644
--- a/doc/classes/Vector3.xml
+++ b/doc/classes/Vector3.xml
@@ -210,6 +210,18 @@
<description>
</description>
</method>
+ <method name="slerp">
+ <return type="Vector3">
+ </return>
+ <argument index="0" name="b" type="Vector3">
+ </argument>
+ <argument index="1" name="t" type="float">
+ </argument>
+ <description>
+ Returns the result of SLERP between this vector and "b", by amount "t". "t" should be a float of 0.0-1.0, a percentage of how far along the interpolation is.
+ Both vectors need to be normalized.
+ </description>
+ </method>
<method name="slide">
<return type="Vector3">
</return>
diff --git a/editor/editor_inspector.cpp b/editor/editor_inspector.cpp
index 6129387a5c..38e74c126e 100644
--- a/editor/editor_inspector.cpp
+++ b/editor/editor_inspector.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* editor_inspector.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "editor_inspector.h"
#include "array_property_edit.h"
#include "dictionary_property_edit.h"
diff --git a/editor/editor_inspector.h b/editor/editor_inspector.h
index 64be316b0e..410c76dbb6 100644
--- a/editor/editor_inspector.h
+++ b/editor/editor_inspector.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* editor_inspector.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef EDITOR_INSPECTOR_H
#define EDITOR_INSPECTOR_H
diff --git a/editor/editor_properties.cpp b/editor/editor_properties.cpp
index fb2570d472..f3397d7980 100644
--- a/editor/editor_properties.cpp
+++ b/editor/editor_properties.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* editor_properties.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "editor_properties.h"
#include "editor/editor_resource_preview.h"
#include "editor_node.h"
diff --git a/editor/editor_properties.h b/editor/editor_properties.h
index 4fcef5fdae..cf0c735b37 100644
--- a/editor/editor_properties.h
+++ b/editor/editor_properties.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* editor_properties.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef EDITOR_PROPERTIES_H
#define EDITOR_PROPERTIES_H
diff --git a/editor/editor_settings.cpp b/editor/editor_settings.cpp
index fe0392334c..a47605be15 100644
--- a/editor/editor_settings.cpp
+++ b/editor/editor_settings.cpp
@@ -1151,7 +1151,7 @@ void EditorSettings::set_project_metadata(const String &p_section, const String
cf->save(path);
}
-Variant EditorSettings::get_project_metadata(const String &p_section, const String &p_key, Variant p_default) {
+Variant EditorSettings::get_project_metadata(const String &p_section, const String &p_key, Variant p_default) const {
Ref<ConfigFile> cf = memnew(ConfigFile);
String path = get_project_settings_dir().plus_file("project_metadata.cfg");
Error err = cf->load(path);
@@ -1493,6 +1493,9 @@ void EditorSettings::_bind_methods() {
ClassDB::bind_method(D_METHOD("get_settings_dir"), &EditorSettings::get_settings_dir);
ClassDB::bind_method(D_METHOD("get_project_settings_dir"), &EditorSettings::get_project_settings_dir);
+ ClassDB::bind_method(D_METHOD("set_project_metadata", "section", "key", "data"), &EditorSettings::set_project_metadata);
+ ClassDB::bind_method(D_METHOD("get_project_metadata", "section", "key", "default"), &EditorSettings::get_project_metadata, DEFVAL(Variant()));
+
ClassDB::bind_method(D_METHOD("set_favorite_dirs", "dirs"), &EditorSettings::set_favorite_dirs);
ClassDB::bind_method(D_METHOD("get_favorite_dirs"), &EditorSettings::get_favorite_dirs);
ClassDB::bind_method(D_METHOD("set_recent_dirs", "dirs"), &EditorSettings::set_recent_dirs);
diff --git a/editor/editor_settings.h b/editor/editor_settings.h
index e196ca506e..b48aac89c7 100644
--- a/editor/editor_settings.h
+++ b/editor/editor_settings.h
@@ -167,7 +167,7 @@ public:
String get_cache_dir() const;
void set_project_metadata(const String &p_section, const String &p_key, Variant p_data);
- Variant get_project_metadata(const String &p_section, const String &p_key, Variant p_default);
+ Variant get_project_metadata(const String &p_section, const String &p_key, Variant p_default) const;
void set_favorite_dirs(const Vector<String> &p_favorites_dirs);
Vector<String> get_favorite_dirs() const;
diff --git a/editor/editor_spin_slider.cpp b/editor/editor_spin_slider.cpp
index 9764364273..087dcd649f 100644
--- a/editor/editor_spin_slider.cpp
+++ b/editor/editor_spin_slider.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* editor_spin_slider.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "editor_spin_slider.h"
#include "editor_scale.h"
#include "os/input.h"
diff --git a/editor/editor_spin_slider.h b/editor/editor_spin_slider.h
index ac8d9e15d6..4956990dc2 100644
--- a/editor/editor_spin_slider.h
+++ b/editor/editor_spin_slider.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* editor_spin_slider.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef EDITOR_SPIN_SLIDER_H
#define EDITOR_SPIN_SLIDER_H
diff --git a/editor/import/resource_importer_bitmask.cpp b/editor/import/resource_importer_bitmask.cpp
index a5d3959952..7b330936f6 100644
--- a/editor/import/resource_importer_bitmask.cpp
+++ b/editor/import/resource_importer_bitmask.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* resource_importer_bitmask.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "resource_importer_bitmask.h"
#include "core/image.h"
#include "editor/editor_file_system.h"
diff --git a/editor/import/resource_importer_bitmask.h b/editor/import/resource_importer_bitmask.h
index 8a3cafa7ce..f3537df819 100644
--- a/editor/import/resource_importer_bitmask.h
+++ b/editor/import/resource_importer_bitmask.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* resource_importer_bitmask.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef RESOURCE_IMPORTER_BITMASK_H
#define RESOURCE_IMPORTER_BITMASK_H
diff --git a/editor/plugins/canvas_item_editor_plugin.cpp b/editor/plugins/canvas_item_editor_plugin.cpp
index f27796db5e..488f687515 100644
--- a/editor/plugins/canvas_item_editor_plugin.cpp
+++ b/editor/plugins/canvas_item_editor_plugin.cpp
@@ -539,6 +539,88 @@ void CanvasItemEditor::_get_canvas_items_at_pos(const Point2 &p_pos, Vector<_Sel
}
}
+void CanvasItemEditor::_get_bones_at_pos(const Point2 &p_pos, Vector<_SelectResult> &r_items) {
+ Point2 screen_pos = transform.xform(p_pos);
+
+ for (Map<BoneKey, BoneList>::Element *E = bone_list.front(); E; E = E->next()) {
+ Node2D *from_node = Object::cast_to<Node2D>(ObjectDB::get_instance(E->key().from));
+ Node2D *to_node = Object::cast_to<Node2D>(ObjectDB::get_instance(E->key().to));
+
+ Vector<Vector2> bone_shape;
+ if (!_get_bone_shape(&bone_shape, NULL, E))
+ continue;
+
+ // Check if the point is inside the Polygon2D
+ if (Geometry::is_point_in_polygon(screen_pos, bone_shape)) {
+ // Check if the item is already in the list
+ bool duplicate = false;
+ for (int i = 0; i < r_items.size(); i++) {
+ if (r_items[i].item == from_node) {
+ duplicate = true;
+ break;
+ }
+ }
+ if (duplicate)
+ continue;
+
+ // Else, add it
+ _SelectResult res;
+ res.item = from_node;
+ res.z_index = from_node ? from_node->get_z_index() : 0;
+ res.has_z = from_node;
+ r_items.push_back(res);
+ }
+ }
+}
+
+bool CanvasItemEditor::_get_bone_shape(Vector<Vector2> *shape, Vector<Vector2> *outline_shape, Map<BoneKey, BoneList>::Element *bone) {
+ int bone_width = EditorSettings::get_singleton()->get("editors/2d/bone_width");
+ int bone_outline_width = EditorSettings::get_singleton()->get("editors/2d/bone_outline_size");
+
+ Node2D *from_node = Object::cast_to<Node2D>(ObjectDB::get_instance(bone->key().from));
+ Node2D *to_node = Object::cast_to<Node2D>(ObjectDB::get_instance(bone->key().to));
+
+ if (!from_node->is_inside_tree())
+ return false; //may have been removed
+ if (!from_node)
+ return false;
+
+ if (!to_node && bone->get().length == 0)
+ return false;
+
+ Vector2 from = transform.xform(from_node->get_global_position());
+ Vector2 to;
+
+ if (to_node)
+ to = transform.xform(to_node->get_global_position());
+ else
+ to = transform.xform(from_node->get_global_transform().xform(Vector2(bone->get().length, 0)));
+
+ Vector2 rel = to - from;
+ Vector2 relt = rel.tangent().normalized() * bone_width;
+ Vector2 reln = rel.normalized();
+ Vector2 reltn = relt.normalized();
+
+ if (shape) {
+ shape->clear();
+ shape->push_back(from);
+ shape->push_back(from + rel * 0.2 + relt);
+ shape->push_back(to);
+ shape->push_back(from + rel * 0.2 - relt);
+ }
+
+ if (outline_shape) {
+ outline_shape->clear();
+ outline_shape->push_back(from + (-reln - reltn) * bone_outline_width);
+ outline_shape->push_back(from + (-reln + reltn) * bone_outline_width);
+ outline_shape->push_back(from + rel * 0.2 + relt + reltn * bone_outline_width);
+ outline_shape->push_back(to + (reln + reltn) * bone_outline_width);
+ outline_shape->push_back(to + (reln - reltn) * bone_outline_width);
+ outline_shape->push_back(from + rel * 0.2 - relt - reltn * bone_outline_width);
+ }
+ return true;
+}
+
void CanvasItemEditor::_find_canvas_items_in_rect(const Rect2 &p_rect, Node *p_node, List<CanvasItem *> *r_items, const Transform2D &p_parent_xform, const Transform2D &p_canvas_xform) {
if (!p_node)
return;
@@ -1796,8 +1878,13 @@ bool CanvasItemEditor::_gui_input_select(const Ref<InputEvent> &p_event) {
// Find the item to select
CanvasItem *canvas_item = NULL;
Vector<_SelectResult> selection;
+
+ // Retrieve the items
_get_canvas_items_at_pos(click, selection, editor_selection->get_selection().empty() ? 1 : 0);
+ // Retrieve the bones
+ _get_bones_at_pos(click, selection);
+
for (int i = 0; i < selection.size(); i++) {
if (editor_selection->is_selected(selection[i].item)) {
// Drag the node(s) if requested
@@ -2588,57 +2675,22 @@ void CanvasItemEditor::_draw_bones() {
RID ci = viewport->get_canvas_item();
if (skeleton_show_bones) {
- int bone_width = EditorSettings::get_singleton()->get("editors/2d/bone_width");
Color bone_color1 = EditorSettings::get_singleton()->get("editors/2d/bone_color1");
Color bone_color2 = EditorSettings::get_singleton()->get("editors/2d/bone_color2");
Color bone_ik_color = EditorSettings::get_singleton()->get("editors/2d/bone_ik_color");
Color bone_outline_color = EditorSettings::get_singleton()->get("editors/2d/bone_outline_color");
Color bone_selected_color = EditorSettings::get_singleton()->get("editors/2d/bone_selected_color");
- int bone_outline_size = EditorSettings::get_singleton()->get("editors/2d/bone_outline_size");
for (Map<BoneKey, BoneList>::Element *E = bone_list.front(); E; E = E->next()) {
- Node2D *from_node = Object::cast_to<Node2D>(ObjectDB::get_instance(E->key().from));
- Node2D *to_node = Object::cast_to<Node2D>(ObjectDB::get_instance(E->key().to));
-
- if (!from_node->is_inside_tree())
- continue; //may have been removed
- if (!from_node)
- continue;
-
- if (!to_node && E->get().length == 0)
- continue;
-
- Vector2 from = transform.xform(from_node->get_global_position());
- Vector2 to;
-
- if (to_node)
- to = transform.xform(to_node->get_global_position());
- else
- to = transform.xform(from_node->get_global_transform().xform(Vector2(E->get().length, 0)));
-
- Vector2 rel = to - from;
- Vector2 relt = rel.tangent().normalized() * bone_width;
- Vector2 reln = rel.normalized();
- Vector2 reltn = relt.normalized();
-
Vector<Vector2> bone_shape;
- bone_shape.push_back(from);
- bone_shape.push_back(from + rel * 0.2 + relt);
- bone_shape.push_back(to);
- bone_shape.push_back(from + rel * 0.2 - relt);
-
Vector<Vector2> bone_shape_outline;
- bone_shape_outline.push_back(from + (-reln - reltn) * bone_outline_size);
- bone_shape_outline.push_back(from + (-reln + reltn) * bone_outline_size);
- bone_shape_outline.push_back(from + rel * 0.2 + relt + reltn * bone_outline_size);
- bone_shape_outline.push_back(to + (reln + reltn) * bone_outline_size);
- bone_shape_outline.push_back(to + (reln - reltn) * bone_outline_size);
- bone_shape_outline.push_back(from + rel * 0.2 - relt - reltn * bone_outline_size);
+ if (!_get_bone_shape(&bone_shape, &bone_shape_outline, E))
+ continue;
+ Node2D *from_node = Object::cast_to<Node2D>(ObjectDB::get_instance(E->key().from));
Vector<Color> colors;
if (from_node->has_meta("_edit_ik_")) {
-
colors.push_back(bone_ik_color);
colors.push_back(bone_ik_color);
colors.push_back(bone_ik_color);
diff --git a/editor/plugins/canvas_item_editor_plugin.h b/editor/plugins/canvas_item_editor_plugin.h
index a1957b892e..4d2af11303 100644
--- a/editor/plugins/canvas_item_editor_plugin.h
+++ b/editor/plugins/canvas_item_editor_plugin.h
@@ -351,6 +351,7 @@ class CanvasItemEditor : public VBoxContainer {
void _find_canvas_items_at_pos(const Point2 &p_pos, Node *p_node, Vector<_SelectResult> &r_items, int p_limit = 0, const Transform2D &p_parent_xform = Transform2D(), const Transform2D &p_canvas_xform = Transform2D());
void _get_canvas_items_at_pos(const Point2 &p_pos, Vector<_SelectResult> &r_items, int p_limit = 0);
+ void _get_bones_at_pos(const Point2 &p_pos, Vector<_SelectResult> &r_items);
void _find_canvas_items_in_rect(const Rect2 &p_rect, Node *p_node, List<CanvasItem *> *r_items, const Transform2D &p_parent_xform = Transform2D(), const Transform2D &p_canvas_xform = Transform2D());
bool _select_click_on_item(CanvasItem *item, Point2 p_click_pos, bool p_append);
@@ -379,6 +380,7 @@ class CanvasItemEditor : public VBoxContainer {
UndoRedo *undo_redo;
bool _build_bones_list(Node *p_node);
+ bool _get_bone_shape(Vector<Vector2> *shape, Vector<Vector2> *outline_shape, Map<BoneKey, BoneList>::Element *bone);
List<CanvasItem *> _get_edited_canvas_items(bool retreive_locked = false, bool remove_canvas_item_if_parent_in_selection = true);
Rect2 _get_encompassing_rect_from_list(List<CanvasItem *> p_list);
diff --git a/editor/plugins/script_editor_plugin.cpp b/editor/plugins/script_editor_plugin.cpp
index ac250f085d..c0f91455fa 100644
--- a/editor/plugins/script_editor_plugin.cpp
+++ b/editor/plugins/script_editor_plugin.cpp
@@ -1549,9 +1549,10 @@ void ScriptEditor::_update_script_names() {
ScriptEditorBase *se = Object::cast_to<ScriptEditorBase>(tab_container->get_child(i));
if (se) {
- String name = se->get_name();
Ref<Texture> icon = se->get_icon();
String path = se->get_edited_script()->get_path();
+ bool built_in = !path.is_resource_file();
+ String name = built_in ? path.get_file() : se->get_name();
_ScriptEditorItemData sd;
sd.icon = icon;
diff --git a/editor/plugins/skeleton_2d_editor_plugin.cpp b/editor/plugins/skeleton_2d_editor_plugin.cpp
index e372f792d6..08bfebefbd 100644
--- a/editor/plugins/skeleton_2d_editor_plugin.cpp
+++ b/editor/plugins/skeleton_2d_editor_plugin.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* skeleton_2d_editor_plugin.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "skeleton_2d_editor_plugin.h"
#include "canvas_item_editor_plugin.h"
diff --git a/editor/plugins/skeleton_2d_editor_plugin.h b/editor/plugins/skeleton_2d_editor_plugin.h
index bbe2a3a6f2..26ab4328b0 100644
--- a/editor/plugins/skeleton_2d_editor_plugin.h
+++ b/editor/plugins/skeleton_2d_editor_plugin.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* skeleton_2d_editor_plugin.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef SKELETON_2D_EDITOR_PLUGIN_H
#define SKELETON_2D_EDITOR_PLUGIN_H
diff --git a/editor/plugins/sprite_editor_plugin.cpp b/editor/plugins/sprite_editor_plugin.cpp
index 49816fe2ae..66673cca00 100644
--- a/editor/plugins/sprite_editor_plugin.cpp
+++ b/editor/plugins/sprite_editor_plugin.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* sprite_editor_plugin.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "sprite_editor_plugin.h"
#include "canvas_item_editor_plugin.h"
diff --git a/editor/plugins/sprite_editor_plugin.h b/editor/plugins/sprite_editor_plugin.h
index 17aa3eb1f9..238227e4a0 100644
--- a/editor/plugins/sprite_editor_plugin.h
+++ b/editor/plugins/sprite_editor_plugin.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* sprite_editor_plugin.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef SPRITE_EDITOR_PLUGIN_H
#define SPRITE_EDITOR_PLUGIN_H
diff --git a/editor/scene_tree_dock.cpp b/editor/scene_tree_dock.cpp
index f5cee4cd65..08338b0b2d 100644
--- a/editor/scene_tree_dock.cpp
+++ b/editor/scene_tree_dock.cpp
@@ -75,6 +75,8 @@ void SceneTreeDock::_unhandled_key_input(Ref<InputEvent> p_event) {
if (ED_IS_SHORTCUT("scene_tree/batch_rename", p_event)) {
_tool_selected(TOOL_BATCH_RENAME);
+ } else if (ED_IS_SHORTCUT("scene_tree/rename", p_event)) {
+ _tool_selected(TOOL_RENAME);
} else if (ED_IS_SHORTCUT("scene_tree/add_child_node", p_event)) {
_tool_selected(TOOL_NEW);
} else if (ED_IS_SHORTCUT("scene_tree/instance_scene", p_event)) {
@@ -293,6 +295,13 @@ void SceneTreeDock::_tool_selected(int p_tool, bool p_confirm_override) {
rename_dialog->popup_centered();
}
} break;
+ case TOOL_RENAME: {
+ Tree *tree = scene_tree->get_scene_tree();
+ if (tree->is_anything_selected()) {
+ tree->grab_focus();
+ tree->edit_selected();
+ }
+ } break;
case TOOL_NEW: {
String preferred = "";
@@ -1870,7 +1879,6 @@ void SceneTreeDock::_tree_rmb(const Vector2 &p_menu_pos) {
menu->clear();
- menu->add_icon_shortcut(get_icon("Rename", "EditorIcons"), ED_GET_SHORTCUT("scene_tree/batch_rename"), TOOL_BATCH_RENAME);
if (selection.size() == 1) {
subresources.clear();
@@ -1886,6 +1894,9 @@ void SceneTreeDock::_tree_rmb(const Vector2 &p_menu_pos) {
menu->add_icon_shortcut(get_icon("ScriptCreate", "EditorIcons"), ED_GET_SHORTCUT("scene_tree/attach_script"), TOOL_ATTACH_SCRIPT);
menu->add_icon_shortcut(get_icon("ScriptRemove", "EditorIcons"), ED_GET_SHORTCUT("scene_tree/clear_script"), TOOL_CLEAR_SCRIPT);
menu->add_separator();
+ menu->add_icon_shortcut(get_icon("Rename", "EditorIcons"), ED_GET_SHORTCUT("scene_tree/rename"), TOOL_RENAME);
+ } else { // multi select
+ menu->add_icon_shortcut(get_icon("Rename", "EditorIcons"), ED_GET_SHORTCUT("scene_tree/batch_rename"), TOOL_BATCH_RENAME);
}
menu->add_icon_shortcut(get_icon("Reload", "EditorIcons"), ED_GET_SHORTCUT("scene_tree/change_node_type"), TOOL_REPLACE);
menu->add_separator();
@@ -2064,6 +2075,7 @@ SceneTreeDock::SceneTreeDock(EditorNode *p_editor, Node *p_scene_root, EditorSel
filter_hbc->add_constant_override("separate", 0);
ToolButton *tb;
+ ED_SHORTCUT("scene_tree/rename", TTR("Rename"));
ED_SHORTCUT("scene_tree/batch_rename", TTR("Batch Rename"), KEY_MASK_CMD | KEY_F2);
ED_SHORTCUT("scene_tree/add_child_node", TTR("Add Child Node"), KEY_MASK_CMD | KEY_A);
ED_SHORTCUT("scene_tree/instance_scene", TTR("Instance Child Scene"));
diff --git a/editor/scene_tree_dock.h b/editor/scene_tree_dock.h
index fd7d616a80..ed13e063bb 100644
--- a/editor/scene_tree_dock.h
+++ b/editor/scene_tree_dock.h
@@ -59,6 +59,7 @@ class SceneTreeDock : public VBoxContainer {
TOOL_NEW,
TOOL_INSTANCE,
+ TOOL_RENAME,
TOOL_BATCH_RENAME,
TOOL_REPLACE,
TOOL_ATTACH_SCRIPT,
diff --git a/main/timer_sync.cpp b/main/timer_sync.cpp
index c33cbafee8..9f4f6ed271 100644
--- a/main/timer_sync.cpp
+++ b/main/timer_sync.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* timer_sync.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "timer_sync.h"
void MainFrameTime::clamp_idle(float min_idle_step, float max_idle_step) {
diff --git a/main/timer_sync.h b/main/timer_sync.h
index 6ef4254270..75fbe6a9dc 100644
--- a/main/timer_sync.h
+++ b/main/timer_sync.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* timer_sync.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef TIMER_SYNC_H
#define TIMER_SYNC_H
diff --git a/modules/csg/csg.cpp b/modules/csg/csg.cpp
index c1fe11d6aa..4e6e701bfd 100644
--- a/modules/csg/csg.cpp
+++ b/modules/csg/csg.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* csg.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "csg.h"
#include "face3.h"
#include "geometry.h"
diff --git a/modules/csg/csg.h b/modules/csg/csg.h
index bb67e1fb36..53303a6533 100644
--- a/modules/csg/csg.h
+++ b/modules/csg/csg.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* csg.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef CSG_H
#define CSG_H
diff --git a/modules/csg/csg_gizmos.cpp b/modules/csg/csg_gizmos.cpp
index 06cbaab3b0..2150320c4a 100644
--- a/modules/csg/csg_gizmos.cpp
+++ b/modules/csg/csg_gizmos.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* csg_gizmos.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "csg_gizmos.h"
///////////
diff --git a/modules/csg/csg_gizmos.h b/modules/csg/csg_gizmos.h
index b5e394ecad..68e916823b 100644
--- a/modules/csg/csg_gizmos.h
+++ b/modules/csg/csg_gizmos.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* csg_gizmos.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef CSG_GIZMOS_H
#define CSG_GIZMOS_H
diff --git a/modules/csg/csg_shape.cpp b/modules/csg/csg_shape.cpp
index c223279f98..82db1871da 100644
--- a/modules/csg/csg_shape.cpp
+++ b/modules/csg/csg_shape.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* csg_shape.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "csg_shape.h"
#include "scene/3d/path.h"
diff --git a/modules/csg/csg_shape.h b/modules/csg/csg_shape.h
index c1d2cce606..cbb5c7e041 100644
--- a/modules/csg/csg_shape.h
+++ b/modules/csg/csg_shape.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* csg_shape.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef CSG_SHAPE_H
#define CSG_SHAPE_H
diff --git a/modules/mono/glue/builtin_types_glue.h b/modules/mono/glue/builtin_types_glue.h
index 460de84b65..ef9f152682 100644
--- a/modules/mono/glue/builtin_types_glue.h
+++ b/modules/mono/glue/builtin_types_glue.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* builtin_types_glue.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef BUILTIN_TYPES_GLUE_H
#define BUILTIN_TYPES_GLUE_H
diff --git a/modules/mono/glue/cs_files/GodotTaskScheduler.cs b/modules/mono/glue/cs_files/GodotTaskScheduler.cs
index 6bf25a89d2..3d23ec10f1 100644
--- a/modules/mono/glue/cs_files/GodotTaskScheduler.cs
+++ b/modules/mono/glue/cs_files/GodotTaskScheduler.cs
@@ -14,6 +14,7 @@ namespace Godot
public GodotTaskScheduler()
{
Context = new GodotSynchronizationContext();
+ SynchronizationContext.SetSynchronizationContext(Context);
}
protected sealed override void QueueTask(Task task)
@@ -57,7 +58,6 @@ namespace Godot
public void Activate()
{
- SynchronizationContext.SetSynchronizationContext(Context);
ExecuteQueuedTasks();
Context.ExecutePendingContinuations();
}
diff --git a/platform/javascript/SCsub b/platform/javascript/SCsub
index 5991075e29..98988d97fd 100644
--- a/platform/javascript/SCsub
+++ b/platform/javascript/SCsub
@@ -30,8 +30,8 @@ zip_files = env.InstallAs([
zip_dir.File('godot.wasm'),
zip_dir.File('godot.html')
], [
- js_wrapped,
- wasm,
- '#misc/dist/html/default.html'
+ js_wrapped,
+ wasm,
+ '#misc/dist/html/default.html'
])
env.Zip('#bin/godot', zip_files, ZIPROOT=zip_dir, ZIPSUFFIX='${PROGSUFFIX}${ZIPSUFFIX}', ZIPCOMSTR='Archving $SOURCES as $TARGET')
diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py
index a48cb872ee..fc909f6619 100644
--- a/platform/javascript/detect.py
+++ b/platform/javascript/detect.py
@@ -1,5 +1,4 @@
import os
-import string
import sys
@@ -39,7 +38,7 @@ def configure(env):
## Build type
- if env['target'] == 'release' or env['target'] == 'profile':
+ if env['target'] != 'debug':
# Use -Os to prioritize optimizing for reduced file size. This is
# particularly valuable for the web platform because it directly
# decreases download time.
@@ -48,17 +47,11 @@ def configure(env):
# run-time performance.
env.Append(CCFLAGS=['-Os'])
env.Append(LINKFLAGS=['-Os'])
- if env['target'] == 'profile':
+ if env['target'] == 'release_debug':
+ env.Append(CPPDEFINES=['DEBUG_ENABLED'])
+ # Retain function names for backtraces at the cost of file size.
env.Append(LINKFLAGS=['--profiling-funcs'])
-
- elif env['target'] == 'release_debug':
- env.Append(CPPDEFINES=['DEBUG_ENABLED'])
- env.Append(CCFLAGS=['-O2'])
- env.Append(LINKFLAGS=['-O2'])
- # Retain function names for backtraces at the cost of file size.
- env.Append(LINKFLAGS=['--profiling-funcs'])
-
- elif env['target'] == 'debug':
+ else:
env.Append(CPPDEFINES=['DEBUG_ENABLED'])
env.Append(CCFLAGS=['-O1', '-g'])
env.Append(LINKFLAGS=['-O1', '-g'])
diff --git a/scene/2d/mesh_instance_2d.cpp b/scene/2d/mesh_instance_2d.cpp
index adbb227d0c..9f21fe1a1f 100644
--- a/scene/2d/mesh_instance_2d.cpp
+++ b/scene/2d/mesh_instance_2d.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* mesh_instance_2d.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "mesh_instance_2d.h"
void MeshInstance2D::_notification(int p_what) {
diff --git a/scene/2d/mesh_instance_2d.h b/scene/2d/mesh_instance_2d.h
index d1d1ade0ae..c9889c1c03 100644
--- a/scene/2d/mesh_instance_2d.h
+++ b/scene/2d/mesh_instance_2d.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* mesh_instance_2d.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef MESH_INSTANCE_2D_H
#define MESH_INSTANCE_2D_H
diff --git a/scene/2d/skeleton_2d.cpp b/scene/2d/skeleton_2d.cpp
index 2363c791fa..0595cc43b8 100644
--- a/scene/2d/skeleton_2d.cpp
+++ b/scene/2d/skeleton_2d.cpp
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* skeleton_2d.cpp */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#include "skeleton_2d.h"
void Bone2D::_notification(int p_what) {
diff --git a/scene/2d/skeleton_2d.h b/scene/2d/skeleton_2d.h
index cd270dac85..b86cf3be81 100644
--- a/scene/2d/skeleton_2d.h
+++ b/scene/2d/skeleton_2d.h
@@ -1,3 +1,33 @@
+/*************************************************************************/
+/* skeleton_2d.h */
+/*************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/*************************************************************************/
+/* Copyright (c) 2007-2018 Juan Linietsky, Ariel Manzur. */
+/* Copyright (c) 2014-2018 Godot Engine contributors (cf. AUTHORS.md) */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/*************************************************************************/
+
#ifndef SKELETON_2D_H
#define SKELETON_2D_H
diff --git a/scene/2d/tile_map.cpp b/scene/2d/tile_map.cpp
index 60766862cc..5294189c65 100644
--- a/scene/2d/tile_map.cpp
+++ b/scene/2d/tile_map.cpp
@@ -61,6 +61,7 @@ void TileMap::_notification(int p_what) {
}
pending_update = true;
+ _recreate_quadrants();
_update_dirty_quadrants();
RID space = get_world_2d()->get_space();
_update_quadrant_transform();
@@ -716,7 +717,7 @@ void TileMap::_make_quadrant_dirty(Map<PosKey, Quadrant>::Element *Q) {
pending_update = true;
if (!is_inside_tree())
return;
- call_deferred("_update_dirty_quadrants");
+ _update_dirty_quadrants();
}
void TileMap::set_cellv(const Vector2 &p_pos, int p_tile, bool p_flip_x, bool p_flip_y, bool p_transpose) {
diff --git a/scene/resources/texture.cpp b/scene/resources/texture.cpp
index c0f6756fd1..56a2e7afba 100644
--- a/scene/resources/texture.cpp
+++ b/scene/resources/texture.cpp
@@ -220,12 +220,15 @@ Image::Format ImageTexture::get_format() const {
return format;
}
-void ImageTexture::load(const String &p_path) {
+Error ImageTexture::load(const String &p_path) {
Ref<Image> img;
img.instance();
- img->load(p_path);
- create_from_image(img);
+ Error err = img->load(p_path);
+ if (err == OK) {
+ create_from_image(img);
+ }
+ return err;
}
void ImageTexture::set_data(const Ref<Image> &p_image) {
diff --git a/scene/resources/texture.h b/scene/resources/texture.h
index 93d7ec4ef9..d81fd3b19b 100644
--- a/scene/resources/texture.h
+++ b/scene/resources/texture.h
@@ -124,7 +124,7 @@ public:
void set_flags(uint32_t p_flags);
uint32_t get_flags() const;
Image::Format get_format() const;
- void load(const String &p_path);
+ Error load(const String &p_path);
void set_data(const Ref<Image> &p_image);
Ref<Image> get_data() const;
diff --git a/servers/physics/collision_solver_sat.cpp b/servers/physics/collision_solver_sat.cpp
index eefb0f0396..e587485fcb 100644
--- a/servers/physics/collision_solver_sat.cpp
+++ b/servers/physics/collision_solver_sat.cpp
@@ -341,26 +341,26 @@ public:
min_B -= (max_A - min_A) * 0.5;
max_B += (max_A - min_A) * 0.5;
- real_t dmin = min_B - (min_A + max_A) * 0.5;
- real_t dmax = max_B - (min_A + max_A) * 0.5;
+ min_B -= (min_A + max_A) * 0.5;
+ max_B -= (min_A + max_A) * 0.5;
- if (dmin > 0.0 || dmax < 0.0) {
+ if (min_B > 0.0 || max_B < 0.0) {
separator_axis = axis;
return false; // doesn't contain 0
}
//use the smallest depth
- dmin = Math::abs(dmin);
+ min_B = -min_B;
- if (dmax < dmin) {
- if (dmax < best_depth) {
- best_depth = dmax;
+ if (max_B < min_B) {
+ if (max_B < best_depth) {
+ best_depth = max_B;
best_axis = axis;
}
} else {
- if (dmin < best_depth) {
- best_depth = dmin;
+ if (min_B < best_depth) {
+ best_depth = min_B;
best_axis = -axis; // keep it as A axis
}
}
diff --git a/thirdparty/README.md b/thirdparty/README.md
index a0b152fda2..09b016ad4d 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -480,7 +480,7 @@ Files extracted from upstream source:
## zstd
- Upstream: https://github.com/facebook/zstd
-- Version: 1.3.3
+- Version: 1.3.4
- License: BSD-3-Clause
Files extracted from upstream source:
diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h
index fcf3843079..f7f389fe0f 100644
--- a/thirdparty/zstd/common/bitstream.h
+++ b/thirdparty/zstd/common/bitstream.h
@@ -426,7 +426,7 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
* Refill `bitD` from buffer previously set in BIT_initDStream() .
* This function is safe, it guarantees it will not read beyond src buffer.
* @return : status of `BIT_DStream_t` internal register.
- * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+ * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
{
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
diff --git a/thirdparty/zstd/common/compiler.h b/thirdparty/zstd/common/compiler.h
index 3a7553c380..e90a3bcde3 100644
--- a/thirdparty/zstd/common/compiler.h
+++ b/thirdparty/zstd/common/compiler.h
@@ -63,6 +63,31 @@
# endif
#endif
+/* target attribute */
+#ifndef __has_attribute
+ #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
+#endif
+#if defined(__GNUC__)
+# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+#else
+# define TARGET_ATTRIBUTE(target)
+#endif
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+ #if (defined(__clang__) && __has_attribute(__target__)) \
+ || (defined(__GNUC__) \
+ && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
+ && (defined(__x86_64__) || defined(_M_X86)) \
+ && !defined(__BMI2__)
+ # define DYNAMIC_BMI2 1
+ #else
+ # define DYNAMIC_BMI2 0
+ #endif
+#endif
+
/* prefetch */
#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
diff --git a/thirdparty/zstd/common/cpu.h b/thirdparty/zstd/common/cpu.h
new file mode 100644
index 0000000000..4eb48e39e1
--- /dev/null
+++ b/thirdparty/zstd/common/cpu.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2018-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include <string.h>
+
+#include "mem.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {
+ U32 f1c;
+ U32 f1d;
+ U32 f7b;
+ U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+ U32 f1c = 0;
+ U32 f1d = 0;
+ U32 f7b = 0;
+ U32 f7c = 0;
+#ifdef _MSC_VER
+ int reg[4];
+ __cpuid((int*)reg, 0);
+ {
+ int const n = reg[0];
+ if (n >= 1) {
+ __cpuid((int*)reg, 1);
+ f1c = (U32)reg[2];
+ f1d = (U32)reg[3];
+ }
+ if (n >= 7) {
+ __cpuidex((int*)reg, 7, 0);
+ f7b = (U32)reg[1];
+ f7c = (U32)reg[2];
+ }
+ }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+ /* The following block like the normal cpuid branch below, but gcc
+ * reserves ebx for use of its pic register so we must specially
+ * handle the save and restore to avoid clobbering the register
+ */
+ U32 n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\r"
+ "popl %%ebx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+ U32 n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ U32 f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#endif
+ {
+ ZSTD_cpuid_t cpuid;
+ cpuid.f1c = f1c;
+ cpuid.f1d = f1d;
+ cpuid.f7b = f7b;
+ cpuid.f7c = f7c;
+ return cpuid;
+ }
+}
+
+#define X(name, r, bit) \
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
+ return ((cpuid.r) & (1U << bit)) != 0; \
+ }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+ C(sse3, 0)
+ C(pclmuldq, 1)
+ C(dtes64, 2)
+ C(monitor, 3)
+ C(dscpl, 4)
+ C(vmx, 5)
+ C(smx, 6)
+ C(eist, 7)
+ C(tm2, 8)
+ C(ssse3, 9)
+ C(cnxtid, 10)
+ C(fma, 12)
+ C(cx16, 13)
+ C(xtpr, 14)
+ C(pdcm, 15)
+ C(pcid, 17)
+ C(dca, 18)
+ C(sse41, 19)
+ C(sse42, 20)
+ C(x2apic, 21)
+ C(movbe, 22)
+ C(popcnt, 23)
+ C(tscdeadline, 24)
+ C(aes, 25)
+ C(xsave, 26)
+ C(osxsave, 27)
+ C(avx, 28)
+ C(f16c, 29)
+ C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+ D(fpu, 0)
+ D(vme, 1)
+ D(de, 2)
+ D(pse, 3)
+ D(tsc, 4)
+ D(msr, 5)
+ D(pae, 6)
+ D(mce, 7)
+ D(cx8, 8)
+ D(apic, 9)
+ D(sep, 11)
+ D(mtrr, 12)
+ D(pge, 13)
+ D(mca, 14)
+ D(cmov, 15)
+ D(pat, 16)
+ D(pse36, 17)
+ D(psn, 18)
+ D(clfsh, 19)
+ D(ds, 21)
+ D(acpi, 22)
+ D(mmx, 23)
+ D(fxsr, 24)
+ D(sse, 25)
+ D(sse2, 26)
+ D(ss, 27)
+ D(htt, 28)
+ D(tm, 29)
+ D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)
+ B(bmi1, 3)
+ B(hle, 4)
+ B(avx2, 5)
+ B(smep, 7)
+ B(bmi2, 8)
+ B(erms, 9)
+ B(invpcid, 10)
+ B(rtm, 11)
+ B(mpx, 14)
+ B(avx512f, 16)
+ B(avx512dq, 17)
+ B(rdseed, 18)
+ B(adx, 19)
+ B(smap, 20)
+ B(avx512ifma, 21)
+ B(pcommit, 22)
+ B(clflushopt, 23)
+ B(clwb, 24)
+ B(avx512pf, 26)
+ B(avx512er, 27)
+ B(avx512cd, 28)
+ B(sha, 29)
+ B(avx512bw, 30)
+ B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)
+ C(prefetchwt1, 0)
+ C(avx512vbmi, 1)
+#undef C
+
+#undef X
+
+#endif /* ZSTD_COMMON_CPU_H */
diff --git a/thirdparty/zstd/common/error_private.c b/thirdparty/zstd/common/error_private.c
index 11f7cdab1c..d004ee636c 100644
--- a/thirdparty/zstd/common/error_private.c
+++ b/thirdparty/zstd/common/error_private.c
@@ -29,6 +29,7 @@ const char* ERR_getErrorString(ERR_enum code)
case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
case PREFIX(init_missing): return "Context should be init first";
case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+ case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
diff --git a/thirdparty/zstd/common/fse.h b/thirdparty/zstd/common/fse.h
index afd7801963..6a1d272be5 100644
--- a/thirdparty/zstd/common/fse.h
+++ b/thirdparty/zstd/common/fse.h
@@ -345,7 +345,7 @@ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* s
*/
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
-/*! FSE_count_simple
+/*! FSE_count_simple() :
* Same as FSE_countFast(), but does not use any additional memory (not even on stack).
* This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
*/
diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c
index 8e3f0035f6..4c66c3b774 100644
--- a/thirdparty/zstd/common/fse_decompress.c
+++ b/thirdparty/zstd/common/fse_decompress.c
@@ -139,8 +139,8 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned
{ U32 u;
for (u=0; u<tableSize; u++) {
FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
- U16 nextState = symbolNext[symbol]++;
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+ U32 const nextState = symbolNext[symbol]++;
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
} }
diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h
index 522bf9b6c0..b4645b4e51 100644
--- a/thirdparty/zstd/common/huf.h
+++ b/thirdparty/zstd/common/huf.h
@@ -58,32 +58,32 @@ extern "C" {
#endif
-/* *** simple functions *** */
-/**
-HUF_compress() :
- Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
- 'dst' buffer must be already allocated.
- Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
- `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
- @return : size of compressed data (<= `dstCapacity`).
- Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
- if return == 1, srcData is a single repeated byte symbol (RLE compression).
- if HUF_isError(return), compression failed (more details using HUF_getErrorName())
-*/
+/* ========================== */
+/* *** simple functions *** */
+/* ========================== */
+
+/** HUF_compress() :
+ * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
const void* src, size_t srcSize);
-/**
-HUF_decompress() :
- Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
- into already allocated buffer 'dst', of minimum size 'dstSize'.
- `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
- Note : in contrast with FSE, HUF_decompress can regenerate
- RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
- because it knows size to regenerate.
- @return : size of regenerated data (== originalSize),
- or an error code, which can be tested using HUF_isError()
-*/
+/** HUF_decompress() :
+ * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ * into already allocated buffer 'dst', of minimum size 'dstSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ * Note : in contrast with FSE, HUF_decompress can regenerate
+ * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ * because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ * or an error code, which can be tested using HUF_isError()
+ */
HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize,
const void* cSrc, size_t cSrcSize);
@@ -100,39 +100,32 @@ HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error c
/* *** Advanced function *** */
/** HUF_compress2() :
- * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog`.
- * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
-HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+ * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog);
/** HUF_compress4X_wksp() :
* Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must have minimum alignment of 4, and be at least as large as following macro */
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
#define HUF_WORKSPACE_SIZE (6 << 10)
#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
-HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
-
-/**
- * The minimum workspace size for the `workSpace` used in
- * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
- *
- * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
- * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
- * Buffer overflow errors may potentially occur if code modifications result in
- * a required workspace size greater than that specified in the following
- * macro.
- */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
-#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize);
#endif /* HUF_H_298734234 */
/* ******************************************************************
* WARNING !!
* The following section contains advanced and experimental definitions
- * which shall never be used in the context of dll
+ * which shall never be used in the context of a dynamic library,
* because they are not guaranteed to remain stable in the future.
* Only consider them in association with static linking.
- *******************************************************************/
+ * *****************************************************************/
#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
#define HUF_H_HUF_STATIC_LINKING_ONLY
@@ -141,11 +134,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const
/* *** Constants *** */
-#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
+#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */
#define HUF_SYMBOLVALUE_MAX 255
-#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
# error "HUF_TABLELOG_MAX is too large !"
#endif
@@ -192,24 +185,23 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
/* ****************************************
-* HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable()
-5. encode the data stream using HUF_compress4X_usingCTable()
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
+ * HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ * 2. (optional) refine tableLog using HUF_optimalTableLog()
+ * 3. build Huffman table from count using HUF_buildCTable()
+ * 4. save Huffman table to memory buffer using HUF_writeCTable()
+ * 5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ * The following API allows targeting specific sub-functions for advanced tasks.
+ * For example, it's possible to compress several blocks using the same 'CTable',
+ * or to save and regenerate 'CTable' using external methods.
+ */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
-size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
@@ -219,46 +211,65 @@ typedef enum {
HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
} HUF_repeat;
/** HUF_compress4X_repeat() :
-* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-* If it uses hufTable it does not modify hufTable or repeat.
-* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-* If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress4X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ * If it uses hufTable it does not modify hufTable or repeat.
+ * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ * If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
*/
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
/*! HUF_readStats() :
- Read compact Huffman tree, saved by HUF_writeCTable().
- `huffWeight` is destination buffer.
- @return : size read from `src` , or an error Code .
- Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
- U32* nbSymbolsPtr, U32* tableLogPtr,
+ * Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+ U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
const void* src, size_t srcSize);
/** HUF_readCTable() :
-* Loading a CTable saved with HUF_writeCTable() */
+ * Loading a CTable saved with HUF_writeCTable() */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
/*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build Huffman table from save, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
-*/
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
/** HUF_selectDecoder() :
-* Tells which decoder is likely to decode faster,
-* based on a set of pre-determined metrics.
-* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-* Assumption : 0 < cSrcSize < dstSize <= 128 KB */
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+/**
+ * The minimum workspace size for the `workSpace` used in
+ * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
+ *
+ * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ * Buffer overflow errors may potentially occur if code modifications result in
+ * a required workspace size greater than that specified in the following
+ * macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
@@ -269,17 +280,23 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+/* ====================== */
/* single stream variants */
+/* ====================== */
size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
/** HUF_compress1X_repeat() :
-* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-* If it uses hufTable it does not modify hufTable or repeat.
-* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-* If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress1X_repeat(void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize, HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+ * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ * If it uses hufTable it does not modify hufTable or repeat.
+ * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ * If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ unsigned maxSymbolValue, unsigned tableLog,
+ void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
@@ -295,6 +312,14 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cS
size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+
#endif /* HUF_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c
index 98b109e72a..773488b072 100644
--- a/thirdparty/zstd/common/pool.c
+++ b/thirdparty/zstd/common/pool.c
@@ -12,6 +12,7 @@
/* ====== Dependencies ======= */
#include <stddef.h> /* size_t */
#include "pool.h"
+#include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
/* ====== Compiler specifics ====== */
#if defined(_MSC_VER)
@@ -193,32 +194,54 @@ static int isQueueFull(POOL_ctx const* ctx) {
}
}
-void POOL_add(void* ctxVoid, POOL_function function, void *opaque) {
- POOL_ctx* const ctx = (POOL_ctx*)ctxVoid;
- if (!ctx) { return; }
+static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque)
+{
+ POOL_job const job = {function, opaque};
+ assert(ctx != NULL);
+ if (ctx->shutdown) return;
+
+ ctx->queueEmpty = 0;
+ ctx->queue[ctx->queueTail] = job;
+ ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
+ ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+}
+
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+ assert(ctx != NULL);
ZSTD_pthread_mutex_lock(&ctx->queueMutex);
- { POOL_job const job = {function, opaque};
+ /* Wait until there is space in the queue for the new job */
+ while (isQueueFull(ctx) && (!ctx->shutdown)) {
+ ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
+ }
+ POOL_add_internal(ctx, function, opaque);
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+}
- /* Wait until there is space in the queue for the new job */
- while (isQueueFull(ctx) && !ctx->shutdown) {
- ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex);
- }
- /* The queue is still going => there is space */
- if (!ctx->shutdown) {
- ctx->queueEmpty = 0;
- ctx->queue[ctx->queueTail] = job;
- ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize;
- }
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque)
+{
+ assert(ctx != NULL);
+ ZSTD_pthread_mutex_lock(&ctx->queueMutex);
+ if (isQueueFull(ctx)) {
+ ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
+ return 0;
}
+ POOL_add_internal(ctx, function, opaque);
ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
- ZSTD_pthread_cond_signal(&ctx->queuePopCond);
+ return 1;
}
+
#else /* ZSTD_MULTITHREAD not defined */
+
+/* ========================== */
/* No multi-threading support */
+/* ========================== */
-/* We don't need any data, but if it is empty malloc() might return NULL. */
+
+/* We don't need any data, but if it is empty, malloc() might return NULL. */
struct POOL_ctx_s {
int dummy;
};
@@ -240,9 +263,15 @@ void POOL_free(POOL_ctx* ctx) {
(void)ctx;
}
-void POOL_add(void* ctx, POOL_function function, void* opaque) {
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
+ (void)ctx;
+ function(opaque);
+}
+
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) {
(void)ctx;
function(opaque);
+ return 1;
}
size_t POOL_sizeof(POOL_ctx* ctx) {
diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h
index 08c63715aa..a57e9b4fab 100644
--- a/thirdparty/zstd/common/pool.h
+++ b/thirdparty/zstd/common/pool.h
@@ -17,7 +17,8 @@ extern "C" {
#include <stddef.h> /* size_t */
-#include "zstd_internal.h" /* ZSTD_customMem */
+#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */
+#include "zstd.h"
typedef struct POOL_ctx_s POOL_ctx;
@@ -27,35 +28,43 @@ typedef struct POOL_ctx_s POOL_ctx;
* The maximum number of queued jobs before blocking is `queueSize`.
* @return : POOL_ctx pointer on success, else NULL.
*/
-POOL_ctx *POOL_create(size_t numThreads, size_t queueSize);
+POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
-POOL_ctx *POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
+POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
/*! POOL_free() :
Free a thread pool returned by POOL_create().
*/
-void POOL_free(POOL_ctx *ctx);
+void POOL_free(POOL_ctx* ctx);
/*! POOL_sizeof() :
return memory usage of pool returned by POOL_create().
*/
-size_t POOL_sizeof(POOL_ctx *ctx);
+size_t POOL_sizeof(POOL_ctx* ctx);
/*! POOL_function :
The function type that can be added to a thread pool.
*/
-typedef void (*POOL_function)(void *);
+typedef void (*POOL_function)(void*);
/*! POOL_add_function :
The function type for a generic thread pool add function.
*/
-typedef void (*POOL_add_function)(void *, POOL_function, void *);
+typedef void (*POOL_add_function)(void*, POOL_function, void*);
/*! POOL_add() :
- Add the job `function(opaque)` to the thread pool.
+ Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
Possibly blocks until there is room in the queue.
Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
*/
-void POOL_add(void *ctx, POOL_function function, void *opaque);
+void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
+
+
+/*! POOL_tryAdd() :
+ Add the job `function(opaque)` to the thread pool if a worker is available.
+ return immediately otherwise.
+ @return : 1 if successful, 0 if not.
+*/
+int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
#if defined (__cplusplus)
diff --git a/thirdparty/zstd/common/threading.h b/thirdparty/zstd/common/threading.h
index 197770db27..d806c89d01 100644
--- a/thirdparty/zstd/common/threading.h
+++ b/thirdparty/zstd/common/threading.h
@@ -45,15 +45,15 @@ extern "C" {
/* mutex */
#define ZSTD_pthread_mutex_t CRITICAL_SECTION
-#define ZSTD_pthread_mutex_init(a, b) (InitializeCriticalSection((a)), 0)
+#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0)
#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a))
#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a))
#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a))
/* condition variable */
#define ZSTD_pthread_cond_t CONDITION_VARIABLE
-#define ZSTD_pthread_cond_init(a, b) (InitializeConditionVariable((a)), 0)
-#define ZSTD_pthread_cond_destroy(a) /* No delete */
+#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0)
+#define ZSTD_pthread_cond_destroy(a) ((void)(a))
#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE)
#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a))
#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a))
@@ -100,17 +100,17 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr);
/* No multithreading support */
typedef int ZSTD_pthread_mutex_t;
-#define ZSTD_pthread_mutex_init(a, b) ((void)a, 0)
-#define ZSTD_pthread_mutex_destroy(a)
-#define ZSTD_pthread_mutex_lock(a)
-#define ZSTD_pthread_mutex_unlock(a)
+#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0)
+#define ZSTD_pthread_mutex_destroy(a) ((void)(a))
+#define ZSTD_pthread_mutex_lock(a) ((void)(a))
+#define ZSTD_pthread_mutex_unlock(a) ((void)(a))
typedef int ZSTD_pthread_cond_t;
-#define ZSTD_pthread_cond_init(a, b) ((void)a, 0)
-#define ZSTD_pthread_cond_destroy(a)
-#define ZSTD_pthread_cond_wait(a, b)
-#define ZSTD_pthread_cond_signal(a)
-#define ZSTD_pthread_cond_broadcast(a)
+#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0)
+#define ZSTD_pthread_cond_destroy(a) ((void)(a))
+#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b))
+#define ZSTD_pthread_cond_signal(a) ((void)(a))
+#define ZSTD_pthread_cond_broadcast(a) ((void)(a))
/* do not use ZSTD_pthread_t */
diff --git a/thirdparty/zstd/common/zstd_errors.h b/thirdparty/zstd/common/zstd_errors.h
index 4bcb7769fe..57533f2869 100644
--- a/thirdparty/zstd/common/zstd_errors.h
+++ b/thirdparty/zstd/common/zstd_errors.h
@@ -35,12 +35,20 @@ extern "C" {
# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
#endif
-/*-****************************************
- * error codes list
- * note : this API is still considered unstable
- * and shall not be used with a dynamic library.
- * only static linking is allowed
- ******************************************/
+/*-*********************************************
+ * Error codes list
+ *-*********************************************
+ * Error codes _values_ are pinned down since v1.3.1 only.
+ * Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ * Only values < 100 are considered stable.
+ *
+ * note 1 : this API shall be used with static linking only.
+ * dynamic linking is not yet officially supported.
+ * note 2 : Prefer relying on the enum than on its value whenever possible
+ * This is the only supported way to use the error list < v1.3.1
+ * note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
typedef enum {
ZSTD_error_no_error = 0,
ZSTD_error_GENERIC = 1,
@@ -61,9 +69,10 @@ typedef enum {
ZSTD_error_stage_wrong = 60,
ZSTD_error_init_missing = 62,
ZSTD_error_memory_allocation = 64,
+ ZSTD_error_workSpace_tooSmall= 66,
ZSTD_error_dstSize_tooSmall = 70,
ZSTD_error_srcSize_wrong = 72,
- /* following error codes are not stable and may be removed or changed in a future version */
+ /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
ZSTD_error_frameIndex_tooLarge = 100,
ZSTD_error_seekableIO = 102,
ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h
index 5d2900eb76..65c08a8257 100644
--- a/thirdparty/zstd/common/zstd_internal.h
+++ b/thirdparty/zstd/common/zstd_internal.h
@@ -132,14 +132,15 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
#define Litbits 8
#define MaxLit ((1<<Litbits) - 1)
-#define MaxML 52
-#define MaxLL 35
+#define MaxML 52
+#define MaxLL 35
#define DefaultMaxOff 28
-#define MaxOff 31
+#define MaxOff 31
#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog 9
#define LLFSELog 9
#define OffFSELog 8
+#define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
@@ -228,8 +229,6 @@ typedef struct {
BYTE* ofCode;
U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
U32 longLengthPos;
- U32 rep[ZSTD_REP_NUM];
- U32 repToConfirm[ZSTD_REP_NUM];
} seqStore_t;
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c
index 549c115d42..cb8f1fa323 100644
--- a/thirdparty/zstd/compress/fse_compress.c
+++ b/thirdparty/zstd/compress/fse_compress.c
@@ -248,7 +248,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
bitCount -= (count<max);
previous0 = (count==1);
if (remaining<1) return ERROR(GENERIC);
- while (remaining<threshold) nbBits--, threshold>>=1;
+ while (remaining<threshold) { nbBits--; threshold>>=1; }
}
if (bitCount>16) {
if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
@@ -292,7 +292,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
It doesn't use any additional memory.
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
For this reason, prefer using a table `count` with 256 elements.
- @return : count of most numerous element
+ @return : count of most numerous element.
*/
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
const void* src, size_t srcSize)
@@ -305,7 +305,10 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
- while (ip<end) count[*ip++]++;
+ while (ip<end) {
+ assert(*ip <= maxSymbolValue);
+ count[*ip++]++;
+ }
while (!count[maxSymbolValue]) maxSymbolValue--;
*maxSymbolValuePtr = maxSymbolValue;
@@ -318,7 +321,8 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
/* FSE_count_parallel_wksp() :
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
+ * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
static size_t FSE_count_parallel_wksp(
unsigned* count, unsigned* maxSymbolValuePtr,
const void* source, size_t sourceSize,
@@ -333,7 +337,7 @@ static size_t FSE_count_parallel_wksp(
U32* const Counting3 = Counting2 + 256;
U32* const Counting4 = Counting3 + 256;
- memset(Counting1, 0, 4*256*sizeof(unsigned));
+ memset(workSpace, 0, 4*256*sizeof(unsigned));
/* safety checks */
if (!sourceSize) {
@@ -379,7 +383,9 @@ static size_t FSE_count_parallel_wksp(
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
} }
- { U32 s; for (s=0; s<=maxSymbolValue; s++) {
+ { U32 s;
+ if (maxSymbolValue > 255) maxSymbolValue = 255;
+ for (s=0; s<=maxSymbolValue; s++) {
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
if (count[s] > max) max = count[s];
} }
@@ -393,9 +399,11 @@ static size_t FSE_count_parallel_wksp(
* Same as FSE_countFast(), but using an externally provided scratch buffer.
* `workSpace` size must be table of >= `1024` unsigned */
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
- const void* source, size_t sourceSize, unsigned* workSpace)
+ const void* source, size_t sourceSize,
+ unsigned* workSpace)
{
- if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+ if (sourceSize < 1500) /* heuristic threshold */
+ return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
}
@@ -540,7 +548,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
find max, then give all remaining points to max */
U32 maxV = 0, maxC = 0;
for (s=0; s<=maxSymbolValue; s++)
- if (count[s] > maxC) maxV=s, maxC=count[s];
+ if (count[s] > maxC) { maxV=s; maxC=count[s]; }
norm[maxV] += (short)ToDistribute;
return 0;
}
@@ -548,7 +556,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
if (total == 0) {
/* all of the symbols were low enough for the lowOne or lowThreshold */
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
- if (norm[s] > 0) ToDistribute--, norm[s]++;
+ if (norm[s] > 0) { ToDistribute--; norm[s]++; }
return 0;
}
@@ -604,7 +612,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
U64 restToBeat = vStep * rtbTable[proba];
proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
}
- if (proba > largestP) largestP=proba, largest=s;
+ if (proba > largestP) { largestP=proba; largest=s; }
normalizedCounter[s] = proba;
stillToDistribute -= proba;
} }
diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c
index 5692d56e00..83230b415f 100644
--- a/thirdparty/zstd/compress/huf_compress.c
+++ b/thirdparty/zstd/compress/huf_compress.c
@@ -46,6 +46,7 @@
#include <string.h> /* memcpy, memset */
#include <stdio.h> /* printf (debug) */
#include "bitstream.h"
+#include "compiler.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
#include "fse.h" /* header compression */
#define HUF_STATIC_LINKING_ONLY
@@ -322,7 +323,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
U32 const c = count[n];
U32 const r = BIT_highbit32(c+1) + 1;
U32 pos = rank[r].current++;
- while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
+ while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
+ huffNode[pos] = huffNode[pos-1];
+ pos--;
+ }
huffNode[pos].count = c;
huffNode[pos].byte = (BYTE)n;
}
@@ -331,10 +335,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
/** HUF_buildCTable_wksp() :
* Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
*/
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
nodeElt* const huffNode0 = (nodeElt*)workSpace;
@@ -345,9 +349,10 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
U32 nodeRoot;
/* safety checks */
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
+ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
+ if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
memset(huffNode0, 0, sizeof(huffNodeTable));
/* sort, decreasing order */
@@ -405,6 +410,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
}
/** HUF_buildCTable() :
+ * @return : maxNbBits
* Note : count is used before tree is written, so they can safely overlap
*/
size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
@@ -432,13 +438,14 @@ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, uns
return !bad;
}
-static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
#define HUF_FLUSHBITS(s) BIT_flushBits(s)
#define HUF_FLUSHBITS_1(stream) \
@@ -447,7 +454,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
#define HUF_FLUSHBITS_2(stream) \
if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable)
{
const BYTE* ip = (const BYTE*) src;
BYTE* const ostart = (BYTE*)dst;
@@ -491,8 +501,58 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
return BIT_closeCStream(&bitC);
}
+#if DYNAMIC_BMI2
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable)
+{
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable)
+{
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable, const int bmi2)
+{
+ if (bmi2) {
+ return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+ }
+ return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable, const int bmi2)
+{
+ (void)bmi2;
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+#endif
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+ const void* src, size_t srcSize,
+ const HUF_CElt* CTable, int bmi2)
{
size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
const BYTE* ip = (const BYTE*) src;
@@ -505,28 +565,31 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
if (srcSize < 12) return 0; /* no saving possible : too small input */
op += 6; /* jumpTable */
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
+ assert(cSize <= 65535);
MEM_writeLE16(ostart, (U16)cSize);
op += cSize;
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
+ assert(cSize <= 65535);
MEM_writeLE16(ostart+2, (U16)cSize);
op += cSize;
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
if (cSize==0) return 0;
+ assert(cSize <= 65535);
MEM_writeLE16(ostart+4, (U16)cSize);
op += cSize;
}
ip += segmentSize;
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
if (cSize==0) return 0;
op += cSize;
}
@@ -534,15 +597,20 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
return op-ostart;
}
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
static size_t HUF_compressCTable_internal(
BYTE* const ostart, BYTE* op, BYTE* const oend,
const void* src, size_t srcSize,
- unsigned singleStream, const HUF_CElt* CTable)
+ unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
{
size_t const cSize = singleStream ?
- HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
- HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+ HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
+ HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
if (HUF_isError(cSize)) { return cSize; }
if (cSize==0) { return 0; } /* uncompressible */
op += cSize;
@@ -551,86 +619,98 @@ static size_t HUF_compressCTable_internal(
return op-ostart;
}
+typedef struct {
+ U32 count[HUF_SYMBOLVALUE_MAX + 1];
+ HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+ huffNodeTable nodeTable;
+} HUF_compress_tables_t;
-/* `workSpace` must a table of at least 1024 unsigned */
+/* HUF_compress_internal() :
+ * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
static size_t HUF_compress_internal (
void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
unsigned singleStream,
void* workSpace, size_t wkspSize,
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+ const int bmi2)
{
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
BYTE* const ostart = (BYTE*)dst;
BYTE* const oend = ostart + dstSize;
BYTE* op = ostart;
- U32* count;
- size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
- HUF_CElt* CTable;
- size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
-
/* checks & inits */
- if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
- if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
- if (!dstSize) return 0; /* cannot fit within dst budget */
+ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
+ if (!srcSize) return 0; /* Uncompressed */
+ if (!dstSize) return 0; /* cannot fit anything within dst budget */
if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
- count = (U32*)workSpace;
- workSpace = (BYTE*)workSpace + countSize;
- wkspSize -= countSize;
- CTable = (HUF_CElt*)workSpace;
- workSpace = (BYTE*)workSpace + CTableSize;
- wkspSize -= CTableSize;
-
- /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+ /* Heuristic : If old table is valid, use it for small inputs */
if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, oldHufTable, bmi2);
}
/* Scan input and build symbol stats */
- { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
+ { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
- if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
+ if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
}
/* Check validity of previous table */
- if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+ if ( repeat
+ && *repeat == HUF_repeat_check
+ && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
*repeat = HUF_repeat_none;
}
/* Heuristic : use existing table for small inputs */
if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, oldHufTable, bmi2);
}
/* Build Huffman Tree */
huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
- { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
+ { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
+ maxSymbolValue, huffLog,
+ table->nodeTable, sizeof(table->nodeTable)) );
huffLog = (U32)maxBits;
- /* Zero the unused symbols so we can check it for validity */
- memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
+ /* Zero unused symbols in CTable, so we can check it for validity */
+ memset(table->CTable + (maxSymbolValue + 1), 0,
+ sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
}
/* Write table description header */
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
- /* Check if using the previous table will be beneficial */
+ { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+ /* Check if using previous huffman table is beneficial */
if (repeat && *repeat != HUF_repeat_none) {
- size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
- size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+ size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+ size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
- }
- }
- /* Use the new table */
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, oldHufTable, bmi2);
+ } }
+
+ /* Use the new huffman table */
if (hSize + 12ul >= srcSize) { return 0; }
op += hSize;
if (repeat) { *repeat = HUF_repeat_none; }
- if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
+ if (oldHufTable)
+ memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
}
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
+ return HUF_compressCTable_internal(ostart, op, oend,
+ src, srcSize,
+ singleStream, table->CTable, bmi2);
}
@@ -639,52 +719,70 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 1 /*single stream*/,
+ workSpace, wkspSize,
+ NULL, NULL, 0, 0 /*bmi2*/);
}
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 1 /*single stream*/,
+ workSpace, wkspSize, hufTable,
+ repeat, preferRepeat, bmi2);
}
size_t HUF_compress1X (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
- unsigned workSpace[1024];
+ unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 0 /*4 streams*/,
+ workSpace, wkspSize,
+ NULL, NULL, 0, 0 /*bmi2*/);
}
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog,
void* workSpace, size_t wkspSize,
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
+ maxSymbolValue, huffLog, 0 /* 4 streams */,
+ workSpace, wkspSize,
+ hufTable, repeat, preferRepeat, bmi2);
}
size_t HUF_compress2 (void* dst, size_t dstSize,
const void* src, size_t srcSize,
unsigned maxSymbolValue, unsigned huffLog)
{
- unsigned workSpace[1024];
+ unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
- return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
+ return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c
index 8d1629246d..2aa26da4cd 100644
--- a/thirdparty/zstd/compress/zstd_compress.c
+++ b/thirdparty/zstd/compress/zstd_compress.c
@@ -21,6 +21,7 @@
* Dependencies
***************************************/
#include <string.h> /* memset */
+#include "cpu.h"
#include "mem.h"
#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
#include "fse.h"
@@ -49,7 +50,13 @@ struct ZSTD_CDict_s {
void* dictBuffer;
const void* dictContent;
size_t dictContentSize;
- ZSTD_CCtx* refContext;
+ void* workspace;
+ size_t workspaceSize;
+ ZSTD_matchState_t matchState;
+ ZSTD_compressedBlockState_t cBlockState;
+ ZSTD_compressionParameters cParams;
+ ZSTD_customMem customMem;
+ U32 dictID;
}; /* typedef'd to ZSTD_CDict within "zstd.h" */
ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -59,18 +66,17 @@ ZSTD_CCtx* ZSTD_createCCtx(void)
ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
{
- ZSTD_CCtx* cctx;
-
- if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
-
- cctx = (ZSTD_CCtx*) ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
- if (!cctx) return NULL;
- cctx->customMem = customMem;
- cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
- cctx->requestedParams.fParams.contentSizeFlag = 1;
ZSTD_STATIC_ASSERT(zcss_init==0);
ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
- return cctx;
+ if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+ { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
+ if (!cctx) return NULL;
+ cctx->customMem = customMem;
+ cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+ cctx->requestedParams.fParams.contentSizeFlag = 1;
+ cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+ return cctx;
+ }
}
ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
@@ -83,11 +89,16 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
cctx->workSpace = (void*)(cctx+1);
cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);
- /* entropy space (never moves) */
- if (cctx->workSpaceSize < sizeof(ZSTD_entropyCTables_t)) return NULL;
+ /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+ if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
- cctx->entropy = (ZSTD_entropyCTables_t*)cctx->workSpace;
-
+ cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
+ cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
+ {
+ void* const ptr = cctx->blockState.nextCBlock + 1;
+ cctx->entropyWorkspace = (U32*)ptr;
+ }
+ cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
return cctx;
}
@@ -95,13 +106,10 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support free on NULL */
if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */
- ZSTD_free(cctx->workSpace, cctx->customMem);
- cctx->workSpace = NULL;
- ZSTD_freeCDict(cctx->cdictLocal);
- cctx->cdictLocal = NULL;
+ ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
+ ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL;
#ifdef ZSTD_MULTITHREAD
- ZSTDMT_freeCCtx(cctx->mtctx);
- cctx->mtctx = NULL;
+ ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
ZSTD_free(cctx, cctx->customMem);
return 0; /* reserved as a potential error code in the future */
@@ -122,10 +130,6 @@ static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
{
if (cctx==NULL) return 0; /* support sizeof on NULL */
- DEBUGLOG(3, "sizeof(*cctx) : %u", (U32)sizeof(*cctx));
- DEBUGLOG(3, "workSpaceSize (including streaming buffers): %u", (U32)cctx->workSpaceSize);
- DEBUGLOG(3, "inner cdict : %u", (U32)ZSTD_sizeof_CDict(cctx->cdictLocal));
- DEBUGLOG(3, "inner MTCTX : %u", (U32)ZSTD_sizeof_mtctx(cctx));
return sizeof(*cctx) + cctx->workSpaceSize
+ ZSTD_sizeof_CDict(cctx->cdictLocal)
+ ZSTD_sizeof_mtctx(cctx);
@@ -139,37 +143,19 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
/* private API call, for dictBuilder only */
const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
-#define ZSTD_CLEVEL_CUSTOM 999
-
-static ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
- ZSTD_CCtx_params CCtxParams, U64 srcSizeHint, size_t dictSize)
-{
- DEBUGLOG(4, "ZSTD_getCParamsFromCCtxParams: srcSize = %u, dictSize = %u",
- (U32)srcSizeHint, (U32)dictSize);
- return (CCtxParams.compressionLevel == ZSTD_CLEVEL_CUSTOM) ?
- CCtxParams.cParams :
- ZSTD_getCParams(CCtxParams.compressionLevel, srcSizeHint, dictSize);
-}
-
-static void ZSTD_cLevelToCCtxParams_srcSize(ZSTD_CCtx_params* CCtxParams, U64 srcSize)
-{
- DEBUGLOG(4, "ZSTD_cLevelToCCtxParams_srcSize: srcSize = %u",
- (U32)srcSize);
- CCtxParams->cParams = ZSTD_getCParamsFromCCtxParams(*CCtxParams, srcSize, 0);
- CCtxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM;
-}
-
-static void ZSTD_cLevelToCParams(ZSTD_CCtx* cctx)
-{
- DEBUGLOG(4, "ZSTD_cLevelToCParams: level=%i", cctx->requestedParams.compressionLevel);
- ZSTD_cLevelToCCtxParams_srcSize(
- &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1);
-}
-
-static void ZSTD_cLevelToCCtxParams(ZSTD_CCtx_params* CCtxParams)
-{
- DEBUGLOG(4, "ZSTD_cLevelToCCtxParams");
- ZSTD_cLevelToCCtxParams_srcSize(CCtxParams, ZSTD_CONTENTSIZE_UNKNOWN);
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
+{
+ ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+ if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+ if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
+ if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
+ if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
+ if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
+ if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
+ if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
+ if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
+ return cParams;
}
static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
@@ -178,7 +164,9 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
ZSTD_CCtx_params cctxParams;
memset(&cctxParams, 0, sizeof(cctxParams));
cctxParams.cParams = cParams;
- cctxParams.compressionLevel = ZSTD_CLEVEL_CUSTOM;
+ cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
+ assert(!ZSTD_checkCParams(cParams));
+ cctxParams.fParams.contentSizeFlag = 1;
return cctxParams;
}
@@ -192,6 +180,7 @@ static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
if (!params) { return NULL; }
params->customMem = customMem;
params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
+ params->fParams.contentSizeFlag = 1;
return params;
}
@@ -207,36 +196,41 @@ size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
return 0;
}
-size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params)
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
{
- return ZSTD_initCCtxParams(params, ZSTD_CLEVEL_DEFAULT);
+ return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
}
-size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
if (!cctxParams) { return ERROR(GENERIC); }
memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->compressionLevel = compressionLevel;
+ cctxParams->fParams.contentSizeFlag = 1;
return 0;
}
-size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
{
if (!cctxParams) { return ERROR(GENERIC); }
CHECK_F( ZSTD_checkCParams(params.cParams) );
memset(cctxParams, 0, sizeof(*cctxParams));
cctxParams->cParams = params.cParams;
cctxParams->fParams = params.fParams;
- cctxParams->compressionLevel = ZSTD_CLEVEL_CUSTOM;
+ cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
+ assert(!ZSTD_checkCParams(params.cParams));
return 0;
}
+/* ZSTD_assignParamsToCCtxParams() :
+ * params is presumed valid at this stage */
static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
{
ZSTD_CCtx_params ret = cctxParams;
ret.cParams = params.cParams;
ret.fParams = params.fParams;
- ret.compressionLevel = ZSTD_CLEVEL_CUSTOM;
+ ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
+ assert(!ZSTD_checkCParams(params.cParams));
return ret;
}
@@ -245,10 +239,49 @@ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
return ERROR(parameter_outOfBound); \
} }
+
+static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+{
+ switch(param)
+ {
+ case ZSTD_p_compressionLevel:
+ case ZSTD_p_hashLog:
+ case ZSTD_p_chainLog:
+ case ZSTD_p_searchLog:
+ case ZSTD_p_minMatch:
+ case ZSTD_p_targetLength:
+ case ZSTD_p_compressionStrategy:
+ case ZSTD_p_compressLiterals:
+ return 1;
+
+ case ZSTD_p_format:
+ case ZSTD_p_windowLog:
+ case ZSTD_p_contentSizeFlag:
+ case ZSTD_p_checksumFlag:
+ case ZSTD_p_dictIDFlag:
+ case ZSTD_p_forceMaxWindow :
+ case ZSTD_p_nbWorkers:
+ case ZSTD_p_jobSize:
+ case ZSTD_p_overlapSizeLog:
+ case ZSTD_p_enableLongDistanceMatching:
+ case ZSTD_p_ldmHashLog:
+ case ZSTD_p_ldmMinMatch:
+ case ZSTD_p_ldmBucketSizeLog:
+ case ZSTD_p_ldmHashEveryLog:
+ default:
+ return 0;
+ }
+}
+
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value)
{
DEBUGLOG(4, "ZSTD_CCtx_setParameter (%u, %u)", (U32)param, value);
- if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
+ if (cctx->streamStage != zcss_init) {
+ if (ZSTD_isUpdateAuthorized(param)) {
+ cctx->cParamsChanged = 1;
+ } else {
+ return ERROR(stage_wrong);
+ } }
switch(param)
{
@@ -267,9 +300,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
case ZSTD_p_targetLength:
case ZSTD_p_compressionStrategy:
if (cctx->cdict) return ERROR(stage_wrong);
- if (value>0) ZSTD_cLevelToCParams(cctx); /* Can optimize if srcSize is known */
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
+ case ZSTD_p_compressLiterals:
case ZSTD_p_contentSizeFlag:
case ZSTD_p_checksumFlag:
case ZSTD_p_dictIDFlag:
@@ -280,23 +313,17 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned v
* default : 0 when using a CDict, 1 when using a Prefix */
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
- case ZSTD_p_nbThreads:
- if ((value > 1) && cctx->staticSize) {
+ case ZSTD_p_nbWorkers:
+ if ((value>0) && cctx->staticSize) {
return ERROR(parameter_unsupported); /* MT not compatible with static alloc */
}
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_p_jobSize:
- return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
-
case ZSTD_p_overlapSizeLog:
return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
case ZSTD_p_enableLongDistanceMatching:
- if (cctx->cdict) return ERROR(stage_wrong);
- if (value>0) ZSTD_cLevelToCParams(cctx);
- return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
-
case ZSTD_p_ldmHashLog:
case ZSTD_p_ldmMinMatch:
case ZSTD_p_ldmBucketSizeLog:
@@ -320,69 +347,62 @@ size_t ZSTD_CCtxParam_setParameter(
CCtxParams->format = (ZSTD_format_e)value;
return (size_t)CCtxParams->format;
- case ZSTD_p_compressionLevel :
- if ((int)value > ZSTD_maxCLevel()) value = ZSTD_maxCLevel();
- if (value) /* 0 : does not change current level */
- CCtxParams->compressionLevel = value;
- return CCtxParams->compressionLevel;
+ case ZSTD_p_compressionLevel : {
+ int cLevel = (int)value; /* cast expected to restore negative sign */
+ if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
+ if (cLevel) { /* 0 : does not change current level */
+ CCtxParams->disableLiteralCompression = (cLevel<0); /* negative levels disable huffman */
+ CCtxParams->compressionLevel = cLevel;
+ }
+ if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
+ return 0; /* return type (size_t) cannot represent negative values */
+ }
case ZSTD_p_windowLog :
- DEBUGLOG(4, "ZSTD_CCtxParam_setParameter: set windowLog=%u", value);
- if (value) { /* 0 : does not change current windowLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.windowLog = value;
- }
+ CCtxParams->cParams.windowLog = value;
return CCtxParams->cParams.windowLog;
case ZSTD_p_hashLog :
- if (value) { /* 0 : does not change current hashLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.hashLog = value;
- }
+ CCtxParams->cParams.hashLog = value;
return CCtxParams->cParams.hashLog;
case ZSTD_p_chainLog :
- if (value) { /* 0 : does not change current chainLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.chainLog = value;
- }
+ CCtxParams->cParams.chainLog = value;
return CCtxParams->cParams.chainLog;
case ZSTD_p_searchLog :
- if (value) { /* 0 : does not change current searchLog */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.searchLog = value;
- }
+ CCtxParams->cParams.searchLog = value;
return value;
case ZSTD_p_minMatch :
- if (value) { /* 0 : does not change current minMatch length */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.searchLength = value;
- }
+ CCtxParams->cParams.searchLength = value;
return CCtxParams->cParams.searchLength;
case ZSTD_p_targetLength :
- if (value) { /* 0 : does not change current sufficient_len */
- CLAMPCHECK(value, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.targetLength = value;
- }
+ /* all values are valid. 0 => use default */
+ CCtxParams->cParams.targetLength = value;
return CCtxParams->cParams.targetLength;
case ZSTD_p_compressionStrategy :
- if (value) { /* 0 : does not change currentstrategy */
+ if (value>0) /* 0 => use default */
CLAMPCHECK(value, (unsigned)ZSTD_fast, (unsigned)ZSTD_btultra);
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.strategy = (ZSTD_strategy)value;
- }
+ CCtxParams->cParams.strategy = (ZSTD_strategy)value;
return (size_t)CCtxParams->cParams.strategy;
+ case ZSTD_p_compressLiterals:
+ CCtxParams->disableLiteralCompression = !value;
+ return !CCtxParams->disableLiteralCompression;
+
case ZSTD_p_contentSizeFlag :
/* Content size written in frame header _when known_ (default:1) */
DEBUGLOG(4, "set content size flag = %u", (value>0));
@@ -396,27 +416,25 @@ size_t ZSTD_CCtxParam_setParameter(
case ZSTD_p_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
DEBUGLOG(4, "set dictIDFlag = %u", (value>0));
- CCtxParams->fParams.noDictIDFlag = (value == 0);
+ CCtxParams->fParams.noDictIDFlag = !value;
return !CCtxParams->fParams.noDictIDFlag;
case ZSTD_p_forceMaxWindow :
CCtxParams->forceWindow = (value > 0);
return CCtxParams->forceWindow;
- case ZSTD_p_nbThreads :
- if (value == 0) return CCtxParams->nbThreads;
+ case ZSTD_p_nbWorkers :
#ifndef ZSTD_MULTITHREAD
- if (value > 1) return ERROR(parameter_unsupported);
- return 1;
+ if (value>0) return ERROR(parameter_unsupported);
+ return 0;
#else
- return ZSTDMT_CCtxParam_setNbThreads(CCtxParams, value);
+ return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value);
#endif
case ZSTD_p_jobSize :
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
- if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported);
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value);
#endif
@@ -424,44 +442,36 @@ size_t ZSTD_CCtxParam_setParameter(
#ifndef ZSTD_MULTITHREAD
return ERROR(parameter_unsupported);
#else
- if (CCtxParams->nbThreads <= 1) return ERROR(parameter_unsupported);
return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapSectionLog, value);
#endif
case ZSTD_p_enableLongDistanceMatching :
- if (value) {
- ZSTD_cLevelToCCtxParams(CCtxParams);
- CCtxParams->cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
- }
- return ZSTD_ldm_initializeParameters(&CCtxParams->ldmParams, value);
+ CCtxParams->ldmParams.enableLdm = (value>0);
+ return CCtxParams->ldmParams.enableLdm;
case ZSTD_p_ldmHashLog :
- if (value) { /* 0 : does not change current ldmHashLog */
+ if (value>0) /* 0 ==> auto */
CLAMPCHECK(value, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
- CCtxParams->ldmParams.hashLog = value;
- }
+ CCtxParams->ldmParams.hashLog = value;
return CCtxParams->ldmParams.hashLog;
case ZSTD_p_ldmMinMatch :
- if (value) { /* 0 : does not change current ldmMinMatch */
+ if (value>0) /* 0 ==> default */
CLAMPCHECK(value, ZSTD_LDM_MINMATCH_MIN, ZSTD_LDM_MINMATCH_MAX);
- CCtxParams->ldmParams.minMatchLength = value;
- }
+ CCtxParams->ldmParams.minMatchLength = value;
return CCtxParams->ldmParams.minMatchLength;
case ZSTD_p_ldmBucketSizeLog :
- if (value > ZSTD_LDM_BUCKETSIZELOG_MAX) {
+ if (value > ZSTD_LDM_BUCKETSIZELOG_MAX)
return ERROR(parameter_outOfBound);
- }
CCtxParams->ldmParams.bucketSizeLog = value;
- return value;
+ return CCtxParams->ldmParams.bucketSizeLog;
case ZSTD_p_ldmHashEveryLog :
- if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) {
+ if (value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
return ERROR(parameter_outOfBound);
- }
CCtxParams->ldmParams.hashEveryLog = value;
- return value;
+ return CCtxParams->ldmParams.hashEveryLog;
default: return ERROR(parameter_unsupported);
}
@@ -470,6 +480,9 @@ size_t ZSTD_CCtxParam_setParameter(
/** ZSTD_CCtx_setParametersUsingCCtxParams() :
* just applies `params` into `cctx`
* no action is performed, parameters are merely stored.
+ * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
+ * This is possible even if a compression is ongoing.
+ * In which case, new parameters will be applied on the fly, starting with next compression job.
*/
size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
@@ -478,7 +491,6 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
if (cctx->cdict) return ERROR(stage_wrong);
cctx->requestedParams = *params;
-
return 0;
}
@@ -492,7 +504,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long lo
size_t ZSTD_CCtx_loadDictionary_advanced(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode)
+ ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */
@@ -503,10 +515,10 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
cctx->cdict = NULL;
} else {
ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize);
+ ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize);
cctx->cdictLocal = ZSTD_createCDict_advanced(
dict, dictSize,
- dictLoadMethod, dictMode,
+ dictLoadMethod, dictContentType,
cParams, cctx->customMem);
cctx->cdict = cctx->cdictLocal;
if (cctx->cdictLocal == NULL)
@@ -519,13 +531,13 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
return ZSTD_CCtx_loadDictionary_advanced(
- cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dm_auto);
+ cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
return ZSTD_CCtx_loadDictionary_advanced(
- cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dm_auto);
+ cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
@@ -539,17 +551,17 @@ size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
- return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dm_rawContent);
+ return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}
size_t ZSTD_CCtx_refPrefix_advanced(
- ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode)
+ ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
cctx->cdict = NULL; /* prefix discards any prior cdict */
cctx->prefixDict.dict = prefix;
cctx->prefixDict.dictSize = prefixSize;
- cctx->prefixDict.dictMode = dictMode;
+ cctx->prefixDict.dictContentType = dictContentType;
return 0;
}
@@ -577,7 +589,8 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
- CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+ if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN)
+ return ERROR(parameter_unsupported);
if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
return ERROR(parameter_unsupported);
return 0;
@@ -597,7 +610,7 @@ static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters c
CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
- CLAMP(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
+ if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN;
if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
return cParams;
}
@@ -653,36 +666,43 @@ ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, u
return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}
+static size_t ZSTD_sizeof_matchState(ZSTD_compressionParameters const* cParams, const U32 forCCtx)
+{
+ size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ size_t const hSize = ((size_t)1) << cParams->hashLog;
+ U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+ size_t const h3Size = ((size_t)1) << hashLog3;
+ size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+ size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
+ + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
+ size_t const optSpace = (forCCtx && ((cParams->strategy == ZSTD_btopt) ||
+ (cParams->strategy == ZSTD_btultra)))
+ ? optPotentialSpace
+ : 0;
+ DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+ (U32)chainSize, (U32)hSize, (U32)h3Size);
+ return tableSpace + optSpace;
+}
+
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
/* Estimate CCtx size is supported for single-threaded compression only. */
- if (params->nbThreads > 1) { return ERROR(GENERIC); }
+ if (params->nbWorkers > 0) { return ERROR(GENERIC); }
{ ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(*params, 0, 0);
+ ZSTD_getCParamsFromCCtxParams(params, 0, 0);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
U32 const divider = (cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
- size_t const chainSize =
- (cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams.chainLog);
- size_t const hSize = ((size_t)1) << cParams.hashLog;
- U32 const hashLog3 = (cParams.searchLength>3) ?
- 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+ size_t const entropySpace = HUF_WORKSPACE_SIZE;
+ size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
- size_t const optBudget =
- ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
- + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
- size_t const optSpace = ((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btultra)) ? optBudget : 0;
+ size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
+ size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);
- size_t const ldmSpace = params->ldmParams.enableLdm ?
- ZSTD_ldm_getTableSize(params->ldmParams.hashLog,
- params->ldmParams.bucketSizeLog) : 0;
-
- size_t const neededSpace = entropySpace + tableSpace + tokenSpace +
- optSpace + ldmSpace;
+ size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
+ matchStateSize + ldmSpace + ldmSeqSpace;
DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
@@ -696,15 +716,26 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
}
-size_t ZSTD_estimateCCtxSize(int compressionLevel)
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
return ZSTD_estimateCCtxSize_usingCParams(cParams);
}
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+ int level;
+ size_t memBudget = 0;
+ for (level=1; level<=compressionLevel; level++) {
+ size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+ if (newMB > memBudget) memBudget = newMB;
+ }
+ return memBudget;
+}
+
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
- if (params->nbThreads > 1) { return ERROR(GENERIC); }
+ if (params->nbWorkers > 0) { return ERROR(GENERIC); }
{ size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog);
size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize;
@@ -721,11 +752,44 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
}
-size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) {
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
+size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+ int level;
+ size_t memBudget = 0;
+ for (level=1; level<=compressionLevel; level++) {
+ size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+ if (newMB > memBudget) memBudget = newMB;
+ }
+ return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+ if (cctx->appliedParams.nbWorkers > 0) {
+ return ZSTDMT_getFrameProgression(cctx->mtctx);
+ }
+#endif
+ { ZSTD_frameProgression fp;
+ size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+ cctx->inBuffPos - cctx->inToCompress;
+ if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+ assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+ fp.ingested = cctx->consumedSrcSize + buffered;
+ fp.consumed = cctx->consumedSrcSize;
+ fp.produced = cctx->producedCSize;
+ return fp;
+} }
+
+
static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
ZSTD_compressionParameters cParams2)
{
@@ -761,9 +825,9 @@ static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1,
size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
- DEBUGLOG(4, "ZSTD_sufficientBuff: windowSize2=%u from wlog=%u",
+ DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u",
(U32)windowSize2, cParams2.windowLog);
- DEBUGLOG(4, "ZSTD_sufficientBuff: blockSize2 %u <=? blockSize1 %u",
+ DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u",
(U32)blockSize2, (U32)blockSize1);
return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */
& (neededBufferSize2 <= bufferSize1);
@@ -782,37 +846,101 @@ static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize);
}
+static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+{
+ int i;
+ for (i = 0; i < ZSTD_REP_NUM; ++i)
+ bs->rep[i] = repStartValue[i];
+ bs->entropy.hufCTable_repeatMode = HUF_repeat_none;
+ bs->entropy.offcode_repeatMode = FSE_repeat_none;
+ bs->entropy.matchlength_repeatMode = FSE_repeat_none;
+ bs->entropy.litlength_repeatMode = FSE_repeat_none;
+}
+
+/*! ZSTD_invalidateMatchState()
+ * Invalidate all the matches in the match finder tables.
+ * Requires nextSrc and base to be set (can be NULL).
+ */
+static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
+{
+ ZSTD_window_clear(&ms->window);
+
+ ms->nextToUpdate = ms->window.dictLimit + 1;
+ ms->loadedDictEnd = 0;
+ ms->opt.litLengthSum = 0; /* force reset of btopt stats */
+}
+
/*! ZSTD_continueCCtx() :
* reuse CCtx without reset (note : requires no dictionary) */
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
{
- U32 const end = (U32)(cctx->nextSrc - cctx->base);
size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- DEBUGLOG(4, "ZSTD_continueCCtx");
+ DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");
cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
cctx->appliedParams = params;
cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
cctx->consumedSrcSize = 0;
+ cctx->producedCSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
cctx->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
(U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
- cctx->lowLimit = end;
- cctx->dictLimit = end;
- cctx->nextToUpdate = end+1;
cctx->stage = ZSTDcs_init;
cctx->dictID = 0;
- cctx->loadedDictEnd = 0;
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = repStartValue[i]; }
- cctx->optState.litLengthSum = 0; /* force reset of btopt stats */
+ if (params.ldmParams.enableLdm)
+ ZSTD_window_clear(&cctx->ldmState.window);
+ ZSTD_referenceExternalSequences(cctx, NULL, 0);
+ ZSTD_invalidateMatchState(&cctx->blockState.matchState);
+ ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
XXH64_reset(&cctx->xxhState, 0);
return 0;
}
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
+static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+{
+ size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ size_t const hSize = ((size_t)1) << cParams->hashLog;
+ U32 const hashLog3 = (forCCtx && cParams->searchLength==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+ size_t const h3Size = ((size_t)1) << hashLog3;
+ size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
+
+ assert(((size_t)ptr & 3) == 0);
+
+ ms->hashLog3 = hashLog3;
+ memset(&ms->window, 0, sizeof(ms->window));
+ ZSTD_invalidateMatchState(ms);
+
+ /* opt parser space */
+ if (forCCtx && ((cParams->strategy == ZSTD_btopt) | (cParams->strategy == ZSTD_btultra))) {
+ DEBUGLOG(4, "reserving optimal parser space");
+ ms->opt.litFreq = (U32*)ptr;
+ ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
+ ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
+ ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
+ ptr = ms->opt.offCodeFreq + (MaxOff+1);
+ ms->opt.matchTable = (ZSTD_match_t*)ptr;
+ ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
+ ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
+ ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
+ }
+
+ /* table Space */
+ DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
+ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
+ if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
+ ms->hashTable = (U32*)(ptr);
+ ms->chainTable = ms->hashTable + hSize;
+ ms->hashTable3 = ms->chainTable + chainSize;
+ ptr = ms->hashTable3 + h3Size;
+
+ assert(((size_t)ptr & 3) == 0);
+ return ptr;
+}
+
/*! ZSTD_resetCCtx_internal() :
note : `params` are assumed fully validated at this stage */
static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
@@ -830,19 +958,14 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zbuff, pledgedSrcSize)) {
DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)",
zc->appliedParams.cParams.windowLog, (U32)zc->blockSize);
- assert(!(params.ldmParams.enableLdm &&
- params.ldmParams.hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET));
- zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
- zc->entropy->offcode_repeatMode = FSE_repeat_none;
- zc->entropy->matchlength_repeatMode = FSE_repeat_none;
- zc->entropy->litlength_repeatMode = FSE_repeat_none;
return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
} }
DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
if (params.ldmParams.enableLdm) {
/* Adjust long distance matching parameters */
- ZSTD_ldm_adjustParameters(&params.ldmParams, params.cParams.windowLog);
+ params.ldmParams.windowLog = params.cParams.windowLog;
+ ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
assert(params.ldmParams.hashEveryLog < 32);
zc->ldmState.hashPower =
@@ -854,34 +977,25 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
size_t const maxNbSeq = blockSize / divider;
size_t const tokenSpace = blockSize + 11*maxNbSeq;
- size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ?
- 0 : ((size_t)1 << params.cParams.chainLog);
- size_t const hSize = ((size_t)1) << params.cParams.hashLog;
- U32 const hashLog3 = (params.cParams.searchLength>3) ?
- 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
- size_t const h3Size = ((size_t)1) << hashLog3;
- size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
+ size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
void* ptr;
/* Check if workSpace is large enough, alloc a new one if needed */
- { size_t const entropySpace = sizeof(ZSTD_entropyCTables_t);
- size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
- + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
- size_t const optSpace = ( (params.cParams.strategy == ZSTD_btopt)
- || (params.cParams.strategy == ZSTD_btultra)) ?
- optPotentialSpace : 0;
+ { size_t const entropySpace = HUF_WORKSPACE_SIZE;
+ size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
size_t const bufferSpace = buffInSize + buffOutSize;
- size_t const ldmSpace = params.ldmParams.enableLdm
- ? ZSTD_ldm_getTableSize(params.ldmParams.hashLog, params.ldmParams.bucketSizeLog)
- : 0;
- size_t const neededSpace = entropySpace + optSpace + ldmSpace +
- tableSpace + tokenSpace + bufferSpace;
- DEBUGLOG(4, "Need %uKB workspace, including %uKB for tables, and %uKB for buffers",
- (U32)(neededSpace>>10), (U32)(tableSpace>>10), (U32)(bufferSpace>>10));
- DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u - windowSize: %u - blockSize: %u",
- (U32)chainSize, (U32)hSize, (U32)h3Size, (U32)windowSize, (U32)blockSize);
+ size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
+ size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
+
+ size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
+ ldmSeqSpace + matchStateSize + tokenSpace +
+ bufferSpace;
+ DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers",
+ (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10));
+ DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize);
if (zc->workSpaceSize < neededSpace) { /* too small : resize */
DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK",
@@ -897,16 +1011,20 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
zc->workSpaceSize = neededSpace;
ptr = zc->workSpace;
- /* entropy space */
+ /* Statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
- assert(zc->workSpaceSize >= sizeof(ZSTD_entropyCTables_t));
- zc->entropy = (ZSTD_entropyCTables_t*)zc->workSpace;
+ assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
+ zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
+ zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
+ ptr = zc->blockState.nextCBlock + 1;
+ zc->entropyWorkspace = (U32*)ptr;
} }
/* init params */
zc->appliedParams = params;
zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
zc->consumedSrcSize = 0;
+ zc->producedCSize = 0;
if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
zc->appliedParams.fParams.contentSizeFlag = 0;
DEBUGLOG(4, "pledged content size : %u ; flag : %u",
@@ -916,37 +1034,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
XXH64_reset(&zc->xxhState, 0);
zc->stage = ZSTDcs_init;
zc->dictID = 0;
- zc->loadedDictEnd = 0;
- zc->entropy->hufCTable_repeatMode = HUF_repeat_none;
- zc->entropy->offcode_repeatMode = FSE_repeat_none;
- zc->entropy->matchlength_repeatMode = FSE_repeat_none;
- zc->entropy->litlength_repeatMode = FSE_repeat_none;
- zc->nextToUpdate = 1;
- zc->nextSrc = NULL;
- zc->base = NULL;
- zc->dictBase = NULL;
- zc->dictLimit = 0;
- zc->lowLimit = 0;
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->seqStore.rep[i] = repStartValue[i]; }
- zc->hashLog3 = hashLog3;
- zc->optState.litLengthSum = 0;
-
- ptr = zc->entropy + 1;
-
- /* opt parser space */
- if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btultra)) {
- DEBUGLOG(4, "reserving optimal parser space");
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- zc->optState.litFreq = (U32*)ptr;
- zc->optState.litLengthFreq = zc->optState.litFreq + (1<<Litbits);
- zc->optState.matchLengthFreq = zc->optState.litLengthFreq + (MaxLL+1);
- zc->optState.offCodeFreq = zc->optState.matchLengthFreq + (MaxML+1);
- ptr = zc->optState.offCodeFreq + (MaxOff+1);
- zc->optState.matchTable = (ZSTD_match_t*)ptr;
- ptr = zc->optState.matchTable + ZSTD_OPT_NUM+1;
- zc->optState.priceTable = (ZSTD_optimal_t*)ptr;
- ptr = zc->optState.priceTable + ZSTD_OPT_NUM+1;
- }
+
+ ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+ ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
/* ldm hash table */
/* initialize bucketOffsets table later for pointer alignment */
@@ -956,16 +1047,15 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
zc->ldmState.hashTable = (ldmEntry_t*)ptr;
ptr = zc->ldmState.hashTable + ldmHSize;
+ zc->ldmSequences = (rawSeq*)ptr;
+ ptr = zc->ldmSequences + maxNbLdmSeq;
+ zc->maxNbLdmSequences = maxNbLdmSeq;
+
+ memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
}
+ assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- /* table Space */
- DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
- if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace); /* reset tables only */
- assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
- zc->hashTable = (U32*)(ptr);
- zc->chainTable = zc->hashTable + hSize;
- zc->hashTable3 = zc->chainTable + chainSize;
- ptr = zc->hashTable3 + h3Size;
+ ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
/* sequences storage */
zc->seqStore.sequencesStart = (seqDef*)ptr;
@@ -984,7 +1074,9 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
memset(ptr, 0, ldmBucketSize);
zc->ldmState.bucketOffsets = (BYTE*)ptr;
ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
+ ZSTD_window_clear(&zc->ldmState.window);
}
+ ZSTD_referenceExternalSequences(zc, NULL, 0);
/* buffers */
zc->inBuffSize = buffInSize;
@@ -1002,9 +1094,61 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
* do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
int i;
- for (i=0; i<ZSTD_REP_NUM; i++) cctx->seqStore.rep[i] = 0;
+ for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+ assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
}
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+ const ZSTD_CDict* cdict,
+ unsigned windowLog,
+ ZSTD_frameParameters fParams,
+ U64 pledgedSrcSize,
+ ZSTD_buffered_policy_e zbuff)
+{
+ { ZSTD_CCtx_params params = cctx->requestedParams;
+ /* Copy only compression parameters related to tables. */
+ params.cParams = cdict->cParams;
+ if (windowLog) params.cParams.windowLog = windowLog;
+ params.fParams = fParams;
+ ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ ZSTDcrp_noMemset, zbuff);
+ assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
+ assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
+ assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
+ }
+
+ /* copy tables */
+ { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
+ size_t const hSize = (size_t)1 << cdict->cParams.hashLog;
+ size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
+ assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
+ assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
+ memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
+ }
+ /* Zero the hashTable3, since the cdict never fills it */
+ { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
+ assert(cdict->matchState.hashLog3 == 0);
+ memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+ }
+
+ /* copy dictionary offsets */
+ {
+ ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+ ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+ dstMatchState->window = srcMatchState->window;
+ dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+ dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
+ dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+ }
+ cctx->dictID = cdict->dictID;
+
+ /* copy block state */
+ memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+ return 0;
+}
/*! ZSTD_copyCCtx_internal() :
* Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
@@ -1015,7 +1159,6 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
* @return : 0, or an error code */
static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
const ZSTD_CCtx* srcCCtx,
- unsigned windowLog,
ZSTD_frameParameters fParams,
U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
@@ -1027,41 +1170,39 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
{ ZSTD_CCtx_params params = dstCCtx->requestedParams;
/* Copy only compression parameters related to tables. */
params.cParams = srcCCtx->appliedParams.cParams;
- if (windowLog) params.cParams.windowLog = windowLog;
params.fParams = fParams;
ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
ZSTDcrp_noMemset, zbuff);
+ assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
+ assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
+ assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
+ assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
+ assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
}
/* copy tables */
{ size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
- size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
+ size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3;
size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
- assert((U32*)dstCCtx->chainTable == (U32*)dstCCtx->hashTable + hSize); /* chainTable must follow hashTable */
- assert((U32*)dstCCtx->hashTable3 == (U32*)dstCCtx->chainTable + chainSize);
- memcpy(dstCCtx->hashTable, srcCCtx->hashTable, tableSpace); /* presumes all tables follow each other */
+ assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
+ assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize);
+ memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */
}
/* copy dictionary offsets */
- dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
- dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
- dstCCtx->nextSrc = srcCCtx->nextSrc;
- dstCCtx->base = srcCCtx->base;
- dstCCtx->dictBase = srcCCtx->dictBase;
- dstCCtx->dictLimit = srcCCtx->dictLimit;
- dstCCtx->lowLimit = srcCCtx->lowLimit;
- dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
- dstCCtx->dictID = srcCCtx->dictID;
-
- /* copy entropy tables */
- memcpy(dstCCtx->entropy, srcCCtx->entropy, sizeof(ZSTD_entropyCTables_t));
- /* copy repcodes */
{
- int i;
- for (i = 0; i < ZSTD_REP_NUM; ++i)
- dstCCtx->seqStore.rep[i] = srcCCtx->seqStore.rep[i];
+ ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState;
+ ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
+ dstMatchState->window = srcMatchState->window;
+ dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+ dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
+ dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
}
+ dstCCtx->dictID = srcCCtx->dictID;
+
+ /* copy block state */
+ memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
return 0;
}
@@ -1080,51 +1221,69 @@ size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long
fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
- 0 /*windowLog from srcCCtx*/, fParams, pledgedSrcSize,
+ fParams, pledgedSrcSize,
zbuff);
}
+#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
- * reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
+ * reduce table indexes by `reducerValue`, or squash to zero.
+ * PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ * It must be set to a clear 0/1 value, to remove branch during inlining.
+ * Presume table size is a multiple of ZSTD_ROWSIZE
+ * to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+ int const nbRows = (int)size / ZSTD_ROWSIZE;
+ int cellNb = 0;
+ int rowNb;
+ assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */
+ assert(size < (1U<<31)); /* can be casted to int */
+ for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+ int column;
+ for (column=0; column<ZSTD_ROWSIZE; column++) {
+ if (preserveMark) {
+ U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+ table[cellNb] += adder;
+ }
+ if (table[cellNb] < reducerValue) table[cellNb] = 0;
+ else table[cellNb] -= reducerValue;
+ cellNb++;
+ } }
+}
+
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
- U32 u;
- for (u=0 ; u < size ; u++) {
- if (table[u] < reducerValue) table[u] = 0;
- else table[u] -= reducerValue;
- }
+ ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}
-/*! ZSTD_ldm_reduceTable() :
- * reduce table indexes by `reducerValue` */
-static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
- U32 const reducerValue)
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
- U32 u;
- for (u = 0; u < size; u++) {
- if (table[u].offset < reducerValue) table[u].offset = 0;
- else table[u].offset -= reducerValue;
- }
+ ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
/*! ZSTD_reduceIndex() :
* rescale all indexes to avoid future overflow (indexes are U32) */
static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
{
- { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
- ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
-
- { U32 const chainSize = (zc->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((U32)1 << zc->appliedParams.cParams.chainLog);
- ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
+ ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+ { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
+ ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
+ }
- { U32 const h3Size = (zc->hashLog3) ? (U32)1 << zc->hashLog3 : 0;
- ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
+ if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
+ U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
+ if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
+ ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+ else
+ ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+ }
- { if (zc->appliedParams.ldmParams.enableLdm) {
- U32 const ldmHSize = (U32)1 << zc->appliedParams.ldmParams.hashLog;
- ZSTD_ldm_reduceTable(zc->ldmState.hashTable, ldmHSize, reducerValue);
- }
+ if (ms->hashLog3) {
+ U32 const h3Size = (U32)1 << ms->hashLog3;
+ ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
}
}
@@ -1199,10 +1358,12 @@ static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, cons
static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
- ZSTD_strategy strategy,
+static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ ZSTD_strategy strategy, int disableLiteralCompression,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+ const void* src, size_t srcSize,
+ U32* workspace, const int bmi2)
{
size_t const minGain = ZSTD_minGain(srcSize);
size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -1211,34 +1372,51 @@ static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t * entropy,
symbolEncodingType_e hType = set_compressed;
size_t cLitSize;
+ DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
+ disableLiteralCompression);
+
+ /* Prepare nextEntropy assuming reusing the existing table */
+ nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode;
+ memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable,
+ sizeof(prevEntropy->hufCTable));
+
+ if (disableLiteralCompression)
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
/* small ? don't even attempt compression (speed opt) */
-# define LITERAL_NOENTROPY 63
- { size_t const minLitSize = entropy->hufCTable_repeatMode == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
+# define COMPRESS_LITERALS_SIZE_MIN 63
+ { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
- { HUF_repeat repeat = entropy->hufCTable_repeatMode;
+ { HUF_repeat repeat = prevEntropy->hufCTable_repeatMode;
int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat)
+ workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2)
: HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
- entropy->workspace, sizeof(entropy->workspace), (HUF_CElt*)entropy->hufCTable, &repeat, preferRepeat);
- if (repeat != HUF_repeat_none) { hType = set_repeat; } /* reused the existing table */
- else { entropy->hufCTable_repeatMode = HUF_repeat_check; } /* now have a table to reuse */
+ workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2);
+ if (repeat != HUF_repeat_none) {
+ /* reused the existing table */
+ hType = set_repeat;
+ }
}
if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
- entropy->hufCTable_repeatMode = HUF_repeat_none;
+ memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
}
if (cLitSize==1) {
- entropy->hufCTable_repeatMode = HUF_repeat_none;
+ memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
}
+ if (hType == set_compressed) {
+ /* using a newly constructed table */
+ nextEntropy->hufCTable_repeatMode = HUF_repeat_check;
+ }
+
/* Build header */
switch(lhSize)
{
@@ -1332,10 +1510,11 @@ symbolEncodingType_e ZSTD_selectEncodingType(
MEM_STATIC
size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
- FSE_CTable* CTable, U32 FSELog, symbolEncodingType_e type,
+ FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
U32* count, U32 max,
BYTE const* codeTable, size_t nbSeq,
S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ FSE_CTable const* prevCTable, size_t prevCTableSize,
void* workspace, size_t workspaceSize)
{
BYTE* op = (BYTE*)dst;
@@ -1344,12 +1523,13 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
switch (type) {
case set_rle:
*op = codeTable[0];
- CHECK_F(FSE_buildCTable_rle(CTable, (BYTE)max));
+ CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max));
return 1;
case set_repeat:
+ memcpy(nextCTable, prevCTable, prevCTableSize);
return 0;
case set_basic:
- CHECK_F(FSE_buildCTable_wksp(CTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
+ CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */
return 0;
case set_compressed: {
S16 norm[MaxSeq + 1];
@@ -1363,7 +1543,7 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
{ size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
if (FSE_isError(NCountSize)) return NCountSize;
- CHECK_F(FSE_buildCTable_wksp(CTable, norm, max, tableLog, workspace, workspaceSize));
+ CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
return NCountSize;
}
}
@@ -1371,8 +1551,8 @@ size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
}
}
-MEM_STATIC
-size_t ZSTD_encodeSequences(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
void* dst, size_t dstCapacity,
FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
@@ -1419,7 +1599,8 @@ size_t ZSTD_encodeSequences(
DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
sequences[n].litLength,
sequences[n].matchLength + MINMATCH,
- sequences[n].offset); /* 32b*/ /* 64b*/
+ sequences[n].offset);
+ /* 32b*/ /* 64b*/
/* (7)*/ /* (7)*/
FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
@@ -1445,8 +1626,11 @@ size_t ZSTD_encodeSequences(
BIT_flushBits(&blockStream); /* (7)*/
} }
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
FSE_flushCState(&blockStream, &stateMatchLength);
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
FSE_flushCState(&blockStream, &stateOffsetBits);
+ DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
FSE_flushCState(&blockStream, &stateLitLength);
{ size_t const streamSize = BIT_closeCStream(&blockStream);
@@ -1455,16 +1639,77 @@ size_t ZSTD_encodeSequences(
}
}
+static size_t
+ZSTD_encodeSequences_default(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ return ZSTD_encodeSequences_body(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+ return ZSTD_encodeSequences_body(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+ void* dst, size_t dstCapacity,
+ FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+ FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+ FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+ seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+#if DYNAMIC_BMI2
+ if (bmi2) {
+ return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+ }
+#endif
+ (void)bmi2;
+ return ZSTD_encodeSequences_default(dst, dstCapacity,
+ CTable_MatchLength, mlCodeTable,
+ CTable_OffsetBits, ofCodeTable,
+ CTable_LitLength, llCodeTable,
+ sequences, nbSeq, longOffsets);
+}
+
MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
- ZSTD_entropyCTables_t* entropy,
- ZSTD_compressionParameters const* cParams,
- void* dst, size_t dstCapacity)
+ ZSTD_entropyCTables_t const* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ ZSTD_CCtx_params const* cctxParams,
+ void* dst, size_t dstCapacity, U32* workspace,
+ const int bmi2)
{
- const int longOffsets = cParams->windowLog > STREAM_ACCUMULATOR_MIN;
+ const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
U32 count[MaxSeq+1];
- FSE_CTable* CTable_LitLength = entropy->litlengthCTable;
- FSE_CTable* CTable_OffsetBits = entropy->offcodeCTable;
- FSE_CTable* CTable_MatchLength = entropy->matchlengthCTable;
+ FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+ FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
const seqDef* const sequences = seqStorePtr->sequencesStart;
const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1476,13 +1721,17 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
BYTE* seqHead;
- ZSTD_STATIC_ASSERT(sizeof(entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
+ ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
/* Compress literals */
{ const BYTE* const literals = seqStorePtr->litStart;
size_t const litSize = seqStorePtr->lit - literals;
size_t const cSize = ZSTD_compressLiterals(
- entropy, cParams->strategy, op, dstCapacity, literals, litSize);
+ prevEntropy, nextEntropy,
+ cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
+ op, dstCapacity,
+ literals, litSize,
+ workspace, bmi2);
if (ZSTD_isError(cSize))
return cSize;
assert(cSize <= dstCapacity);
@@ -1497,7 +1746,15 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
else
op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
- if (nbSeq==0) return op - ostart;
+ if (nbSeq==0) {
+ memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable));
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable));
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable));
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ return op - ostart;
+ }
/* seqHead : flags for FSE encoding type */
seqHead = op++;
@@ -1506,36 +1763,42 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
ZSTD_seqToCodes(seqStorePtr);
/* build CTable for Literal Lengths */
{ U32 max = MaxLL;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, entropy->workspace);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
DEBUGLOG(5, "Building LL table");
- LLtype = ZSTD_selectEncodingType(&entropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
- entropy->workspace, sizeof(entropy->workspace));
+ prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+ workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
} }
/* build CTable for Offsets */
{ U32 max = MaxOff;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, entropy->workspace);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
/* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
DEBUGLOG(5, "Building OF table");
- Offtype = ZSTD_selectEncodingType(&entropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- entropy->workspace, sizeof(entropy->workspace));
+ prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+ workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
} }
/* build CTable for MatchLengths */
{ U32 max = MaxML;
- size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, entropy->workspace);
+ size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
DEBUGLOG(5, "Building ML table");
- MLtype = ZSTD_selectEncodingType(&entropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
{ size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
- entropy->workspace, sizeof(entropy->workspace));
+ prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+ workspace, HUF_WORKSPACE_SIZE);
if (ZSTD_isError(countSize)) return countSize;
op += countSize;
} }
@@ -1548,7 +1811,7 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
CTable_OffsetBits, ofCodeTable,
CTable_LitLength, llCodeTable,
sequences, nbSeq,
- longOffsets);
+ longOffsets, bmi2);
if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
op += bitstreamSize;
}
@@ -1557,48 +1820,40 @@ MEM_STATIC size_t ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
}
MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
- ZSTD_entropyCTables_t* entropy,
- ZSTD_compressionParameters const* cParams,
+ ZSTD_entropyCTables_t const* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ ZSTD_CCtx_params const* cctxParams,
void* dst, size_t dstCapacity,
- size_t srcSize)
+ size_t srcSize, U32* workspace, int bmi2)
{
- size_t const cSize = ZSTD_compressSequences_internal(seqStorePtr, entropy, cParams,
- dst, dstCapacity);
- /* If the srcSize <= dstCapacity, then there is enough space to write a
- * raw uncompressed block. Since we ran out of space, the block must not
- * be compressible, so fall back to a raw uncompressed block.
+ size_t const cSize = ZSTD_compressSequences_internal(
+ seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
+ workspace, bmi2);
+ /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+ * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/
- int const uncompressibleError = (cSize == ERROR(dstSize_tooSmall)) && (srcSize <= dstCapacity);
- if (ZSTD_isError(cSize) && !uncompressibleError)
- return cSize;
+ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+ return 0; /* block not compressed */
+ if (ZSTD_isError(cSize)) return cSize;
+
+ /* Check compressibility */
+ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */
+ if (cSize >= maxCSize) return 0; /* block not compressed */
+ }
+
/* We check that dictionaries have offset codes available for the first
* block. After the first block, the offcode table might not have large
* enough codes to represent the offsets in the data.
*/
- if (entropy->offcode_repeatMode == FSE_repeat_valid)
- entropy->offcode_repeatMode = FSE_repeat_check;
-
- /* Check compressibility */
- { size_t const minGain = ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */
- size_t const maxCSize = srcSize - minGain;
- if (cSize >= maxCSize || uncompressibleError) {
- entropy->hufCTable_repeatMode = HUF_repeat_none;
- entropy->offcode_repeatMode = FSE_repeat_none;
- entropy->matchlength_repeatMode = FSE_repeat_none;
- entropy->litlength_repeatMode = FSE_repeat_none;
- return 0; /* block not compressed */
- } }
- assert(!ZSTD_isError(cSize));
+ if (nextEntropy->offcode_repeatMode == FSE_repeat_valid)
+ nextEntropy->offcode_repeatMode = FSE_repeat_check;
- /* block is compressed => confirm repcodes in history */
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->rep[i] = seqStorePtr->repToConfirm[i]; }
return cSize;
}
/* ZSTD_selectBlockCompressor() :
* Not static, but internal use only (used by long distance matcher)
* assumption : strat is a valid strategy */
-typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
{
static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
@@ -1632,32 +1887,83 @@ static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
ssPtr->longLengthID = 0;
}
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize)
{
- DEBUGLOG(5, "ZSTD_compressBlock_internal : dstCapacity = %u", (U32)dstCapacity);
- if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1)
+ ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+ DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+ (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
+ if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+ ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
return 0; /* don't even attempt compression below a certain srcSize */
+ }
ZSTD_resetSeqStore(&(zc->seqStore));
/* limited update after a very long match */
- { const BYTE* const base = zc->base;
+ { const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const U32 current = (U32)(istart-base);
- if (current > zc->nextToUpdate + 384)
- zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));
+ if (current > ms->nextToUpdate + 384)
+ ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
}
- /* find and store sequences */
- { U32 const extDict = zc->lowLimit < zc->dictLimit;
- const ZSTD_blockCompressor blockCompressor =
- zc->appliedParams.ldmParams.enableLdm
- ? (extDict ? ZSTD_compressBlock_ldm_extDict : ZSTD_compressBlock_ldm)
- : ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
- size_t const lastLLSize = blockCompressor(zc, src, srcSize);
- const BYTE* const anchor = (const BYTE*)src + srcSize - lastLLSize;
- ZSTD_storeLastLiterals(&zc->seqStore, anchor, lastLLSize);
+
+ /* select and store sequences */
+ { U32 const extDict = ZSTD_window_hasExtDict(ms->window);
+ size_t lastLLSize;
+ { int i;
+ for (i = 0; i < ZSTD_REP_NUM; ++i)
+ zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+ }
+ if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+ assert(!zc->appliedParams.ldmParams.enableLdm);
+ /* Updates ldmSeqStore.pos */
+ lastLLSize =
+ ZSTD_ldm_blockCompress(&zc->externSeqStore,
+ ms, &zc->seqStore,
+ zc->blockState.nextCBlock->rep,
+ &zc->appliedParams.cParams,
+ src, srcSize, extDict);
+ assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+ } else if (zc->appliedParams.ldmParams.enableLdm) {
+ rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
+
+ ldmSeqStore.seq = zc->ldmSequences;
+ ldmSeqStore.capacity = zc->maxNbLdmSequences;
+ /* Updates ldmSeqStore.size */
+ CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+ &zc->appliedParams.ldmParams,
+ src, srcSize));
+ /* Updates ldmSeqStore.pos */
+ lastLLSize =
+ ZSTD_ldm_blockCompress(&ldmSeqStore,
+ ms, &zc->seqStore,
+ zc->blockState.nextCBlock->rep,
+ &zc->appliedParams.cParams,
+ src, srcSize, extDict);
+ assert(ldmSeqStore.pos == ldmSeqStore.size);
+ } else { /* not long range mode */
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
+ lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
+ }
+ { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+ ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+ } }
+
+ /* encode sequences and literals */
+ { size_t const cSize = ZSTD_compressSequences(&zc->seqStore,
+ &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ dst, dstCapacity,
+ srcSize, zc->entropyWorkspace, zc->bmi2);
+ if (ZSTD_isError(cSize) || cSize == 0) return cSize;
+ /* confirm repcodes and entropy tables */
+ { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+ zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+ zc->blockState.nextCBlock = tmp;
+ }
+ return cSize;
}
- /* encode */
- return ZSTD_compressSequences(&zc->seqStore, zc->entropy, &zc->appliedParams.cParams, dst, dstCapacity, srcSize);
}
@@ -1686,54 +1992,27 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
XXH64_update(&cctx->xxhState, src, srcSize);
while (remaining) {
+ ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
if (remaining < blockSize) blockSize = remaining;
- /* preemptive overflow correction:
- * 1. correction is large enough:
- * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog - blockSize
- * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
- *
- * current - newCurrent
- * > (3<<29 + 1<<windowLog - blockSize) - (1<<windowLog + 1<<chainLog)
- * > (3<<29 - blockSize) - (1<<chainLog)
- * > (3<<29 - blockSize) - (1<<30) (NOTE: chainLog <= 30)
- * > 1<<29 - 1<<17
- *
- * 2. (ip+blockSize - cctx->base) doesn't overflow:
- * In 32 bit mode we limit windowLog to 30 so we don't get
- * differences larger than 1<<31-1.
- * 3. cctx->lowLimit < 1<<32:
- * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
- */
- if (cctx->lowLimit > (3U<<29)) {
- U32 const cycleMask = ((U32)1 << ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy)) - 1;
- U32 const current = (U32)(ip - cctx->base);
- U32 const newCurrent = (current & cycleMask) + ((U32)1 << cctx->appliedParams.cParams.windowLog);
- U32 const correction = current - newCurrent;
+ if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
+ U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
+ U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
- assert(current > newCurrent);
- assert(correction > 1<<28); /* Loose bound, should be about 1<<29 */
+
ZSTD_reduceIndex(cctx, correction);
- cctx->base += correction;
- cctx->dictBase += correction;
- cctx->lowLimit -= correction;
- cctx->dictLimit -= correction;
- if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
- else cctx->nextToUpdate -= correction;
- DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x\n", correction, cctx->lowLimit);
- }
- /* enforce maxDist */
- if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
- U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
- if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
- if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
+ if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+ else ms->nextToUpdate -= correction;
+ ms->loadedDictEnd = 0;
}
+ ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd);
+ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
{ size_t cSize = ZSTD_compressBlock_internal(cctx,
op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
@@ -1810,16 +2089,44 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
return pos;
}
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+ if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
+ { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
+ MEM_writeLE24(dst, cBlockHeader24);
+ return ZSTD_blockHeaderSize;
+ }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+ if (cctx->stage != ZSTDcs_init)
+ return ERROR(stage_wrong);
+ if (cctx->appliedParams.ldmParams.enableLdm)
+ return ERROR(parameter_unsupported);
+ cctx->externSeqStore.seq = seq;
+ cctx->externSeqStore.size = nbSeq;
+ cctx->externSeqStore.capacity = nbSeq;
+ cctx->externSeqStore.pos = 0;
+ return 0;
+}
+
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
U32 frame, U32 lastFrameChunk)
{
- const BYTE* const ip = (const BYTE*) src;
+ ZSTD_matchState_t* ms = &cctx->blockState.matchState;
size_t fhSize = 0;
- DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u", cctx->stage);
+ DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+ cctx->stage, (U32)srcSize);
if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
if (frame && (cctx->stage==ZSTDcs_init)) {
@@ -1833,26 +2140,11 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
if (!srcSize) return fhSize; /* do not generate an empty block if no input */
- /* Check if blocks follow each other */
- if (src != cctx->nextSrc) {
- /* not contiguous */
- size_t const distanceFromBase = (size_t)(cctx->nextSrc - cctx->base);
- cctx->lowLimit = cctx->dictLimit;
- assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
- cctx->dictLimit = (U32)distanceFromBase;
- cctx->dictBase = cctx->base;
- cctx->base = ip - distanceFromBase;
- cctx->nextToUpdate = cctx->dictLimit;
- if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */
- }
- cctx->nextSrc = ip + srcSize;
-
- /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
- if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
- ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
- U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
- cctx->lowLimit = lowLimitMax;
+ if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+ ms->nextToUpdate = ms->window.dictLimit;
}
+ if (cctx->appliedParams.ldmParams.enableLdm)
+ ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
{ size_t const cSize = frame ?
@@ -1860,6 +2152,14 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
if (ZSTD_isError(cSize)) return cSize;
cctx->consumedSrcSize += srcSize;
+ cctx->producedCSize += (cSize + fhSize);
+ if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
+ if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
+ DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+ (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
+ return ERROR(srcSize_wrong);
+ }
+ }
return cSize + fhSize;
}
}
@@ -1868,14 +2168,15 @@ size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
+ DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (U32)srcSize);
return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
}
size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
{
- ZSTD_compressionParameters const cParams =
- ZSTD_getCParamsFromCCtxParams(cctx->appliedParams, 0, 0);
+ ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+ assert(!ZSTD_checkCParams(cParams));
return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
}
@@ -1889,50 +2190,45 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const
/*! ZSTD_loadDictionaryContent() :
* @return : 0, or an error code
*/
-static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
{
const BYTE* const ip = (const BYTE*) src;
const BYTE* const iend = ip + srcSize;
+ ZSTD_compressionParameters const* cParams = &params->cParams;
- /* input becomes current prefix */
- zc->lowLimit = zc->dictLimit;
- zc->dictLimit = (U32)(zc->nextSrc - zc->base);
- zc->dictBase = zc->base;
- zc->base = ip - zc->dictLimit;
- zc->nextToUpdate = zc->dictLimit;
- zc->loadedDictEnd = zc->appliedParams.forceWindow ? 0 : (U32)(iend - zc->base);
+ ZSTD_window_update(&ms->window, src, srcSize);
+ ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
- zc->nextSrc = iend;
if (srcSize <= HASH_READ_SIZE) return 0;
- switch(zc->appliedParams.cParams.strategy)
+ switch(params->cParams.strategy)
{
case ZSTD_fast:
- ZSTD_fillHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
+ ZSTD_fillHashTable(ms, cParams, iend);
break;
case ZSTD_dfast:
- ZSTD_fillDoubleHashTable (zc, iend, zc->appliedParams.cParams.searchLength);
+ ZSTD_fillDoubleHashTable(ms, cParams, iend);
break;
case ZSTD_greedy:
case ZSTD_lazy:
case ZSTD_lazy2:
if (srcSize >= HASH_READ_SIZE)
- ZSTD_insertAndFindFirstIndex(zc, iend-HASH_READ_SIZE, zc->appliedParams.cParams.searchLength);
+ ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE);
break;
- case ZSTD_btlazy2:
+ case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
case ZSTD_btopt:
case ZSTD_btultra:
if (srcSize >= HASH_READ_SIZE)
- ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, (U32)1 << zc->appliedParams.cParams.searchLog, zc->appliedParams.cParams.searchLength);
+ ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend);
break;
default:
assert(0); /* not possible : not a valid strategy id */
}
- zc->nextToUpdate = (U32)(iend - zc->base);
+ ms->nextToUpdate = (U32)(iend - ms->window.base);
return 0;
}
@@ -1956,25 +2252,26 @@ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSym
* https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
*/
/*! ZSTD_loadZstdDictionary() :
- * @return : 0, or an error code
+ * @return : dictID, or an error code
* assumptions : magic number supposed already checked
* dictSize supposed > 8
*/
-static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
{
const BYTE* dictPtr = (const BYTE*)dict;
const BYTE* const dictEnd = dictPtr + dictSize;
short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff;
+ size_t dictID;
- ZSTD_STATIC_ASSERT(sizeof(cctx->entropy->workspace) >= (1<<MAX(MLFSELog,LLFSELog)));
+ ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
dictPtr += 4; /* skip magic number */
- cctx->dictID = cctx->appliedParams.fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
+ dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
dictPtr += 4;
{ unsigned maxSymbolValue = 255;
- size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)cctx->entropy->hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
+ size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
dictPtr += hufHeaderSize;
@@ -1985,7 +2282,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
- CHECK_E( FSE_buildCTable_wksp(cctx->entropy->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
+ CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += offcodeHeaderSize;
}
@@ -1997,7 +2294,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
/* Every match length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
- CHECK_E( FSE_buildCTable_wksp(cctx->entropy->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
+ CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += matchlengthHeaderSize;
}
@@ -2009,15 +2306,15 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
/* Every literal length code must have non-zero probability */
CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
- CHECK_E( FSE_buildCTable_wksp(cctx->entropy->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->entropy->workspace, sizeof(cctx->entropy->workspace)),
+ CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
dictionary_corrupted);
dictPtr += litlengthHeaderSize;
}
if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
- cctx->seqStore.rep[0] = MEM_readLE32(dictPtr+0);
- cctx->seqStore.rep[1] = MEM_readLE32(dictPtr+4);
- cctx->seqStore.rep[2] = MEM_readLE32(dictPtr+8);
+ bs->rep[0] = MEM_readLE32(dictPtr+0);
+ bs->rep[1] = MEM_readLE32(dictPtr+4);
+ bs->rep[2] = MEM_readLE32(dictPtr+8);
dictPtr += 12;
{ size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
@@ -2031,50 +2328,55 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx* cctx, const void* dict, size_t
/* All repCodes must be <= dictContentSize and != 0*/
{ U32 u;
for (u=0; u<3; u++) {
- if (cctx->seqStore.rep[u] == 0) return ERROR(dictionary_corrupted);
- if (cctx->seqStore.rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
+ if (bs->rep[u] == 0) return ERROR(dictionary_corrupted);
+ if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
} }
- cctx->entropy->hufCTable_repeatMode = HUF_repeat_valid;
- cctx->entropy->offcode_repeatMode = FSE_repeat_valid;
- cctx->entropy->matchlength_repeatMode = FSE_repeat_valid;
- cctx->entropy->litlength_repeatMode = FSE_repeat_valid;
- return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
+ bs->entropy.hufCTable_repeatMode = HUF_repeat_valid;
+ bs->entropy.offcode_repeatMode = FSE_repeat_valid;
+ bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
+ bs->entropy.litlength_repeatMode = FSE_repeat_valid;
+ CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize));
+ return dictID;
}
}
/** ZSTD_compress_insertDictionary() :
-* @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* cctx,
+* @return : dictID, or an error code */
+static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms,
+ ZSTD_CCtx_params const* params,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode)
+ ZSTD_dictContentType_e dictContentType,
+ void* workspace)
{
DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
if ((dict==NULL) || (dictSize<=8)) return 0;
+ ZSTD_reset_compressedBlockState(bs);
+
/* dict restricted modes */
- if (dictMode==ZSTD_dm_rawContent)
- return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+ if (dictContentType == ZSTD_dct_rawContent)
+ return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
- if (dictMode == ZSTD_dm_auto) {
+ if (dictContentType == ZSTD_dct_auto) {
DEBUGLOG(4, "raw content dictionary detected");
- return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
+ return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
}
- if (dictMode == ZSTD_dm_fullDict)
+ if (dictContentType == ZSTD_dct_fullDict)
return ERROR(dictionary_wrong);
assert(0); /* impossible */
}
/* dict as full zstd dictionary */
- return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
+ return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
}
/*! ZSTD_compressBegin_internal() :
* @return : 0, or an error code */
size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, U64 pledgedSrcSize,
ZSTD_buffered_policy_e zbuff)
@@ -2086,19 +2388,26 @@ size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
if (cdict && cdict->dictContentSize>0) {
cctx->requestedParams = params;
- return ZSTD_copyCCtx_internal(cctx, cdict->refContext,
- params.cParams.windowLog, params.fParams, pledgedSrcSize,
- zbuff);
+ return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog,
+ params.fParams, pledgedSrcSize, zbuff);
}
CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
ZSTDcrp_continue, zbuff) );
- return ZSTD_compress_insertDictionary(cctx, dict, dictSize, dictMode);
+ {
+ size_t const dictID = ZSTD_compress_insertDictionary(
+ cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+ &params, dict, dictSize, dictContentType, cctx->entropyWorkspace);
+ if (ZSTD_isError(dictID)) return dictID;
+ assert(dictID <= (size_t)(U32)-1);
+ cctx->dictID = (U32)dictID;
+ }
+ return 0;
}
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize)
@@ -2107,7 +2416,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
/* compression parameters verification and optimization */
CHECK_F( ZSTD_checkCParams(params.cParams) );
return ZSTD_compressBegin_internal(cctx,
- dict, dictSize, dictMode,
+ dict, dictSize, dictContentType,
cdict,
params, pledgedSrcSize,
ZSTDb_not_buffered);
@@ -2122,18 +2431,18 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
ZSTD_CCtx_params const cctxParams =
ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
return ZSTD_compressBegin_advanced_internal(cctx,
- dict, dictSize, ZSTD_dm_auto,
+ dict, dictSize, ZSTD_dct_auto,
NULL /*cdict*/,
cctxParams, pledgedSrcSize);
}
size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
ZSTD_CCtx_params const cctxParams =
ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
- DEBUGLOG(4, "ZSTD_compressBegin_usingDict");
- return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
+ return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
}
@@ -2152,7 +2461,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
BYTE* op = ostart;
size_t fhSize = 0;
- DEBUGLOG(5, "ZSTD_writeEpilogue");
+ DEBUGLOG(4, "ZSTD_writeEpilogue");
if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
/* special case : empty frame */
@@ -2176,6 +2485,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
if (cctx->appliedParams.fParams.checksumFlag) {
U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
if (dstCapacity<4) return ERROR(dstSize_tooSmall);
+ DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", checksum);
MEM_writeLE32(op, checksum);
op += 4;
}
@@ -2184,7 +2494,6 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
return op-ostart;
}
-
size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
@@ -2242,25 +2551,27 @@ size_t ZSTD_compress_advanced_internal(
const void* dict,size_t dictSize,
ZSTD_CCtx_params params)
{
- DEBUGLOG(4, "ZSTD_compress_advanced_internal");
- CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)",
+ (U32)srcSize);
+ CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
params, srcSize, ZSTDb_not_buffered) );
return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}
-size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
const void* dict, size_t dictSize, int compressionLevel)
{
- ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
- params.fParams.contentSizeFlag = 1;
- DEBUGLOG(4, "ZSTD_compress_usingDict (level=%i, srcSize=%u, dictSize=%u)",
- compressionLevel, (U32)srcSize, (U32)dictSize);
- return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
+ ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
+ assert(params.fParams.contentSizeFlag == 1);
+ ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0);
+ return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
}
-size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
+size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
{
- return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+ DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
+ return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
}
size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
@@ -2284,9 +2595,7 @@ size_t ZSTD_estimateCDictSize_advanced(
ZSTD_dictLoadMethod_e dictLoadMethod)
{
DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (U32)sizeof(ZSTD_CDict));
- DEBUGLOG(5, "CCtx estimate : %u",
- (U32)ZSTD_estimateCCtxSize_usingCParams(cParams));
- return sizeof(ZSTD_CDict) + ZSTD_estimateCCtxSize_usingCParams(cParams)
+ return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+ (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
}
@@ -2300,23 +2609,24 @@ size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support sizeof on NULL */
DEBUGLOG(5, "sizeof(*cdict) : %u", (U32)sizeof(*cdict));
- DEBUGLOG(5, "ZSTD_sizeof_CCtx : %u", (U32)ZSTD_sizeof_CCtx(cdict->refContext));
- return ZSTD_sizeof_CCtx(cdict->refContext) + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
+ return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
}
static size_t ZSTD_initCDict_internal(
ZSTD_CDict* cdict,
const void* dictBuffer, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams)
{
- DEBUGLOG(3, "ZSTD_initCDict_internal, mode %u", (U32)dictMode);
+ DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType);
+ assert(!ZSTD_checkCParams(cParams));
+ cdict->cParams = cParams;
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
cdict->dictBuffer = NULL;
cdict->dictContent = dictBuffer;
} else {
- void* const internalBuffer = ZSTD_malloc(dictSize, cdict->refContext->customMem);
+ void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem);
cdict->dictBuffer = internalBuffer;
cdict->dictContent = internalBuffer;
if (!internalBuffer) return ERROR(memory_allocation);
@@ -2324,13 +2634,31 @@ static size_t ZSTD_initCDict_internal(
}
cdict->dictContentSize = dictSize;
- { ZSTD_CCtx_params cctxParams = cdict->refContext->requestedParams;
- cctxParams.cParams = cParams;
- CHECK_F( ZSTD_compressBegin_internal(cdict->refContext,
- cdict->dictContent, dictSize, dictMode,
- NULL,
- cctxParams, ZSTD_CONTENTSIZE_UNKNOWN,
- ZSTDb_not_buffered) );
+ /* Reset the state to no dictionary */
+ ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+ { void* const end = ZSTD_reset_matchState(
+ &cdict->matchState,
+ (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
+ &cParams, ZSTDcrp_continue, /* forCCtx */ 0);
+ assert(end == (char*)cdict->workspace + cdict->workspaceSize);
+ (void)end;
+ }
+ /* (Maybe) load the dictionary
+ * Skips loading the dictionary if it is <= 8 bytes.
+ */
+ { ZSTD_CCtx_params params;
+ memset(&params, 0, sizeof(params));
+ params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+ params.fParams.contentSizeFlag = 1;
+ params.cParams = cParams;
+ { size_t const dictID = ZSTD_compress_insertDictionary(
+ &cdict->cBlockState, &cdict->matchState, &params,
+ cdict->dictContent, cdict->dictContentSize,
+ dictContentType, cdict->workspace);
+ if (ZSTD_isError(dictID)) return dictID;
+ assert(dictID <= (size_t)(U32)-1);
+ cdict->dictID = (U32)dictID;
+ }
}
return 0;
@@ -2338,24 +2666,27 @@ static size_t ZSTD_initCDict_internal(
ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
{
- DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictMode);
+ DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (U32)dictContentType);
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
{ ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
- ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
+ size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+ void* const workspace = ZSTD_malloc(workspaceSize, customMem);
- if (!cdict || !cctx) {
+ if (!cdict || !workspace) {
ZSTD_free(cdict, customMem);
- ZSTD_freeCCtx(cctx);
+ ZSTD_free(workspace, customMem);
return NULL;
}
- cdict->refContext = cctx;
+ cdict->customMem = customMem;
+ cdict->workspace = workspace;
+ cdict->workspaceSize = workspaceSize;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dictBuffer, dictSize,
- dictLoadMethod, dictMode,
+ dictLoadMethod, dictContentType,
cParams) )) {
ZSTD_freeCDict(cdict);
return NULL;
@@ -2369,7 +2700,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, ZSTD_dm_auto,
+ ZSTD_dlm_byCopy, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
}
@@ -2377,15 +2708,15 @@ ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int
{
ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
return ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byRef, ZSTD_dm_auto,
+ ZSTD_dlm_byRef, ZSTD_dct_auto,
cParams, ZSTD_defaultCMem);
}
size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
{
if (cdict==NULL) return 0; /* support free on NULL */
- { ZSTD_customMem const cMem = cdict->refContext->customMem;
- ZSTD_freeCCtx(cdict->refContext);
+ { ZSTD_customMem const cMem = cdict->customMem;
+ ZSTD_free(cdict->workspace, cMem);
ZSTD_free(cdict->dictBuffer, cMem);
ZSTD_free(cdict, cMem);
return 0;
@@ -2405,18 +2736,18 @@ size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
* Note : there is no corresponding "free" function.
* Since workspace was allocated externally, it must be freed externally.
*/
-ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
+const ZSTD_CDict* ZSTD_initStaticCDict(
+ void* workspace, size_t workspaceSize,
const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams)
{
- size_t const cctxSize = ZSTD_estimateCCtxSize_usingCParams(cParams);
+ size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
- + cctxSize;
+ + HUF_WORKSPACE_SIZE + matchStateSize;
ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
void* ptr;
- DEBUGLOG(4, "(size_t)workspace & 7 : %u", (U32)(size_t)workspace & 7);
if ((size_t)workspace & 7) return NULL; /* 8-aligned */
DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
(U32)workspaceSize, (U32)neededSize, (U32)(workspaceSize < neededSize));
@@ -2429,19 +2760,22 @@ ZSTD_CDict* ZSTD_initStaticCDict(void* workspace, size_t workspaceSize,
} else {
ptr = cdict+1;
}
- cdict->refContext = ZSTD_initStaticCCtx(ptr, cctxSize);
+ cdict->workspace = ptr;
+ cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
dict, dictSize,
- ZSTD_dlm_byRef, dictMode,
+ ZSTD_dlm_byRef, dictContentType,
cParams) ))
return NULL;
return cdict;
}
-ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) {
- return cdict->refContext->appliedParams.cParams;
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+ assert(cdict != NULL);
+ return cdict->cParams;
}
/* ZSTD_compressBegin_usingCDict_advanced() :
@@ -2454,9 +2788,18 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
if (cdict==NULL) return ERROR(dictionary_wrong);
{ ZSTD_CCtx_params params = cctx->requestedParams;
params.cParams = ZSTD_getCParamsFromCDict(cdict);
+ /* Increase window log to fit the entire dictionary and source if the
+ * source size is known. Limit the increase to 19, which is the
+ * window log for compression level 1 with the largest source size.
+ */
+ if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+ U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+ U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+ params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
+ }
params.fParams = fParams;
return ZSTD_compressBegin_internal(cctx,
- NULL, 0, ZSTD_dm_auto,
+ NULL, 0, ZSTD_dct_auto,
cdict,
params, pledgedSrcSize,
ZSTDb_not_buffered);
@@ -2534,29 +2877,30 @@ size_t ZSTD_CStreamOutSize(void)
return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
}
-static size_t ZSTD_resetCStream_internal(ZSTD_CStream* zcs,
- const void* const dict, size_t const dictSize, ZSTD_dictMode_e const dictMode,
+static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
+ const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
const ZSTD_CDict* const cdict,
ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize)
{
- DEBUGLOG(4, "ZSTD_resetCStream_internal");
+ DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)",
+ params.disableLiteralCompression);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
- CHECK_F( ZSTD_compressBegin_internal(zcs,
- dict, dictSize, dictMode,
+ CHECK_F( ZSTD_compressBegin_internal(cctx,
+ dict, dictSize, dictContentType,
cdict,
params, pledgedSrcSize,
ZSTDb_buffered) );
- zcs->inToCompress = 0;
- zcs->inBuffPos = 0;
- zcs->inBuffTarget = zcs->blockSize
- + (zcs->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
- zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
- zcs->streamStage = zcss_load;
- zcs->frameEnded = 0;
+ cctx->inToCompress = 0;
+ cctx->inBuffPos = 0;
+ cctx->inBuffTarget = cctx->blockSize
+ + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
+ cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+ cctx->streamStage = zcss_load;
+ cctx->frameEnded = 0;
return 0; /* ready to go */
}
@@ -2568,8 +2912,8 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize);
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
params.fParams.contentSizeFlag = 1;
- params.cParams = ZSTD_getCParamsFromCCtxParams(params, pledgedSrcSize, 0);
- return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize);
+ params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, 0);
+ return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
}
/*! ZSTD_initCStream_internal() :
@@ -2592,7 +2936,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
}
ZSTD_freeCDict(zcs->cdictLocal);
zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, ZSTD_dm_auto,
+ ZSTD_dlm_byCopy, ZSTD_dct_auto,
params.cParams, zcs->customMem);
zcs->cdict = zcs->cdictLocal;
if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
@@ -2605,10 +2949,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
zcs->cdict = cdict;
}
- params.compressionLevel = ZSTD_CLEVEL_CUSTOM; /* enforce usage of cParams, instead of a dynamic derivation from cLevel (but does that happen ?) */
- zcs->requestedParams = params;
-
- return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, zcs->cdict, params, pledgedSrcSize);
+ return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
}
/* ZSTD_initCStream_usingCDict_advanced() :
@@ -2637,20 +2978,22 @@ size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */
}
+
/* ZSTD_initCStream_advanced() :
- * pledgedSrcSize must be correct.
+ * pledgedSrcSize must be exact.
* if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
* dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */
size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
const void* dict, size_t dictSize,
ZSTD_parameters params, unsigned long long pledgedSrcSize)
{
- ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u",
(U32)pledgedSrcSize, params.fParams.contentSizeFlag);
CHECK_F( ZSTD_checkCParams(params.cParams) );
if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
- return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize);
+ { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
+ return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize);
+ }
}
size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
@@ -2687,7 +3030,7 @@ MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
/** ZSTD_compressStream_generic():
* internal function for all *compressStream*() variants and *compress_generic()
- * non-static, because can be called from zstdmt.c
+ * non-static, because can be called from zstdmt_compress.c
* @return : hint size for next input */
size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
ZSTD_outBuffer* output,
@@ -2862,46 +3205,56 @@ size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
DEBUGLOG(4, "ZSTD_compress_generic : transparent init stage");
if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */
params.cParams = ZSTD_getCParamsFromCCtxParams(
- cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
+ &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
#ifdef ZSTD_MULTITHREAD
- if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN)
- params.nbThreads = 1; /* do not invoke multi-threading when src size is too small */
- if (params.nbThreads > 1) {
- if (cctx->mtctx == NULL || (params.nbThreads != ZSTDMT_getNbThreads(cctx->mtctx))) {
- DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbThreads=%u (previous: %u)",
- params.nbThreads, ZSTDMT_getNbThreads(cctx->mtctx));
+ if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
+ params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
+ }
+ if (params.nbWorkers > 0) {
+ /* mt context creation */
+ if (cctx->mtctx == NULL || (params.nbWorkers != ZSTDMT_getNbWorkers(cctx->mtctx))) {
+ DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u",
+ params.nbWorkers);
+ if (cctx->mtctx != NULL)
+ DEBUGLOG(4, "ZSTD_compress_generic: previous nbWorkers was %u",
+ ZSTDMT_getNbWorkers(cctx->mtctx));
ZSTDMT_freeCCtx(cctx->mtctx);
- cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbThreads, cctx->customMem);
+ cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
if (cctx->mtctx == NULL) return ERROR(memory_allocation);
}
- DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbThreads=%u", params.nbThreads);
+ /* mt compression */
+ DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
CHECK_F( ZSTDMT_initCStream_internal(
cctx->mtctx,
- prefixDict.dict, prefixDict.dictSize, ZSTD_dm_rawContent,
+ prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent,
cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
cctx->streamStage = zcss_load;
- cctx->appliedParams.nbThreads = params.nbThreads;
+ cctx->appliedParams.nbWorkers = params.nbWorkers;
} else
#endif
- { CHECK_F( ZSTD_resetCStream_internal(
- cctx, prefixDict.dict, prefixDict.dictSize,
- prefixDict.dictMode, cctx->cdict, params,
- cctx->pledgedSrcSizePlusOne-1) );
+ { CHECK_F( ZSTD_resetCStream_internal(cctx,
+ prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
+ cctx->cdict,
+ params, cctx->pledgedSrcSizePlusOne-1) );
assert(cctx->streamStage == zcss_load);
- assert(cctx->appliedParams.nbThreads <= 1);
+ assert(cctx->appliedParams.nbWorkers == 0);
} }
/* compression stage */
#ifdef ZSTD_MULTITHREAD
- if (cctx->appliedParams.nbThreads > 1) {
- size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
- if ( ZSTD_isError(flushMin)
- || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
- ZSTD_startNewCompression(cctx);
+ if (cctx->appliedParams.nbWorkers > 0) {
+ if (cctx->cParamsChanged) {
+ ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
+ cctx->cParamsChanged = 0;
}
- return flushMin;
- }
+ { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
+ if ( ZSTD_isError(flushMin)
+ || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
+ ZSTD_startNewCompression(cctx);
+ }
+ return flushMin;
+ } }
#endif
CHECK_F( ZSTD_compressStream_generic(cctx, output, input, endOp) );
DEBUGLOG(5, "completed ZSTD_compress_generic");
@@ -2927,7 +3280,7 @@ size_t ZSTD_compress_generic_simpleArgs (
/*====== Finalize ======*/
/*! ZSTD_flushStream() :
-* @return : amount of data remaining to flush */
+ * @return : amount of data remaining to flush */
size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
{
ZSTD_inBuffer input = { NULL, 0, 0 };
@@ -2959,11 +3312,11 @@ int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{ /* "default" - guarantees a monotonically increasing memory budget */
/* W, C, H, S, L, TL, strat */
- { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
- { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
- { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
- { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3 */
- { 20, 17, 18, 1, 5, 16, ZSTD_dfast }, /* level 4 */
+ { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
+ { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */
+ { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */
+ { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */
+ { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */
{ 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */
{ 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
{ 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */
@@ -2972,9 +3325,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
{ 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
{ 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
- { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
- { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
- { 22, 21, 22, 4, 5, 16, ZSTD_btlazy2 }, /* level 15 */
+ { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
+ { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
+ { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
{ 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */
{ 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */
{ 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */
@@ -2985,8 +3338,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
{ /* for srcSize <= 256 KB */
/* W, C, H, S, L, T, strat */
- { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
- { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
+ { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
+ { 18, 13, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
{ 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
{ 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
{ 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
@@ -2997,8 +3350,8 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
{ 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
{ 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
{ 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
- { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
- { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
+ { 18, 18, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 12.*/
+ { 18, 19, 17, 7, 4, 8, ZSTD_btlazy2 }, /* level 13 */
{ 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
{ 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
{ 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
@@ -3011,9 +3364,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
{ /* for srcSize <= 128 KB */
/* W, C, H, S, L, T, strat */
- { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
- { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
- { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
+ { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* level 0 - not used */
+ { 17, 12, 13, 1, 6, 1, ZSTD_fast }, /* level 1 */
+ { 17, 13, 16, 1, 5, 1, ZSTD_fast }, /* level 2 */
{ 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
{ 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
{ 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
@@ -3037,9 +3390,9 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
{ /* for srcSize <= 16 KB */
/* W, C, H, S, L, T, strat */
- { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
- { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
- { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
+ { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
+ { 14, 14, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
+ { 14, 14, 14, 1, 4, 1, ZSTD_fast }, /* level 2 */
{ 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
{ 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
{ 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
@@ -3063,47 +3416,22 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV
},
};
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
-/* This function just controls
- * the monotonic memory budget increase of ZSTD_defaultCParameters[0].
- * Run once, on first ZSTD_getCParams() usage, if ZSTD_DEBUG is enabled
- */
-MEM_STATIC void ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget(void)
-{
- int level;
- for (level=1; level<ZSTD_maxCLevel(); level++) {
- ZSTD_compressionParameters const c1 = ZSTD_defaultCParameters[0][level];
- ZSTD_compressionParameters const c2 = ZSTD_defaultCParameters[0][level+1];
- assert(c1.windowLog <= c2.windowLog);
-# define ZSTD_TABLECOST(h,c) ((1<<(h)) + (1<<(c)))
- assert(ZSTD_TABLECOST(c1.hashLog, c1.chainLog) <= ZSTD_TABLECOST(c2.hashLog, c2.chainLog));
- }
-}
-#endif
-
/*! ZSTD_getCParams() :
-* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
+* @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
* Size values are optional, provide 0 if not known or unused */
ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
{
size_t const addedSize = srcSizeHint ? 0 : 500;
U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : (U64)-1;
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
-
-#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
- static int g_monotonicTest = 1;
- if (g_monotonicTest) {
- ZSTD_check_compressionLevel_monotonicIncrease_memoryBudget();
- g_monotonicTest=0;
- }
-#endif
-
- DEBUGLOG(4, "ZSTD_getCParams: cLevel=%i, srcSize=%u, dictSize=%u => table %u",
- compressionLevel, (U32)srcSizeHint, (U32)dictSize, tableID);
- if (compressionLevel <= 0) compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default; no negative compressionLevel yet */
- if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
- { ZSTD_compressionParameters const cp = ZSTD_defaultCParameters[tableID][compressionLevel];
- return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
+ int row = compressionLevel;
+ DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
+ if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
+ if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
+ if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+ { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+ if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
+ return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); }
}
@@ -3113,6 +3441,7 @@ ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long l
ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
ZSTD_parameters params;
ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
+ DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
memset(&params, 0, sizeof(params));
params.cParams = cParams;
params.fParams.contentSizeFlag = 1;
diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h
index f104fe981e..81f12ca6df 100644
--- a/thirdparty/zstd/compress/zstd_compress_internal.h
+++ b/thirdparty/zstd/compress/zstd_compress_internal.h
@@ -30,8 +30,14 @@ extern "C" {
/*-*************************************
* Constants
***************************************/
-static const U32 g_searchStrength = 8;
-#define HASH_READ_SIZE 8
+#define kSearchStrength 8
+#define HASH_READ_SIZE 8
+#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
+ It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+ It's not a big deal though : candidate will just be sorted again.
+ Additionnally, candidate position 1 will be lost.
+ But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be misdhandled after table re-use with a different strategy */
/*-*************************************
@@ -43,7 +49,7 @@ typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
typedef struct ZSTD_prefixDict_s {
const void* dict;
size_t dictSize;
- ZSTD_dictMode_e dictMode;
+ ZSTD_dictContentType_e dictContentType;
} ZSTD_prefixDict;
typedef struct {
@@ -51,7 +57,6 @@ typedef struct {
FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
- U32 workspace[HUF_WORKSPACE_SIZE_U32];
HUF_repeat hufCTable_repeatMode;
FSE_repeat offcode_repeatMode;
FSE_repeat matchlength_repeatMode;
@@ -94,11 +99,43 @@ typedef struct {
} optState_t;
typedef struct {
+ ZSTD_entropyCTables_t entropy;
+ U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {
+ BYTE const* nextSrc; /* next block here to continue on current prefix */
+ BYTE const* base; /* All regular indexes relative to this position */
+ BYTE const* dictBase; /* extDict indexes relative to this position */
+ U32 dictLimit; /* below that point, need extDict */
+ U32 lowLimit; /* below that point, no more data */
+} ZSTD_window_t;
+
+typedef struct {
+ ZSTD_window_t window; /* State for window round buffer management */
+ U32 loadedDictEnd; /* index of end of dictionary */
+ U32 nextToUpdate; /* index from which to continue table update */
+ U32 nextToUpdate3; /* index from which to continue table update */
+ U32 hashLog3; /* dispatch table : larger == faster, more memory */
+ U32* hashTable;
+ U32* hashTable3;
+ U32* chainTable;
+ optState_t opt; /* optimal parser state */
+} ZSTD_matchState_t;
+
+typedef struct {
+ ZSTD_compressedBlockState_t* prevCBlock;
+ ZSTD_compressedBlockState_t* nextCBlock;
+ ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {
U32 offset;
U32 checksum;
} ldmEntry_t;
typedef struct {
+ ZSTD_window_t window; /* State for the window round buffer management */
ldmEntry_t* hashTable;
BYTE* bucketOffsets; /* Next position in bucket to insert entry */
U64 hashPower; /* Used to compute the rolling hash.
@@ -111,60 +148,68 @@ typedef struct {
U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
U32 minMatchLength; /* Minimum match length */
U32 hashEveryLog; /* Log number of entries to skip */
+ U32 windowLog; /* Window log for the LDM */
} ldmParams_t;
+typedef struct {
+ U32 offset;
+ U32 litLength;
+ U32 matchLength;
+} rawSeq;
+
+typedef struct {
+ rawSeq* seq; /* The start of the sequences */
+ size_t pos; /* The position where reading stopped. <= size. */
+ size_t size; /* The number of sequences. <= capacity. */
+ size_t capacity; /* The capacity of the `seq` pointer */
+} rawSeqStore_t;
+
struct ZSTD_CCtx_params_s {
ZSTD_format_e format;
ZSTD_compressionParameters cParams;
ZSTD_frameParameters fParams;
int compressionLevel;
- U32 forceWindow; /* force back-references to respect limit of
+ int disableLiteralCompression;
+ int forceWindow; /* force back-references to respect limit of
* 1<<wLog, even for dictionary */
/* Multithreading: used to pass parameters to mtctx */
- U32 nbThreads;
+ unsigned nbWorkers;
unsigned jobSize;
unsigned overlapSizeLog;
/* Long distance matching parameters */
ldmParams_t ldmParams;
- /* For use with createCCtxParams() and freeCCtxParams() only */
+ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
ZSTD_customMem customMem;
-
}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
struct ZSTD_CCtx_s {
- const BYTE* nextSrc; /* next block here to continue on current prefix */
- const BYTE* base; /* All regular indexes relative to this position */
- const BYTE* dictBase; /* extDict indexes relative to this position */
- U32 dictLimit; /* below that point, need extDict */
- U32 lowLimit; /* below that point, no more data */
- U32 nextToUpdate; /* index from which to continue dictionary update */
- U32 nextToUpdate3; /* index from which to continue dictionary update */
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
- U32 loadedDictEnd; /* index of end of dictionary */
ZSTD_compressionStage_e stage;
- U32 dictID;
+ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
ZSTD_CCtx_params requestedParams;
ZSTD_CCtx_params appliedParams;
+ U32 dictID;
void* workSpace;
size_t workSpaceSize;
size_t blockSize;
- U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
- U64 consumedSrcSize;
+ unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
+ unsigned long long consumedSrcSize;
+ unsigned long long producedCSize;
XXH64_state_t xxhState;
ZSTD_customMem customMem;
size_t staticSize;
- seqStore_t seqStore; /* sequences storage ptrs */
- optState_t optState;
- ldmState_t ldmState; /* long distance matching state */
- U32* hashTable;
- U32* hashTable3;
- U32* chainTable;
- ZSTD_entropyCTables_t* entropy;
+ seqStore_t seqStore; /* sequences storage ptrs */
+ ldmState_t ldmState; /* long distance matching state */
+ rawSeq* ldmSequences; /* Storage for the ldm output sequences */
+ size_t maxNbLdmSequences;
+ rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
+ ZSTD_blockState_t blockState;
+ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
/* streaming */
char* inBuff;
@@ -191,6 +236,12 @@ struct ZSTD_CCtx_s {
};
+typedef size_t (*ZSTD_blockCompressor) (
+ ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
+
+
MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
{
static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
@@ -359,10 +410,12 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
}
/** ZSTD_count_2segments() :
-* can count match length with `ip` & `match` in 2 different segments.
-* convention : on reaching mEnd, match count continue starting from iStart
-*/
-MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+ * can count match length with `ip` & `match` in 2 different segments.
+ * convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+ const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
{
const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
size_t const matchLength = ZSTD_count(ip, match, vEnd);
@@ -372,8 +425,8 @@ MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const
/*-*************************************
-* Hashes
-***************************************/
+ * Hashes
+ ***************************************/
static const U32 prime3bytes = 506832829U;
static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
@@ -411,6 +464,171 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
}
}
+/*-*************************************
+* Round buffer management
+***************************************/
+/* Max current allowed */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again */
+#define ZSTD_CHUNKSIZE_MAX \
+ ( ((U32)-1) /* Maximum ending current index */ \
+ - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
+
+/**
+ * ZSTD_window_clear():
+ * Clears the window containing the history by simply setting it to empty.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+ size_t const endT = (size_t)(window->nextSrc - window->base);
+ U32 const end = (U32)endT;
+
+ window->lowLimit = end;
+ window->dictLimit = end;
+}
+
+/**
+ * ZSTD_window_hasExtDict():
+ * Returns non-zero if the window has a non-empty extDict.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+ return window.lowLimit < window.dictLimit;
+}
+
+/**
+ * ZSTD_window_needOverflowCorrection():
+ * Returns non-zero if the indices are getting too large and need overflow
+ * protection.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+ void const* srcEnd)
+{
+ U32 const current = (U32)((BYTE const*)srcEnd - window.base);
+ return current > ZSTD_CURRENT_MAX;
+}
+
+/**
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+ U32 maxDist, void const* src)
+{
+ /* preemptive overflow correction:
+ * 1. correction is large enough:
+ * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+ * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+ *
+ * current - newCurrent
+ * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+ * > (3<<29) - (1<<chainLog)
+ * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
+ * > 1<<29
+ *
+ * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+ * After correction, current is less than (1<<chainLog + 1<<windowLog).
+ * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+ * In 32-bit mode we are safe, because (chainLog <= 29), so
+ * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+ * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+ * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+ */
+ U32 const cycleMask = (1U << cycleLog) - 1;
+ U32 const current = (U32)((BYTE const*)src - window->base);
+ U32 const newCurrent = (current & cycleMask) + maxDist;
+ U32 const correction = current - newCurrent;
+ assert((maxDist & cycleMask) == 0);
+ assert(current > newCurrent);
+ /* Loose bound, should be around 1<<29 (see above) */
+ assert(correction > 1<<28);
+
+ window->base += correction;
+ window->dictBase += correction;
+ window->lowLimit -= correction;
+ window->dictLimit -= correction;
+
+ DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+ window->lowLimit);
+ return correction;
+}
+
+/**
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ * This allows a simple check that index >= lowLimit to see if index is valid.
+ * This must be called before a block compression call, with srcEnd as the block
+ * source end.
+ * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
+ * This is because dictionaries are allowed to be referenced as long as the last
+ * byte of the dictionary is in the window, but once they are out of range,
+ * they cannot be referenced. If loadedDictEndPtr is NULL, we use
+ * loadedDictEnd == 0.
+ */
+MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+ void const* srcEnd, U32 maxDist,
+ U32* loadedDictEndPtr)
+{
+ U32 const current = (U32)((BYTE const*)srcEnd - window->base);
+ U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
+ if (current > maxDist + loadedDictEnd) {
+ U32 const newLowLimit = current - maxDist;
+ if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+ if (window->dictLimit < window->lowLimit) {
+ DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
+ window->lowLimit);
+ window->dictLimit = window->lowLimit;
+ }
+ if (loadedDictEndPtr)
+ *loadedDictEndPtr = 0;
+ }
+}
+
+/**
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+ void const* src, size_t srcSize)
+{
+ BYTE const* const ip = (BYTE const*)src;
+ U32 contiguous = 1;
+ /* Check if blocks follow each other */
+ if (src != window->nextSrc) {
+ /* not contiguous */
+ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+ DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
+ window->dictLimit);
+ window->lowLimit = window->dictLimit;
+ assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
+ window->dictLimit = (U32)distanceFromBase;
+ window->dictBase = window->base;
+ window->base = ip - distanceFromBase;
+ // ms->nextToUpdate = window->dictLimit;
+ if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
+ contiguous = 0;
+ }
+ window->nextSrc = ip + srcSize;
+ /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+ if ( (ip+srcSize > window->dictBase + window->lowLimit)
+ & (ip < window->dictBase + window->dictLimit)) {
+ ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+ U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+ window->lowLimit = lowLimitMax;
+ }
+ return contiguous;
+}
+
#if defined (__cplusplus)
}
#endif
@@ -421,6 +639,13 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
* These prototypes shall only be called from within lib/compress
* ============================================================== */
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
+
/*! ZSTD_initCStream_internal() :
* Private use only. Init streaming operation.
* expects params to be valid.
@@ -446,7 +671,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
* Private use only. To be called from zstdmt_compress.c. */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
const void* dict, size_t dictSize,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize);
@@ -459,4 +684,26 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
const void* dict,size_t dictSize,
ZSTD_CCtx_params params);
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+
#endif /* ZSTD_COMPRESS_H */
diff --git a/thirdparty/zstd/compress/zstd_double_fast.c b/thirdparty/zstd/compress/zstd_double_fast.c
index fee5127c35..86e6b39621 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.c
+++ b/thirdparty/zstd/compress/zstd_double_fast.c
@@ -12,44 +12,58 @@
#include "zstd_double_fast.h"
-void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls)
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end)
{
- U32* const hashLarge = cctx->hashTable;
- U32 const hBitsL = cctx->appliedParams.cParams.hashLog;
- U32* const hashSmall = cctx->chainTable;
- U32 const hBitsS = cctx->appliedParams.cParams.chainLog;
- const BYTE* const base = cctx->base;
- const BYTE* ip = base + cctx->nextToUpdate;
+ U32* const hashLarge = ms->hashTable;
+ U32 const hBitsL = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ U32* const hashSmall = ms->chainTable;
+ U32 const hBitsS = cParams->chainLog;
+ const BYTE* const base = ms->window.base;
+ const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
- const size_t fastHashFillStep = 3;
-
- while(ip <= iend) {
- hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
- hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
- ip += fastHashFillStep;
+ const U32 fastHashFillStep = 3;
+
+ /* Always insert every fastHashFillStep position into the hash tables.
+ * Insert the other positions into the large hash table if their entry
+ * is empty.
+ */
+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+ U32 const current = (U32)(ip - base);
+ U32 i;
+ for (i = 0; i < fastHashFillStep; ++i) {
+ size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
+ size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
+ if (i == 0)
+ hashSmall[smHash] = current + i;
+ if (i == 0 || hashLarge[lgHash] == 0)
+ hashLarge[lgHash] = current + i;
+ }
}
}
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
- const void* src, size_t srcSize,
- const U32 mls)
+size_t ZSTD_compressBlock_doubleFast_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+ U32 const mls /* template */)
{
- U32* const hashLong = cctx->hashTable;
- const U32 hBitsL = cctx->appliedParams.cParams.hashLog;
- U32* const hashSmall = cctx->chainTable;
- const U32 hBitsS = cctx->appliedParams.cParams.chainLog;
- seqStore_t* seqStorePtr = &(cctx->seqStore);
- const BYTE* const base = cctx->base;
+ U32* const hashLong = ms->hashTable;
+ const U32 hBitsL = cParams->hashLog;
+ U32* const hashSmall = ms->chainTable;
+ const U32 hBitsS = cParams->chainLog;
+ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = cctx->dictLimit;
+ const U32 lowestIndex = ms->window.dictLimit;
const BYTE* const lowest = base + lowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
/* init */
@@ -76,7 +90,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
/* favor repcode */
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
U32 offset;
if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
@@ -99,14 +113,14 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
}
} else {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
}
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
}
/* match found */
@@ -129,61 +143,63 @@ size_t ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
- seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+ rep[0] = offset_1 ? offset_1 : offsetSaved;
+ rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_doubleFast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- const U32 mls = ctx->appliedParams.cParams.searchLength;
+ const U32 mls = cParams->searchLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
case 5 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
case 6 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
case 7 :
- return ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
}
}
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 mls)
+static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+ U32 const mls /* template */)
{
- U32* const hashLong = ctx->hashTable;
- U32 const hBitsL = ctx->appliedParams.cParams.hashLog;
- U32* const hashSmall = ctx->chainTable;
- U32 const hBitsS = ctx->appliedParams.cParams.chainLog;
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
+ U32* const hashLong = ms->hashTable;
+ U32 const hBitsL = cParams->hashLog;
+ U32* const hashSmall = ms->chainTable;
+ U32 const hBitsS = cParams->chainLog;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = ctx->lowLimit;
+ const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 dictLimit = ctx->dictLimit;
+ const U32 dictLimit = ms->window.dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@@ -209,7 +225,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
@@ -220,7 +236,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -245,10 +261,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
}
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} else {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ ip += ((ip-anchor) >> kSearchStrength) + 1;
continue;
} }
@@ -272,7 +288,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
ip += repLength2;
@@ -283,27 +299,29 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
+ rep[0] = offset_1;
+ rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_doubleFast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- U32 const mls = ctx->appliedParams.cParams.searchLength;
+ U32 const mls = cParams->searchLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
case 5 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
case 6 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
case 7 :
- return ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
}
}
diff --git a/thirdparty/zstd/compress/zstd_double_fast.h b/thirdparty/zstd/compress/zstd_double_fast.h
index 75e0415809..6d80b2774c 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.h
+++ b/thirdparty/zstd/compress/zstd_double_fast.h
@@ -16,11 +16,18 @@ extern "C" {
#endif
#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end);
+size_t ZSTD_compressBlock_doubleFast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
-void ZSTD_fillDoubleHashTable(ZSTD_CCtx* cctx, const void* end, const U32 mls);
-size_t ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_fast.c b/thirdparty/zstd/compress/zstd_fast.c
index 7b56c3d6ad..df4d28b340 100644
--- a/thirdparty/zstd/compress/zstd_fast.c
+++ b/thirdparty/zstd/compress/zstd_fast.c
@@ -12,39 +12,48 @@
#include "zstd_fast.h"
-void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end)
{
- U32* const hashTable = zc->hashTable;
- U32 const hBits = zc->appliedParams.cParams.hashLog;
- const BYTE* const base = zc->base;
- const BYTE* ip = base + zc->nextToUpdate;
+ U32* const hashTable = ms->hashTable;
+ U32 const hBits = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ const BYTE* const base = ms->window.base;
+ const BYTE* ip = base + ms->nextToUpdate;
const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
- const size_t fastHashFillStep = 3;
+ const U32 fastHashFillStep = 3;
- while(ip <= iend) {
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
- ip += fastHashFillStep;
+ /* Always insert every fastHashFillStep position into the hash table.
+ * Insert the other positions if their hash entry is empty.
+ */
+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+ U32 const current = (U32)(ip - base);
+ U32 i;
+ for (i = 0; i < fastHashFillStep; ++i) {
+ size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
+ if (i == 0 || hashTable[hash] == 0)
+ hashTable[hash] = current + i;
+ }
}
}
-
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
- const void* src, size_t srcSize,
- const U32 mls)
+size_t ZSTD_compressBlock_fast_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize,
+ U32 const hlog, U32 const stepSize, U32 const mls)
{
- U32* const hashTable = cctx->hashTable;
- U32 const hBits = cctx->appliedParams.cParams.hashLog;
- seqStore_t* seqStorePtr = &(cctx->seqStore);
- const BYTE* const base = cctx->base;
+ U32* const hashTable = ms->hashTable;
+ const BYTE* const base = ms->window.base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = cctx->dictLimit;
+ const U32 lowestIndex = ms->window.dictLimit;
const BYTE* const lowest = base + lowestIndex;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - HASH_READ_SIZE;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
U32 offsetSaved = 0;
/* init */
@@ -57,7 +66,7 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
/* Main Search Loop */
while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
size_t mLength;
- size_t const h = ZSTD_hashPtr(ip, hBits, mls);
+ size_t const h = ZSTD_hashPtr(ip, hlog, mls);
U32 const current = (U32)(ip-base);
U32 const matchIndex = hashTable[h];
const BYTE* match = base + matchIndex;
@@ -66,21 +75,21 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
- U32 offset;
- if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ if ( (matchIndex <= lowestIndex)
+ || (MEM_read32(match) != MEM_read32(ip)) ) {
+ assert(stepSize >= 1);
+ ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
mLength = ZSTD_count(ip+4, match+4, iend) + 4;
- offset = (U32)(ip-match);
- while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
- offset_2 = offset_1;
- offset_1 = offset;
-
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
- }
+ { U32 const offset = (U32)(ip-match);
+ while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+ offset_2 = offset_1;
+ offset_1 = offset;
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ } }
/* match found */
ip += mLength;
@@ -88,8 +97,8 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
if (ip <= ilimit) {
/* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
- hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
+ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while ( (ip <= ilimit)
&& ( (offset_2>0)
@@ -97,65 +106,67 @@ size_t ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
/* store sequence */
size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
{ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
ip += rLength;
anchor = ip;
continue; /* faster when present ... (?) */
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
- seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
+ rep[0] = offset_1 ? offset_1 : offsetSaved;
+ rep[1] = offset_2 ? offset_2 : offsetSaved;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_fast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- const U32 mls = ctx->appliedParams.cParams.searchLength;
+ U32 const hlog = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ U32 const stepSize = cParams->targetLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
case 5 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
case 6 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
case 7 :
- return ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
}
}
-static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 mls)
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize,
+ U32 const hlog, U32 const stepSize, U32 const mls)
{
- U32* hashTable = ctx->hashTable;
- const U32 hBits = ctx->appliedParams.cParams.hashLog;
- seqStore_t* seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
+ U32* hashTable = ms->hashTable;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
- const U32 lowestIndex = ctx->lowLimit;
+ const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 dictLimit = ctx->dictLimit;
+ const U32 dictLimit = ms->window.dictLimit;
const BYTE* const lowPrefixPtr = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- U32 offset_1=seqStorePtr->rep[0], offset_2=seqStorePtr->rep[1];
+ U32 offset_1=rep[0], offset_2=rep[1];
/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
- const size_t h = ZSTD_hashPtr(ip, hBits, mls);
+ const size_t h = ZSTD_hashPtr(ip, hlog, mls);
const U32 matchIndex = hashTable[h];
const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
const BYTE* match = matchBase + matchIndex;
@@ -171,11 +182,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
ip++;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
} else {
if ( (matchIndex < lowestIndex) ||
(MEM_read32(match) != MEM_read32(ip)) ) {
- ip += ((ip-anchor) >> g_searchStrength) + 1;
+ assert(stepSize >= 1);
+ ip += ((ip-anchor) >> kSearchStrength) + stepSize;
continue;
}
{ const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
@@ -186,7 +198,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
offset = current - matchIndex;
offset_2 = offset_1;
offset_1 = offset;
- ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
} }
/* found a match : store it */
@@ -195,8 +207,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
if (ip <= ilimit) {
/* Fill Table */
- hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
- hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
+ hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+ hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
/* check immediate repcode */
while (ip <= ilimit) {
U32 const current2 = (U32)(ip-base);
@@ -207,8 +219,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
- hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
+ hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
ip += repLength2;
anchor = ip;
continue;
@@ -217,27 +229,31 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
} } }
/* save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
+ rep[0] = offset_1;
+ rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_fast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- U32 const mls = ctx->appliedParams.cParams.searchLength;
+ U32 const hlog = cParams->hashLog;
+ U32 const mls = cParams->searchLength;
+ U32 const stepSize = cParams->targetLength;
switch(mls)
{
default: /* includes case 3 */
case 4 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
case 5 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
case 6 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
case 7 :
- return ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7);
+ return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
}
}
diff --git a/thirdparty/zstd/compress/zstd_fast.h b/thirdparty/zstd/compress/zstd_fast.h
index d8b7771954..f0438ad5b4 100644
--- a/thirdparty/zstd/compress/zstd_fast.h
+++ b/thirdparty/zstd/compress/zstd_fast.h
@@ -16,13 +16,17 @@ extern "C" {
#endif
#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h"
-void ZSTD_fillHashTable(ZSTD_CCtx* zc, const void* end, const U32 mls);
-size_t ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize);
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end);
+size_t ZSTD_compressBlock_fast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c
index 6d4804961d..9f158123f0 100644
--- a/thirdparty/zstd/compress/zstd_lazy.c
+++ b/thirdparty/zstd/compress/zstd_lazy.c
@@ -15,76 +15,90 @@
/*-*************************************
* Binary Tree search
***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
- * ip : assumed <= iend-8 .
- * @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- U32 nbCompares, U32 const mls, U32 const extDict)
+
+void ZSTD_updateDUBT(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iend,
+ U32 mls)
{
- U32* const hashTable = zc->hashTable;
- U32 const hashLog = zc->appliedParams.cParams.hashLog;
- size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
- U32* const bt = zc->chainTable;
- U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
+ U32* const hashTable = ms->hashTable;
+ U32 const hashLog = cParams->hashLog;
+
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
+ U32 const btMask = (1 << btLog) - 1;
+
+ const BYTE* const base = ms->window.base;
+ U32 const target = (U32)(ip - base);
+ U32 idx = ms->nextToUpdate;
+
+ if (idx != target)
+ DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
+ idx, target, ms->window.dictLimit);
+ assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
+ (void)iend;
+
+ assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
+ for ( ; idx < target ; idx++) {
+ size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
+ U32 const matchIndex = hashTable[h];
+
+ U32* const nextCandidatePtr = bt + 2*(idx&btMask);
+ U32* const sortMarkPtr = nextCandidatePtr + 1;
+
+ DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
+ hashTable[h] = idx; /* Update Hash Table */
+ *nextCandidatePtr = matchIndex; /* update BT like a chain */
+ *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
+ }
+ ms->nextToUpdate = target;
+}
+
+
+/** ZSTD_insertDUBT1() :
+ * sort one already inserted but unsorted position
+ * assumption : current >= btlow == (current - btmask)
+ * doesn't fail */
+static void ZSTD_insertDUBT1(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ U32 current, const BYTE* inputEnd,
+ U32 nbCompares, U32 btLow, int extDict)
+{
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
- U32 matchIndex = hashTable[h];
size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current;
+ const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* match;
- const U32 current = (U32)(ip-base);
- const U32 btLow = btMask >= current ? 0 : current - btMask;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = smallerPtr + 1;
+ U32 matchIndex = *smallerPtr;
U32 dummy32; /* to be nullified at the end */
- U32 const windowLow = zc->lowLimit;
- U32 matchEndIdx = current+8+1;
- size_t bestLength = 8;
-#ifdef ZSTD_C_PREDICT
- U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
- U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
- predictedSmall += (predictedSmall>0);
- predictedLarge += (predictedLarge>0);
-#endif /* ZSTD_C_PREDICT */
-
- DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+ U32 const windowLow = ms->window.lowLimit;
- assert(ip <= iend-8); /* required for h calculation */
- hashTable[h] = current; /* Update Hash Table */
+ DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
+ current, dictLimit, windowLow);
+ assert(current >= btLow);
+ assert(ip < iend); /* condition for ZSTD_count */
while (nbCompares-- && (matchIndex > windowLow)) {
U32* const nextPtr = bt + 2*(matchIndex & btMask);
size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
assert(matchIndex < current);
-#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
- const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
- if (matchIndex == predictedSmall) {
- /* no need to check length, result known */
- *smallerPtr = matchIndex;
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- predictedSmall = predictPtr[1] + (predictPtr[1]>0);
- continue;
- }
- if (matchIndex == predictedLarge) {
- *largerPtr = matchIndex;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- predictedLarge = predictPtr[0] + (predictPtr[0]>0);
- continue;
- }
-#endif
-
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
- assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
- match = base + matchIndex;
+ if ( (!extDict)
+ || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
+ || (current < dictLimit) /* both in extDict */) {
+ const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
+ assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
+ || (current < dictLimit) );
+ match = mBase + matchIndex;
matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
} else {
match = dictBase + matchIndex;
@@ -93,11 +107,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
- if (matchLength > bestLength) {
- bestLength = matchLength;
- if (matchLength > matchEndIdx - matchIndex)
- matchEndIdx = matchIndex + (U32)matchLength;
- }
+ DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+ current, matchIndex, (U32)matchLength);
if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
@@ -108,6 +119,8 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
*smallerPtr = matchIndex; /* update smaller idx */
commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
+ matchIndex, btLow, nextPtr[1]);
smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
} else {
@@ -115,184 +128,205 @@ static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,
*largerPtr = matchIndex;
commonLengthLarger = matchLength;
if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
+ matchIndex, btLow, nextPtr[0]);
largerPtr = nextPtr;
matchIndex = nextPtr[0];
} }
*smallerPtr = *largerPtr = 0;
- if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
- assert(matchEndIdx > current + 8);
- return matchEndIdx - (current + 8);
}
-FORCE_INLINE_TEMPLATE
-void ZSTD_updateTree_internal(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- const U32 nbCompares, const U32 mls, const U32 extDict)
-{
- const BYTE* const base = zc->base;
- U32 const target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
- DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
- idx, target, extDict);
-
- while(idx < target)
- idx += ZSTD_insertBt1(zc, base+idx, iend, nbCompares, mls, extDict);
- zc->nextToUpdate = target;
-}
-void ZSTD_updateTree(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- const U32 nbCompares, const U32 mls)
+static size_t ZSTD_DUBT_findBestMatch (
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* const ip, const BYTE* const iend,
+ size_t* offsetPtr,
+ U32 const mls,
+ U32 const extDict)
{
- ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 0 /*extDict*/);
-}
-
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- const U32 nbCompares, const U32 mls)
-{
- ZSTD_updateTree_internal(zc, ip, iend, nbCompares, mls, 1 /*extDict*/);
-}
+ U32* const hashTable = ms->hashTable;
+ U32 const hashLog = cParams->hashLog;
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+ U32 matchIndex = hashTable[h];
+ const BYTE* const base = ms->window.base;
+ U32 const current = (U32)(ip-base);
+ U32 const windowLow = ms->window.lowLimit;
-static size_t ZSTD_insertBtAndFindBestMatch (
- ZSTD_CCtx* zc,
- const BYTE* const ip, const BYTE* const iend,
- size_t* offsetPtr,
- U32 nbCompares, const U32 mls,
- U32 extDict)
-{
- U32* const hashTable = zc->hashTable;
- U32 const hashLog = zc->appliedParams.cParams.hashLog;
- size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
- U32* const bt = zc->chainTable;
- U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
U32 const btMask = (1 << btLog) - 1;
- U32 matchIndex = hashTable[h];
- size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const prefixStart = base + dictLimit;
- const U32 current = (U32)(ip-base);
- const U32 btLow = btMask >= current ? 0 : current - btMask;
- const U32 windowLow = zc->lowLimit;
- U32* smallerPtr = bt + 2*(current&btMask);
- U32* largerPtr = bt + 2*(current&btMask) + 1;
- U32 matchEndIdx = current+8+1;
- U32 dummy32; /* to be nullified at the end */
- size_t bestLength = 0;
+ U32 const btLow = (btMask >= current) ? 0 : current - btMask;
+ U32 const unsortLimit = MAX(btLow, windowLow);
+
+ U32* nextCandidate = bt + 2*(matchIndex&btMask);
+ U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+ U32 nbCompares = 1U << cParams->searchLog;
+ U32 nbCandidates = nbCompares;
+ U32 previousCandidate = 0;
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current);
assert(ip <= iend-8); /* required for h calculation */
- hashTable[h] = current; /* Update Hash Table */
- while (nbCompares-- && (matchIndex > windowLow)) {
- U32* const nextPtr = bt + 2*(matchIndex & btMask);
- size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
- const BYTE* match;
+ /* reach end of unsorted candidates list */
+ while ( (matchIndex > unsortLimit)
+ && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
+ && (nbCandidates > 1) ) {
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
+ matchIndex);
+ *unsortedMark = previousCandidate;
+ previousCandidate = matchIndex;
+ matchIndex = *nextCandidate;
+ nextCandidate = bt + 2*(matchIndex&btMask);
+ unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+ nbCandidates --;
+ }
- if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
- match = base + matchIndex;
- matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
- } else {
- match = dictBase + matchIndex;
- matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
- if (matchIndex+matchLength >= dictLimit)
- match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
- }
+ if ( (matchIndex > unsortLimit)
+ && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
+ matchIndex);
+ *nextCandidate = *unsortedMark = 0; /* nullify next candidate if it's still unsorted (note : simplification, detrimental to compression ratio, beneficial for speed) */
+ }
+
+ /* batch sort stacked candidates */
+ matchIndex = previousCandidate;
+ while (matchIndex) { /* will end on matchIndex == 0 */
+ U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+ U32 const nextCandidateIdx = *nextCandidateIdxPtr;
+ ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
+ nbCandidates, unsortLimit, extDict);
+ matchIndex = nextCandidateIdx;
+ nbCandidates++;
+ }
- if (matchLength > bestLength) {
- if (matchLength > matchEndIdx - matchIndex)
- matchEndIdx = matchIndex + (U32)matchLength;
- if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
- bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
- if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
- break; /* drop, to guarantee consistency (miss a little bit of compression) */
+ /* find longest match */
+ { size_t commonLengthSmaller=0, commonLengthLarger=0;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ U32* smallerPtr = bt + 2*(current&btMask);
+ U32* largerPtr = bt + 2*(current&btMask) + 1;
+ U32 matchEndIdx = current+8+1;
+ U32 dummy32; /* to be nullified at the end */
+ size_t bestLength = 0;
+
+ matchIndex = hashTable[h];
+ hashTable[h] = current; /* Update Hash Table */
+
+ while (nbCompares-- && (matchIndex > windowLow)) {
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+ const BYTE* match;
+
+ if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+ match = base + matchIndex;
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+ } else {
+ match = dictBase + matchIndex;
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+ if (matchIndex+matchLength >= dictLimit)
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
}
- }
- if (match[matchLength] < ip[matchLength]) {
- /* match is smaller than current */
- *smallerPtr = matchIndex; /* update smaller idx */
- commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
- if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
- matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
- } else {
- /* match is larger than current */
- *largerPtr = matchIndex;
- commonLengthLarger = matchLength;
- if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
- largerPtr = nextPtr;
- matchIndex = nextPtr[0];
- } }
+ if (matchLength > bestLength) {
+ if (matchLength > matchEndIdx - matchIndex)
+ matchEndIdx = matchIndex + (U32)matchLength;
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
+ }
+ }
- *smallerPtr = *largerPtr = 0;
+ if (match[matchLength] < ip[matchLength]) {
+ /* match is smaller than current */
+ *smallerPtr = matchIndex; /* update smaller idx */
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ } else {
+ /* match is larger than current */
+ *largerPtr = matchIndex;
+ commonLengthLarger = matchLength;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ } }
+
+ *smallerPtr = *largerPtr = 0;
- assert(matchEndIdx > current+8);
- zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
- return bestLength;
+ assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
+ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
+ if (bestLength >= MINMATCH) {
+ U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+ current, (U32)bestLength, (U32)*offsetPtr, mIndex);
+ }
+ return bestLength;
+ }
}
/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls)
+ const U32 mls /* template */)
{
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
- return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
+ DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
+ return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
}
static size_t ZSTD_BtFindBestMatch_selectMLS (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
+ size_t* offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
- case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
+ case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
case 7 :
- case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+ case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
}
}
/** Tree updater, providing best match */
static size_t ZSTD_BtFindBestMatch_extDict (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls)
+ const U32 mls)
{
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
- return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
+ DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
+ return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
}
static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
+ size_t* offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
- case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
+ case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
+ case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
case 7 :
- case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
+ case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
}
}
@@ -305,15 +339,17 @@ static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
/* Update chains up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
+static U32 ZSTD_insertAndFindFirstIndex_internal(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, U32 const mls)
{
- U32* const hashTable = zc->hashTable;
- const U32 hashLog = zc->appliedParams.cParams.hashLog;
- U32* const chainTable = zc->chainTable;
- const U32 chainMask = (1 << zc->appliedParams.cParams.chainLog) - 1;
- const BYTE* const base = zc->base;
+ U32* const hashTable = ms->hashTable;
+ const U32 hashLog = cParams->hashLog;
+ U32* const chainTable = ms->chainTable;
+ const U32 chainMask = (1 << cParams->chainLog) - 1;
+ const BYTE* const base = ms->window.base;
const U32 target = (U32)(ip - base);
- U32 idx = zc->nextToUpdate;
+ U32 idx = ms->nextToUpdate;
while(idx < target) { /* catch up */
size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
@@ -322,35 +358,42 @@ U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
idx++;
}
- zc->nextToUpdate = target;
+ ms->nextToUpdate = target;
return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
}
+U32 ZSTD_insertAndFindFirstIndex(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip)
+{
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
+}
+
/* inlining is important to hardwire a hot branch (template emulation) */
FORCE_INLINE_TEMPLATE
size_t ZSTD_HcFindBestMatch_generic (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit,
size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 mls, const U32 extDict)
+ const U32 mls, const U32 extDict)
{
- U32* const chainTable = zc->chainTable;
- const U32 chainSize = (1 << zc->appliedParams.cParams.chainLog);
+ U32* const chainTable = ms->chainTable;
+ const U32 chainSize = (1 << cParams->chainLog);
const U32 chainMask = chainSize-1;
- const BYTE* const base = zc->base;
- const BYTE* const dictBase = zc->dictBase;
- const U32 dictLimit = zc->dictLimit;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
const BYTE* const prefixStart = base + dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
- const U32 lowLimit = zc->lowLimit;
+ const U32 lowLimit = ms->window.lowLimit;
const U32 current = (U32)(ip-base);
const U32 minChain = current > chainSize ? current - chainSize : 0;
- int nbAttempts=maxNbAttempts;
+ U32 nbAttempts = 1U << cParams->searchLog;
size_t ml=4-1;
/* HC4 match finder */
- U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
+ U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
size_t currentMl=0;
@@ -381,35 +424,33 @@ size_t ZSTD_HcFindBestMatch_generic (
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* offsetPtr,
- const U32 maxNbAttempts, const U32 matchLengthSearch)
+ size_t* offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
- case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
case 7 :
- case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
}
}
FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
- ZSTD_CCtx* const zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iLimit,
- size_t* const offsetPtr,
- U32 const maxNbAttempts, U32 const matchLengthSearch)
+ size_t* const offsetPtr)
{
- switch(matchLengthSearch)
+ switch(cParams->searchLength)
{
default : /* includes case 3 */
- case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
- case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
case 7 :
- case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
}
}
@@ -418,30 +459,29 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
* Common parser - lazy strategy
*********************************/
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+size_t ZSTD_compressBlock_lazy_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
+ const void* src, size_t srcSize,
+ const U32 searchMethod, const U32 depth)
{
- seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base + ctx->dictLimit;
+ const BYTE* const base = ms->window.base + ms->window.dictLimit;
- U32 const maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
- U32 const mls = ctx->appliedParams.cParams.searchLength;
-
- typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
- size_t* offsetPtr,
- U32 maxNbAttempts, U32 matchLengthSearch);
+ typedef size_t (*searchMax_f)(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
- U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1], savedOffset=0;
+ U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
/* init */
ip += (ip==base);
- ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ms->nextToUpdate3 = ms->nextToUpdate;
{ U32 const maxRep = (U32)(ip-base);
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
@@ -462,13 +502,13 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
@@ -484,7 +524,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
matchLength = mlRep, offset = 0, start = ip;
}
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -503,7 +543,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
matchLength = ml2, offset = 0, start = ip;
}
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -528,7 +568,7 @@ size_t ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -538,73 +578,80 @@ _storeSequence:
/* store sequence */
matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
} }
/* Save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
- seqStorePtr->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
+ rep[0] = offset_1 ? offset_1 : savedOffset;
+ rep[1] = offset_2 ? offset_2 : savedOffset;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btlazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
}
-size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
}
-size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
}
-size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_greedy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
}
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize,
- const U32 searchMethod, const U32 depth)
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
+ const void* src, size_t srcSize,
+ const U32 searchMethod, const U32 depth)
{
- seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base;
- const U32 dictLimit = ctx->dictLimit;
- const U32 lowestIndex = ctx->lowLimit;
+ const BYTE* const base = ms->window.base;
+ const U32 dictLimit = ms->window.dictLimit;
+ const U32 lowestIndex = ms->window.lowLimit;
const BYTE* const prefixStart = base + dictLimit;
- const BYTE* const dictBase = ctx->dictBase;
+ const BYTE* const dictBase = ms->window.dictBase;
const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const dictStart = dictBase + ctx->lowLimit;
+ const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 maxSearches = 1 << ctx->appliedParams.cParams.searchLog;
- const U32 mls = ctx->appliedParams.cParams.searchLength;
-
- typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
- size_t* offsetPtr,
- U32 maxNbAttempts, U32 matchLengthSearch);
+ typedef size_t (*searchMax_f)(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
- U32 offset_1 = seqStorePtr->rep[0], offset_2 = seqStorePtr->rep[1];
+ U32 offset_1 = rep[0], offset_2 = rep[1];
/* init */
- ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ms->nextToUpdate3 = ms->nextToUpdate;
ip += (ip == prefixStart);
/* Match Loop */
@@ -628,13 +675,13 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* first search (depth 0) */
{ size_t offsetFound = 99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
if (ml2 > matchLength)
matchLength = ml2, start = ip, offset=offsetFound;
}
if (matchLength < 4) {
- ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
continue;
}
@@ -661,7 +708,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* search match, depth 1 */
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -691,7 +738,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* search match, depth 2 */
{ size_t offset2=99999999;
- size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
+ size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -713,7 +760,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
/* store sequence */
_storeSequence:
{ size_t const litLength = start - anchor;
- ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, litLength, anchor, (U32)offset, matchLength-MINMATCH);
anchor = ip = start + matchLength;
}
@@ -728,7 +775,7 @@ _storeSequence:
const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
+ ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
ip += matchLength;
anchor = ip;
continue; /* faster when present ... (?) */
@@ -737,29 +784,41 @@ _storeSequence:
} }
/* Save reps for next block */
- seqStorePtr->repToConfirm[0] = offset_1; seqStorePtr->repToConfirm[1] = offset_2;
+ rep[0] = offset_1;
+ rep[1] = offset_2;
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_greedy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
}
-size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
}
-size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_lazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
}
-size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btlazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
+
{
- return ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
+ return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
}
diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h
index 74e1fd6970..bda064f199 100644
--- a/thirdparty/zstd/compress/zstd_lazy.h
+++ b/thirdparty/zstd/compress/zstd_lazy.h
@@ -15,22 +15,39 @@
extern "C" {
#endif
-#include "mem.h" /* U32 */
-#include "zstd.h" /* ZSTD_CCtx, size_t */
-
-U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls);
-void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls);
-
-size_t ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+#include "zstd_compress_internal.h"
+
+U32 ZSTD_insertAndFindFirstIndex(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c
index be50872cf7..bffd8a3dfa 100644
--- a/thirdparty/zstd/compress/zstd_ldm.c
+++ b/thirdparty/zstd/compress/zstd_ldm.c
@@ -17,36 +17,45 @@
#define LDM_HASH_RLOG 7
#define LDM_HASH_CHAR_OFFSET 10
-size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm)
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+ ZSTD_compressionParameters const* cParams)
{
+ U32 const windowLog = cParams->windowLog;
ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
- params->enableLdm = enableLdm>0;
- params->hashLog = 0;
- params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
- params->minMatchLength = LDM_MIN_MATCH_LENGTH;
- params->hashEveryLog = ZSTD_LDM_HASHEVERYLOG_NOTSET;
- return 0;
-}
-
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog)
-{
+ DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+ if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+ if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+ if (cParams->strategy >= ZSTD_btopt) {
+ /* Get out of the way of the optimal parser */
+ U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
+ assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
+ assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
+ params->minMatchLength = minMatch;
+ }
if (params->hashLog == 0) {
params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
assert(params->hashLog <= ZSTD_HASHLOG_MAX);
}
- if (params->hashEveryLog == ZSTD_LDM_HASHEVERYLOG_NOTSET) {
+ if (params->hashEveryLog == 0) {
params->hashEveryLog =
windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
}
params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
}
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog) {
- size_t const ldmHSize = ((size_t)1) << hashLog;
- size_t const ldmBucketSizeLog = MIN(bucketSizeLog, hashLog);
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+ size_t const ldmHSize = ((size_t)1) << params.hashLog;
+ size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
size_t const ldmBucketSize =
- ((size_t)1) << (hashLog - ldmBucketSizeLog);
- return ldmBucketSize + (ldmHSize * (sizeof(ldmEntry_t)));
+ ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+ size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
+ return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+ return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
}
/** ZSTD_ldm_getSmallHash() :
@@ -167,6 +176,7 @@ static U64 ZSTD_ldm_ipow(U64 base, U64 exp)
}
U64 ZSTD_ldm_getHashPower(U32 minMatchLength) {
+ DEBUGLOG(4, "ZSTD_ldm_getHashPower: mml=%u", minMatchLength);
assert(minMatchLength >= ZSTD_LDM_MINMATCH_MIN);
return ZSTD_ldm_ipow(prime8bytes, minMatchLength - 1);
}
@@ -205,21 +215,22 @@ static size_t ZSTD_ldm_countBackwardsMatch(
*
* The tables for the other strategies are filled within their
* block compressors. */
-static size_t ZSTD_ldm_fillFastTables(ZSTD_CCtx* zc, const void* end)
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+ ZSTD_compressionParameters const* cParams,
+ void const* end)
{
const BYTE* const iend = (const BYTE*)end;
- const U32 mls = zc->appliedParams.cParams.searchLength;
- switch(zc->appliedParams.cParams.strategy)
+ switch(cParams->strategy)
{
case ZSTD_fast:
- ZSTD_fillHashTable(zc, iend, mls);
- zc->nextToUpdate = (U32)(iend - zc->base);
+ ZSTD_fillHashTable(ms, cParams, iend);
+ ms->nextToUpdate = (U32)(iend - ms->window.base);
break;
case ZSTD_dfast:
- ZSTD_fillDoubleHashTable(zc, iend, mls);
- zc->nextToUpdate = (U32)(iend - zc->base);
+ ZSTD_fillDoubleHashTable(ms, cParams, iend);
+ ms->nextToUpdate = (U32)(iend - ms->window.base);
break;
case ZSTD_greedy:
@@ -268,69 +279,62 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
* Sets cctx->nextToUpdate to a position corresponding closer to anchor
* if it is far way
* (after a long match, only update tables a limited amount). */
-static void ZSTD_ldm_limitTableUpdate(ZSTD_CCtx* cctx, const BYTE* anchor)
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
{
- U32 const current = (U32)(anchor - cctx->base);
- if (current > cctx->nextToUpdate + 1024) {
- cctx->nextToUpdate =
- current - MIN(512, current - cctx->nextToUpdate - 1024);
+ U32 const current = (U32)(anchor - ms->window.base);
+ if (current > ms->nextToUpdate + 1024) {
+ ms->nextToUpdate =
+ current - MIN(512, current - ms->nextToUpdate - 1024);
}
}
-typedef size_t (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-/* defined in zstd_compress.c */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
-
-FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
- const void* src, size_t srcSize)
+static size_t ZSTD_ldm_generateSequences_internal(
+ ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+ ldmParams_t const* params, void const* src, size_t srcSize)
{
- ldmState_t* const ldmState = &(cctx->ldmState);
- const ldmParams_t ldmParams = cctx->appliedParams.ldmParams;
- const U64 hashPower = ldmState->hashPower;
- const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
- const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
- const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
- seqStore_t* const seqStorePtr = &(cctx->seqStore);
- const BYTE* const base = cctx->base;
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const U32 lowestIndex = cctx->dictLimit;
- const BYTE* const lowest = base + lowestIndex;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
- const ZSTD_blockCompressor blockCompressor =
- ZSTD_selectBlockCompressor(cctx->appliedParams.cParams.strategy, 0);
- U32* const repToConfirm = seqStorePtr->repToConfirm;
- U32 savedRep[ZSTD_REP_NUM];
+ /* LDM parameters */
+ int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+ U32 const minMatchLength = params->minMatchLength;
+ U64 const hashPower = ldmState->hashPower;
+ U32 const hBits = params->hashLog - params->bucketSizeLog;
+ U32 const ldmBucketSize = 1U << params->bucketSizeLog;
+ U32 const hashEveryLog = params->hashEveryLog;
+ U32 const ldmTagMask = (1U << params->hashEveryLog) - 1;
+ /* Prefix and extDict parameters */
+ U32 const dictLimit = ldmState->window.dictLimit;
+ U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+ BYTE const* const base = ldmState->window.base;
+ BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+ BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+ BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+ BYTE const* const lowPrefixPtr = base + dictLimit;
+ /* Input bounds */
+ BYTE const* const istart = (BYTE const*)src;
+ BYTE const* const iend = istart + srcSize;
+ BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
+ /* Input positions */
+ BYTE const* anchor = istart;
+ BYTE const* ip = istart;
+ /* Rolling hash */
+ BYTE const* lastHashed = NULL;
U64 rollingHash = 0;
- const BYTE* lastHashed = NULL;
- size_t i, lastLiterals;
- /* Save seqStorePtr->rep and copy repToConfirm */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
-
- /* Main Search Loop */
- while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
+ while (ip <= ilimit) {
size_t mLength;
U32 const current = (U32)(ip - base);
size_t forwardMatchLength = 0, backwardMatchLength = 0;
ldmEntry_t* bestEntry = NULL;
if (ip != istart) {
rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
- lastHashed[ldmParams.minMatchLength],
+ lastHashed[minMatchLength],
hashPower);
} else {
- rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+ rollingHash = ZSTD_ldm_getRollingHash(ip, minMatchLength);
}
lastHashed = ip;
/* Do not insert and do not look for a match */
- if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
- ldmTagMask) {
+ if (ZSTD_ldm_getTag(rollingHash, hBits, hashEveryLog) != ldmTagMask) {
ip++;
continue;
}
@@ -340,27 +344,49 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
ldmEntry_t* const bucket =
ZSTD_ldm_getBucket(ldmState,
ZSTD_ldm_getSmallHash(rollingHash, hBits),
- ldmParams);
+ *params);
ldmEntry_t* cur;
size_t bestMatchLength = 0;
U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
- const BYTE* const pMatch = cur->offset + base;
size_t curForwardMatchLength, curBackwardMatchLength,
curTotalMatchLength;
if (cur->checksum != checksum || cur->offset <= lowestIndex) {
continue;
}
-
- curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
- if (curForwardMatchLength < ldmParams.minMatchLength) {
- continue;
+ if (extDict) {
+ BYTE const* const curMatchBase =
+ cur->offset < dictLimit ? dictBase : base;
+ BYTE const* const pMatch = curMatchBase + cur->offset;
+ BYTE const* const matchEnd =
+ cur->offset < dictLimit ? dictEnd : iend;
+ BYTE const* const lowMatchPtr =
+ cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+
+ curForwardMatchLength = ZSTD_count_2segments(
+ ip, pMatch, iend,
+ matchEnd, lowPrefixPtr);
+ if (curForwardMatchLength < minMatchLength) {
+ continue;
+ }
+ curBackwardMatchLength =
+ ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+ lowMatchPtr);
+ curTotalMatchLength = curForwardMatchLength +
+ curBackwardMatchLength;
+ } else { /* !extDict */
+ BYTE const* const pMatch = base + cur->offset;
+ curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
+ if (curForwardMatchLength < minMatchLength) {
+ continue;
+ }
+ curBackwardMatchLength =
+ ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
+ lowPrefixPtr);
+ curTotalMatchLength = curForwardMatchLength +
+ curBackwardMatchLength;
}
- curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
- ip, anchor, pMatch, lowest);
- curTotalMatchLength = curForwardMatchLength +
- curBackwardMatchLength;
if (curTotalMatchLength > bestMatchLength) {
bestMatchLength = curTotalMatchLength;
@@ -375,7 +401,7 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
if (bestEntry == NULL) {
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
hBits, current,
- ldmParams);
+ *params);
ip++;
continue;
}
@@ -384,324 +410,244 @@ size_t ZSTD_compressBlock_ldm_generic(ZSTD_CCtx* cctx,
mLength = forwardMatchLength + backwardMatchLength;
ip -= backwardMatchLength;
- /* Call the block compressor on the remaining literals */
{
+ /* Store the sequence:
+ * ip = current - backwardMatchLength
+ * The match is at (bestEntry->offset - backwardMatchLength)
+ */
U32 const matchIndex = bestEntry->offset;
- const BYTE* const match = base + matchIndex - backwardMatchLength;
- U32 const offset = (U32)(ip - match);
-
- /* Overwrite rep codes */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- /* Fill tables for block compressor */
- ZSTD_ldm_limitTableUpdate(cctx, anchor);
- ZSTD_ldm_fillFastTables(cctx, anchor);
-
- /* Call block compressor and get remaining literals */
- lastLiterals = blockCompressor(cctx, anchor, ip - anchor);
- cctx->nextToUpdate = (U32)(ip - base);
-
- /* Update repToConfirm with the new offset */
- for (i = ZSTD_REP_NUM - 1; i > 0; i--)
- repToConfirm[i] = repToConfirm[i-1];
- repToConfirm[0] = offset;
-
- /* Store the sequence with the leftover literals */
- ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
- offset + ZSTD_REP_MOVE, mLength - MINMATCH);
+ U32 const offset = current - matchIndex;
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+ /* Out of sequence storage */
+ if (rawSeqStore->size == rawSeqStore->capacity)
+ return ERROR(dstSize_tooSmall);
+ seq->litLength = (U32)(ip - anchor);
+ seq->matchLength = (U32)mLength;
+ seq->offset = offset;
+ rawSeqStore->size++;
}
/* Insert the current entry into the hash table */
ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
(U32)(lastHashed - base),
- ldmParams);
+ *params);
assert(ip + backwardMatchLength == lastHashed);
/* Fill the hash table from lastHashed+1 to ip+mLength*/
/* Heuristic: don't need to fill the entire table at end of block */
- if (ip + mLength < ilimit) {
+ if (ip + mLength <= ilimit) {
rollingHash = ZSTD_ldm_fillLdmHashTable(
ldmState, rollingHash, lastHashed,
- ip + mLength, base, hBits, ldmParams);
+ ip + mLength, base, hBits, *params);
lastHashed = ip + mLength - 1;
}
ip += mLength;
anchor = ip;
- /* Check immediate repcode */
- while ( (ip < ilimit)
- && ( (repToConfirm[1] > 0) && (repToConfirm[1] <= (U32)(ip-lowest))
- && (MEM_read32(ip) == MEM_read32(ip - repToConfirm[1])) )) {
-
- size_t const rLength = ZSTD_count(ip+4, ip+4-repToConfirm[1],
- iend) + 4;
- /* Swap repToConfirm[1] <=> repToConfirm[0] */
- {
- U32 const tmpOff = repToConfirm[1];
- repToConfirm[1] = repToConfirm[0];
- repToConfirm[0] = tmpOff;
- }
-
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
-
- /* Fill the hash table from lastHashed+1 to ip+rLength*/
- if (ip + rLength < ilimit) {
- rollingHash = ZSTD_ldm_fillLdmHashTable(
- ldmState, rollingHash, lastHashed,
- ip + rLength, base, hBits, ldmParams);
- lastHashed = ip + rLength - 1;
- }
- ip += rLength;
- anchor = ip;
- }
}
-
- /* Overwrite rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- ZSTD_ldm_limitTableUpdate(cctx, anchor);
- ZSTD_ldm_fillFastTables(cctx, anchor);
-
- lastLiterals = blockCompressor(cctx, anchor, iend - anchor);
- cctx->nextToUpdate = (U32)(iend - base);
-
- /* Restore seqStorePtr->rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = savedRep[i];
-
- /* Return the last literals size */
- return lastLiterals;
+ return iend - anchor;
}
-size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+/*! ZSTD_ldm_reduceTable() :
+ * reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+ U32 const reducerValue)
{
- return ZSTD_compressBlock_ldm_generic(ctx, src, srcSize);
+ U32 u;
+ for (u = 0; u < size; u++) {
+ if (table[u].offset < reducerValue) table[u].offset = 0;
+ else table[u].offset -= reducerValue;
+ }
}
-static size_t ZSTD_compressBlock_ldm_extDict_generic(
- ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
+size_t ZSTD_ldm_generateSequences(
+ ldmState_t* ldmState, rawSeqStore_t* sequences,
+ ldmParams_t const* params, void const* src, size_t srcSize)
{
- ldmState_t* const ldmState = &(ctx->ldmState);
- const ldmParams_t ldmParams = ctx->appliedParams.ldmParams;
- const U64 hashPower = ldmState->hashPower;
- const U32 hBits = ldmParams.hashLog - ldmParams.bucketSizeLog;
- const U32 ldmBucketSize = ((U32)1 << ldmParams.bucketSizeLog);
- const U32 ldmTagMask = ((U32)1 << ldmParams.hashEveryLog) - 1;
- seqStore_t* const seqStorePtr = &(ctx->seqStore);
- const BYTE* const base = ctx->base;
- const BYTE* const dictBase = ctx->dictBase;
- const BYTE* const istart = (const BYTE*)src;
- const BYTE* ip = istart;
- const BYTE* anchor = istart;
- const U32 lowestIndex = ctx->lowLimit;
- const BYTE* const dictStart = dictBase + lowestIndex;
- const U32 dictLimit = ctx->dictLimit;
- const BYTE* const lowPrefixPtr = base + dictLimit;
- const BYTE* const dictEnd = dictBase + dictLimit;
- const BYTE* const iend = istart + srcSize;
- const BYTE* const ilimit = iend - MAX(ldmParams.minMatchLength, HASH_READ_SIZE);
-
- const ZSTD_blockCompressor blockCompressor =
- ZSTD_selectBlockCompressor(ctx->appliedParams.cParams.strategy, 1);
- U32* const repToConfirm = seqStorePtr->repToConfirm;
- U32 savedRep[ZSTD_REP_NUM];
- U64 rollingHash = 0;
- const BYTE* lastHashed = NULL;
- size_t i, lastLiterals;
-
- /* Save seqStorePtr->rep and copy repToConfirm */
- for (i = 0; i < ZSTD_REP_NUM; i++) {
- savedRep[i] = repToConfirm[i] = seqStorePtr->rep[i];
- }
-
- /* Search Loop */
- while (ip < ilimit) { /* < instead of <=, because (ip+1) */
- size_t mLength;
- const U32 current = (U32)(ip-base);
- size_t forwardMatchLength = 0, backwardMatchLength = 0;
- ldmEntry_t* bestEntry = NULL;
- if (ip != istart) {
- rollingHash = ZSTD_ldm_updateHash(rollingHash, lastHashed[0],
- lastHashed[ldmParams.minMatchLength],
- hashPower);
+ U32 const maxDist = 1U << params->windowLog;
+ BYTE const* const istart = (BYTE const*)src;
+ BYTE const* const iend = istart + srcSize;
+ size_t const kMaxChunkSize = 1 << 20;
+ size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+ size_t chunk;
+ size_t leftoverSize = 0;
+
+ assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+ /* Check that ZSTD_window_update() has been called for this chunk prior
+ * to passing it to this function.
+ */
+ assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+ /* The input could be very large (in zstdmt), so it must be broken up into
+ * chunks to enforce the maximmum distance and handle overflow correction.
+ */
+ assert(sequences->pos <= sequences->size);
+ assert(sequences->size <= sequences->capacity);
+ for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+ BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+ size_t const remaining = (size_t)(iend - chunkStart);
+ BYTE const *const chunkEnd =
+ (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+ size_t const chunkSize = chunkEnd - chunkStart;
+ size_t newLeftoverSize;
+ size_t const prevSize = sequences->size;
+
+ assert(chunkStart < iend);
+ /* 1. Perform overflow correction if necessary. */
+ if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+ U32 const ldmHSize = 1U << params->hashLog;
+ U32 const correction = ZSTD_window_correctOverflow(
+ &ldmState->window, /* cycleLog */ 0, maxDist, src);
+ ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+ }
+ /* 2. We enforce the maximum offset allowed.
+ *
+ * kMaxChunkSize should be small enough that we don't lose too much of
+ * the window through early invalidation.
+ * TODO: * Test the chunk size.
+ * * Try invalidation after the sequence generation and test the
+ * the offset against maxDist directly.
+ */
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
+ /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+ newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+ ldmState, sequences, params, chunkStart, chunkSize);
+ if (ZSTD_isError(newLeftoverSize))
+ return newLeftoverSize;
+ /* 4. We add the leftover literals from previous iterations to the first
+ * newly generated sequence, or add the `newLeftoverSize` if none are
+ * generated.
+ */
+ /* Prepend the leftover literals from the last call */
+ if (prevSize < sequences->size) {
+ sequences->seq[prevSize].litLength += (U32)leftoverSize;
+ leftoverSize = newLeftoverSize;
} else {
- rollingHash = ZSTD_ldm_getRollingHash(ip, ldmParams.minMatchLength);
+ assert(newLeftoverSize == chunkSize);
+ leftoverSize += chunkSize;
}
- lastHashed = ip;
+ }
+ return 0;
+}
- if (ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashEveryLog) !=
- ldmTagMask) {
- /* Don't insert and don't look for a match */
- ip++;
- continue;
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+ while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+ rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+ if (srcSize <= seq->litLength) {
+ /* Skip past srcSize literals */
+ seq->litLength -= (U32)srcSize;
+ return;
}
-
- /* Get the best entry and compute the match lengths */
- {
- ldmEntry_t* const bucket =
- ZSTD_ldm_getBucket(ldmState,
- ZSTD_ldm_getSmallHash(rollingHash, hBits),
- ldmParams);
- ldmEntry_t* cur;
- size_t bestMatchLength = 0;
- U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
-
- for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
- const BYTE* const curMatchBase =
- cur->offset < dictLimit ? dictBase : base;
- const BYTE* const pMatch = curMatchBase + cur->offset;
- const BYTE* const matchEnd =
- cur->offset < dictLimit ? dictEnd : iend;
- const BYTE* const lowMatchPtr =
- cur->offset < dictLimit ? dictStart : lowPrefixPtr;
- size_t curForwardMatchLength, curBackwardMatchLength,
- curTotalMatchLength;
-
- if (cur->checksum != checksum || cur->offset <= lowestIndex) {
- continue;
- }
-
- curForwardMatchLength = ZSTD_count_2segments(
- ip, pMatch, iend,
- matchEnd, lowPrefixPtr);
- if (curForwardMatchLength < ldmParams.minMatchLength) {
- continue;
- }
- curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch(
- ip, anchor, pMatch, lowMatchPtr);
- curTotalMatchLength = curForwardMatchLength +
- curBackwardMatchLength;
-
- if (curTotalMatchLength > bestMatchLength) {
- bestMatchLength = curTotalMatchLength;
- forwardMatchLength = curForwardMatchLength;
- backwardMatchLength = curBackwardMatchLength;
- bestEntry = cur;
+ srcSize -= seq->litLength;
+ seq->litLength = 0;
+ if (srcSize < seq->matchLength) {
+ /* Skip past the first srcSize of the match */
+ seq->matchLength -= (U32)srcSize;
+ if (seq->matchLength < minMatch) {
+ /* The match is too short, omit it */
+ if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+ seq[1].litLength += seq[0].matchLength;
}
+ rawSeqStore->pos++;
}
+ return;
}
+ srcSize -= seq->matchLength;
+ seq->matchLength = 0;
+ rawSeqStore->pos++;
+ }
+}
- /* No match found -- continue searching */
- if (bestEntry == NULL) {
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
- (U32)(lastHashed - base),
- ldmParams);
- ip++;
- continue;
+/**
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+ U32 const remaining, U32 const minMatch)
+{
+ rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+ assert(sequence.offset > 0);
+ /* Likely: No partial sequence */
+ if (remaining >= sequence.litLength + sequence.matchLength) {
+ rawSeqStore->pos++;
+ return sequence;
+ }
+ /* Cut the sequence short (offset == 0 ==> rest is literals). */
+ if (remaining <= sequence.litLength) {
+ sequence.offset = 0;
+ } else if (remaining < sequence.litLength + sequence.matchLength) {
+ sequence.matchLength = remaining - sequence.litLength;
+ if (sequence.matchLength < minMatch) {
+ sequence.offset = 0;
}
+ }
+ /* Skip past `remaining` bytes for the future sequences. */
+ ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+ return sequence;
+}
- /* Match found */
- mLength = forwardMatchLength + backwardMatchLength;
- ip -= backwardMatchLength;
-
- /* Call the block compressor on the remaining literals */
- {
- /* ip = current - backwardMatchLength
- * The match is at (bestEntry->offset - backwardMatchLength) */
- U32 const matchIndex = bestEntry->offset;
- U32 const offset = current - matchIndex;
-
- /* Overwrite rep codes */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- /* Fill the hash table for the block compressor */
- ZSTD_ldm_limitTableUpdate(ctx, anchor);
- ZSTD_ldm_fillFastTables(ctx, anchor);
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
+ int const extDict)
+{
+ unsigned const minMatch = cParams->searchLength;
+ ZSTD_blockCompressor const blockCompressor =
+ ZSTD_selectBlockCompressor(cParams->strategy, extDict);
+ BYTE const* const base = ms->window.base;
+ /* Input bounds */
+ BYTE const* const istart = (BYTE const*)src;
+ BYTE const* const iend = istart + srcSize;
+ /* Input positions */
+ BYTE const* ip = istart;
+
+ assert(rawSeqStore->pos <= rawSeqStore->size);
+ assert(rawSeqStore->size <= rawSeqStore->capacity);
+ /* Loop through each sequence and apply the block compressor to the lits */
+ while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+ /* maybeSplitSequence updates rawSeqStore->pos */
+ rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+ (U32)(iend - ip), minMatch);
+ int i;
+ /* End signal */
+ if (sequence.offset == 0)
+ break;
- /* Call block compressor and get remaining literals */
- lastLiterals = blockCompressor(ctx, anchor, ip - anchor);
- ctx->nextToUpdate = (U32)(ip - base);
+ assert(sequence.offset <= (1U << cParams->windowLog));
+ assert(ip + sequence.litLength + sequence.matchLength <= iend);
- /* Update repToConfirm with the new offset */
+ /* Fill tables for block compressor */
+ ZSTD_ldm_limitTableUpdate(ms, ip);
+ ZSTD_ldm_fillFastTables(ms, cParams, ip);
+ /* Run the block compressor */
+ {
+ size_t const newLitLength =
+ blockCompressor(ms, seqStore, rep, cParams, ip,
+ sequence.litLength);
+ ip += sequence.litLength;
+ ms->nextToUpdate = (U32)(ip - base);
+ /* Update the repcodes */
for (i = ZSTD_REP_NUM - 1; i > 0; i--)
- repToConfirm[i] = repToConfirm[i-1];
- repToConfirm[0] = offset;
-
- /* Store the sequence with the leftover literals */
- ZSTD_storeSeq(seqStorePtr, lastLiterals, ip - lastLiterals,
- offset + ZSTD_REP_MOVE, mLength - MINMATCH);
- }
-
- /* Insert the current entry into the hash table */
- ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
- (U32)(lastHashed - base),
- ldmParams);
-
- /* Fill the hash table from lastHashed+1 to ip+mLength */
- assert(ip + backwardMatchLength == lastHashed);
- if (ip + mLength < ilimit) {
- rollingHash = ZSTD_ldm_fillLdmHashTable(
- ldmState, rollingHash, lastHashed,
- ip + mLength, base, hBits,
- ldmParams);
- lastHashed = ip + mLength - 1;
- }
- ip += mLength;
- anchor = ip;
-
- /* check immediate repcode */
- while (ip < ilimit) {
- U32 const current2 = (U32)(ip-base);
- U32 const repIndex2 = current2 - repToConfirm[1];
- const BYTE* repMatch2 = repIndex2 < dictLimit ?
- dictBase + repIndex2 : base + repIndex2;
- if ( (((U32)((dictLimit-1) - repIndex2) >= 3) &
- (repIndex2 > lowestIndex)) /* intentional overflow */
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
- const BYTE* const repEnd2 = repIndex2 < dictLimit ?
- dictEnd : iend;
- size_t const repLength2 =
- ZSTD_count_2segments(ip+4, repMatch2+4, iend,
- repEnd2, lowPrefixPtr) + 4;
-
- U32 tmpOffset = repToConfirm[1];
- repToConfirm[1] = repToConfirm[0];
- repToConfirm[0] = tmpOffset;
-
- ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
-
- /* Fill the hash table from lastHashed+1 to ip+repLength2*/
- if (ip + repLength2 < ilimit) {
- rollingHash = ZSTD_ldm_fillLdmHashTable(
- ldmState, rollingHash, lastHashed,
- ip + repLength2, base, hBits,
- ldmParams);
- lastHashed = ip + repLength2 - 1;
- }
- ip += repLength2;
- anchor = ip;
- continue;
- }
- break;
+ rep[i] = rep[i-1];
+ rep[0] = sequence.offset;
+ /* Store the sequence */
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
+ sequence.offset + ZSTD_REP_MOVE,
+ sequence.matchLength - MINMATCH);
+ ip += sequence.matchLength;
}
}
-
- /* Overwrite rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = repToConfirm[i];
-
- ZSTD_ldm_limitTableUpdate(ctx, anchor);
- ZSTD_ldm_fillFastTables(ctx, anchor);
-
- /* Call the block compressor one last time on the last literals */
- lastLiterals = blockCompressor(ctx, anchor, iend - anchor);
- ctx->nextToUpdate = (U32)(iend - base);
-
- /* Restore seqStorePtr->rep */
- for (i = 0; i < ZSTD_REP_NUM; i++)
- seqStorePtr->rep[i] = savedRep[i];
-
- /* Return the last literals size */
- return lastLiterals;
-}
-
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize)
-{
- return ZSTD_compressBlock_ldm_extDict_generic(ctx, src, srcSize);
+ /* Fill the tables for the block compressor */
+ ZSTD_ldm_limitTableUpdate(ms, ip);
+ ZSTD_ldm_fillFastTables(ms, cParams, ip);
+ /* Compress the last literals */
+ {
+ size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
+ ip, iend - ip);
+ ms->nextToUpdate = (U32)(iend - base);
+ return lastLiterals;
+ }
}
diff --git a/thirdparty/zstd/compress/zstd_ldm.h b/thirdparty/zstd/compress/zstd_ldm.h
index 8f12c677aa..0c3789ff13 100644
--- a/thirdparty/zstd/compress/zstd_ldm.h
+++ b/thirdparty/zstd/compress/zstd_ldm.h
@@ -22,32 +22,71 @@ extern "C" {
***************************************/
#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_DEFAULTMAX
-#define ZSTD_LDM_HASHEVERYLOG_NOTSET 9999
-/** ZSTD_compressBlock_ldm_generic() :
+/**
+ * ZSTD_ldm_generateSequences():
*
- * This is a block compressor intended for long distance matching.
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
*
- * The function searches for matches of length at least
- * ldmParams.minMatchLength using a hash table in cctx->ldmState.
- * Matches can be at a distance of up to cParams.windowLog.
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ * sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+ ldmState_t* ldms, rawSeqStore_t* sequences,
+ ldmParams_t const* params, void const* src, size_t srcSize);
+
+/**
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
+ void const* src, size_t srcSize,
+ int const extDict);
+
+/**
+ * ZSTD_ldm_skipSequences():
*
- * Upon finding a match, the unmatched literals are compressed using a
- * ZSTD_blockCompressor (depending on the strategy in the compression
- * parameters), which stores the matched sequences. The "long distance"
- * match is then stored with the remaining literals from the
- * ZSTD_blockCompressor. */
-size_t ZSTD_compressBlock_ldm(ZSTD_CCtx* cctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_ldm_extDict(ZSTD_CCtx* ctx,
- const void* src, size_t srcSize);
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+ U32 const minMatch);
-/** ZSTD_ldm_initializeParameters() :
- * Initialize the long distance matching parameters to their default values. */
-size_t ZSTD_ldm_initializeParameters(ldmParams_t* params, U32 enableLdm);
/** ZSTD_ldm_getTableSize() :
- * Estimate the space needed for long distance matching tables. */
-size_t ZSTD_ldm_getTableSize(U32 hashLog, U32 bucketSizeLog);
+ * Estimate the space needed for long distance matching tables or 0 if LDM is
+ * disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/** ZSTD_ldm_getSeqSpace() :
+ * Return an upper bound on the number of sequences that can be produced by
+ * the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
/** ZSTD_ldm_getTableSize() :
* Return prime8bytes^(minMatchLength-1) */
@@ -58,8 +97,12 @@ U64 ZSTD_ldm_getHashPower(U32 minMatchLength);
* windowLog and params->hashLog.
*
* Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
- * params->hashLog if it is not). */
-void ZSTD_ldm_adjustParameters(ldmParams_t* params, U32 windowLog);
+ * params->hashLog if it is not).
+ *
+ * Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+ ZSTD_compressionParameters const* cParams);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c
index 7171ff5373..f63f0c5852 100644
--- a/thirdparty/zstd/compress/zstd_opt.c
+++ b/thirdparty/zstd/compress/zstd_opt.c
@@ -10,7 +10,6 @@
#include "zstd_compress_internal.h"
#include "zstd_opt.h"
-#include "zstd_lazy.h" /* ZSTD_updateTree, ZSTD_updateTree_extDict */
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
@@ -244,14 +243,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
/* Update hashTable3 up to ip (excluded)
Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE* const ip)
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip)
{
- U32* const hashTable3 = cctx->hashTable3;
- U32 const hashLog3 = cctx->hashLog3;
- const BYTE* const base = cctx->base;
- U32 idx = cctx->nextToUpdate3;
- U32 const target = cctx->nextToUpdate3 = (U32)(ip - base);
+ U32* const hashTable3 = ms->hashTable3;
+ U32 const hashLog3 = ms->hashLog3;
+ const BYTE* const base = ms->window.base;
+ U32 idx = ms->nextToUpdate3;
+ U32 const target = ms->nextToUpdate3 = (U32)(ip - base);
size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+ assert(hashLog3 > 0);
while(idx < target) {
hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
@@ -265,36 +265,173 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* const cctx, const BYTE*
/*-*************************************
* Binary Tree search
***************************************/
+/** ZSTD_insertBt1() : add one or multiple positions to tree.
+ * ip : assumed <= iend-8 .
+ * @return : nb of positions added */
+static U32 ZSTD_insertBt1(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* const ip, const BYTE* const iend,
+ U32 const mls, U32 const extDict)
+{
+ U32* const hashTable = ms->hashTable;
+ U32 const hashLog = cParams->hashLog;
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
+ U32 const btMask = (1 << btLog) - 1;
+ U32 matchIndex = hashTable[h];
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const dictBase = ms->window.dictBase;
+ const U32 dictLimit = ms->window.dictLimit;
+ const BYTE* const dictEnd = dictBase + dictLimit;
+ const BYTE* const prefixStart = base + dictLimit;
+ const BYTE* match;
+ const U32 current = (U32)(ip-base);
+ const U32 btLow = btMask >= current ? 0 : current - btMask;
+ U32* smallerPtr = bt + 2*(current&btMask);
+ U32* largerPtr = smallerPtr + 1;
+ U32 dummy32; /* to be nullified at the end */
+ U32 const windowLow = ms->window.lowLimit;
+ U32 matchEndIdx = current+8+1;
+ size_t bestLength = 8;
+ U32 nbCompares = 1U << cParams->searchLog;
+#ifdef ZSTD_C_PREDICT
+ U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
+ U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
+ predictedSmall += (predictedSmall>0);
+ predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+ DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current);
+
+ assert(ip <= iend-8); /* required for h calculation */
+ hashTable[h] = current; /* Update Hash Table */
+
+ while (nbCompares-- && (matchIndex > windowLow)) {
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
+ assert(matchIndex < current);
+
+#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
+ const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
+ if (matchIndex == predictedSmall) {
+ /* no need to check length, result known */
+ *smallerPtr = matchIndex;
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
+ predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+ continue;
+ }
+ if (matchIndex == predictedLarge) {
+ *largerPtr = matchIndex;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+ continue;
+ }
+#endif
+
+ if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
+ assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
+ match = base + matchIndex;
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+ } else {
+ match = dictBase + matchIndex;
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+ if (matchIndex+matchLength >= dictLimit)
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
+ }
+
+ if (matchLength > bestLength) {
+ bestLength = matchLength;
+ if (matchLength > matchEndIdx - matchIndex)
+ matchEndIdx = matchIndex + (U32)matchLength;
+ }
+
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
+ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+ }
+
+ if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
+ /* match is smaller than current */
+ *smallerPtr = matchIndex; /* update smaller idx */
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
+ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
+ } else {
+ /* match is larger than current */
+ *largerPtr = matchIndex;
+ commonLengthLarger = matchLength;
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
+ largerPtr = nextPtr;
+ matchIndex = nextPtr[0];
+ } }
+
+ *smallerPtr = *largerPtr = 0;
+ if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
+ assert(matchEndIdx > current + 8);
+ return matchEndIdx - (current + 8);
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* const ip, const BYTE* const iend,
+ const U32 mls, const U32 extDict)
+{
+ const BYTE* const base = ms->window.base;
+ U32 const target = (U32)(ip - base);
+ U32 idx = ms->nextToUpdate;
+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
+ idx, target, extDict);
+
+ while(idx < target)
+ idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
+ ms->nextToUpdate = target;
+}
+
+void ZSTD_updateTree(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iend)
+{
+ ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
+}
+
FORCE_INLINE_TEMPLATE
U32 ZSTD_insertBtAndGetAllMatches (
- ZSTD_CCtx* zc,
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* const ip, const BYTE* const iLimit, int const extDict,
- U32 nbCompares, U32 const mls, U32 const sufficient_len,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
- ZSTD_match_t* matches, const U32 lengthToBeat)
+ ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
{
- const BYTE* const base = zc->base;
+ U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+ const BYTE* const base = ms->window.base;
U32 const current = (U32)(ip-base);
- U32 const hashLog = zc->appliedParams.cParams.hashLog;
+ U32 const hashLog = cParams->hashLog;
U32 const minMatch = (mls==3) ? 3 : 4;
- U32* const hashTable = zc->hashTable;
+ U32* const hashTable = ms->hashTable;
size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
U32 matchIndex = hashTable[h];
- U32* const bt = zc->chainTable;
- U32 const btLog = zc->appliedParams.cParams.chainLog - 1;
+ U32* const bt = ms->chainTable;
+ U32 const btLog = cParams->chainLog - 1;
U32 const btMask= (1U << btLog) - 1;
size_t commonLengthSmaller=0, commonLengthLarger=0;
- const BYTE* const dictBase = zc->dictBase;
- U32 const dictLimit = zc->dictLimit;
+ const BYTE* const dictBase = ms->window.dictBase;
+ U32 const dictLimit = ms->window.dictLimit;
const BYTE* const dictEnd = dictBase + dictLimit;
const BYTE* const prefixStart = base + dictLimit;
U32 const btLow = btMask >= current ? 0 : current - btMask;
- U32 const windowLow = zc->lowLimit;
+ U32 const windowLow = ms->window.lowLimit;
U32* smallerPtr = bt + 2*(current&btMask);
U32* largerPtr = bt + 2*(current&btMask) + 1;
U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
U32 dummy32; /* to be nullified at the end */
U32 mnum = 0;
+ U32 nbCompares = 1U << cParams->searchLog;
size_t bestLength = lengthToBeat-1;
DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
@@ -335,7 +472,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
/* HC3 match finder */
if ((mls == 3) /*static*/ && (bestLength < mls)) {
- U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
+ U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
if ((matchIndex3 > windowLow)
& (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
size_t mlen;
@@ -359,7 +496,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
mnum = 1;
if ( (mlen > sufficient_len) |
(ip+mlen == iLimit) ) { /* best possible length */
- zc->nextToUpdate = current+1; /* skip insertion */
+ ms->nextToUpdate = current+1; /* skip insertion */
return 1;
} } } }
@@ -416,30 +553,29 @@ U32 ZSTD_insertBtAndGetAllMatches (
*smallerPtr = *largerPtr = 0;
assert(matchEndIdx > current+8);
- zc->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
+ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
return mnum;
}
FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
- ZSTD_CCtx* zc, /* Index table will be updated */
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
- U32 const maxNbAttempts, U32 const matchLengthSearch, U32 const sufficient_len,
U32 rep[ZSTD_REP_NUM], U32 const ll0,
ZSTD_match_t* matches, U32 const lengthToBeat)
{
+ U32 const matchLengthSearch = cParams->searchLength;
DEBUGLOG(7, "ZSTD_BtGetAllMatches");
- if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
- if (extDict) ZSTD_updateTree_extDict(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
- else ZSTD_updateTree(zc, ip, iHighLimit, maxNbAttempts, matchLengthSearch);
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
+ ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
switch(matchLengthSearch)
{
- case 3 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 3, sufficient_len, rep, ll0, matches, lengthToBeat);
+ case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
default :
- case 4 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 4, sufficient_len, rep, ll0, matches, lengthToBeat);
- case 5 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 5, sufficient_len, rep, ll0, matches, lengthToBeat);
+ case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
+ case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
case 7 :
- case 6 : return ZSTD_insertBtAndGetAllMatches(zc, ip, iHighLimit, extDict, maxNbAttempts, 6, sufficient_len, rep, ll0, matches, lengthToBeat);
+ case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
}
}
@@ -527,36 +663,33 @@ static int ZSTD_literalsContribution_cached(
}
FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
+size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
+ U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams,
const void* src, size_t srcSize,
const int optLevel, const int extDict)
{
- seqStore_t* const seqStorePtr = &(ctx->seqStore);
- optState_t* const optStatePtr = &(ctx->optState);
+ optState_t* const optStatePtr = &ms->opt;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart;
const BYTE* anchor = istart;
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 8;
- const BYTE* const base = ctx->base;
- const BYTE* const prefixStart = base + ctx->dictLimit;
+ const BYTE* const base = ms->window.base;
+ const BYTE* const prefixStart = base + ms->window.dictLimit;
- U32 const maxSearches = 1U << ctx->appliedParams.cParams.searchLog;
- U32 const sufficient_len = MIN(ctx->appliedParams.cParams.targetLength, ZSTD_OPT_NUM -1);
- U32 const mls = ctx->appliedParams.cParams.searchLength;
- U32 const minMatch = (ctx->appliedParams.cParams.searchLength == 3) ? 3 : 4;
+ U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+ U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
ZSTD_match_t* const matches = optStatePtr->matchTable;
cachedLiteralPrice_t cachedLitPrice;
- U32 rep[ZSTD_REP_NUM];
/* init */
DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
- ctx->nextToUpdate3 = ctx->nextToUpdate;
+ ms->nextToUpdate3 = ms->nextToUpdate;
ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
ip += (ip==prefixStart);
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=seqStorePtr->rep[i]; }
memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
/* Match Loop */
@@ -567,7 +700,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
/* find first match */
{ U32 const litlen = (U32)(ip - anchor);
U32 const ll0 = !litlen;
- U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, ip, iend, extDict, maxSearches, mls, sufficient_len, rep, ll0, matches, minMatch);
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
if (!nbMatches) { ip++; continue; }
/* initialize opt[0] */
@@ -653,7 +786,7 @@ size_t ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
- U32 const nbMatches = ZSTD_BtGetAllMatches(ctx, inr, iend, extDict, maxSearches, mls, sufficient_len, opt[cur].rep, ll0, matches, minMatch);
+ U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
U32 matchNb;
if (!nbMatches) continue;
@@ -749,37 +882,42 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
}
ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
- ZSTD_storeSeq(seqStorePtr, llen, anchor, offset, mlen-MINMATCH);
+ ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
anchor = ip;
} }
ZSTD_setLog2Prices(optStatePtr);
} /* while (ip < ilimit) */
- /* Save reps for next block */
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqStorePtr->repToConfirm[i] = rep[i]; }
-
/* Return the last literals size */
return iend - anchor;
}
-size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btopt(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
}
-size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btultra(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
}
-size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btopt_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
}
-size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
+size_t ZSTD_compressBlock_btultra_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
{
- return ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
+ return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
}
diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h
index 82e810c293..b8dc389f31 100644
--- a/thirdparty/zstd/compress/zstd_opt.h
+++ b/thirdparty/zstd/compress/zstd_opt.h
@@ -15,13 +15,25 @@
extern "C" {
#endif
-#include "zstd.h" /* ZSTD_CCtx, size_t */
+#include "zstd_compress_internal.h"
-size_t ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+void ZSTD_updateTree(
+ ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
+ const BYTE* ip, const BYTE* iend); /* used in ZSTD_loadDictionaryContent() */
-size_t ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize);
+size_t ZSTD_compressBlock_btopt(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
#if defined (__cplusplus)
}
diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c
index e51edf124f..c7a205d8c7 100644
--- a/thirdparty/zstd/compress/zstdmt_compress.c
+++ b/thirdparty/zstd/compress/zstdmt_compress.c
@@ -10,7 +10,8 @@
/* ====== Tuning parameters ====== */
-#define ZSTDMT_NBTHREADS_MAX 200
+#define ZSTDMT_NBWORKERS_MAX 200
+#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (2 GB)) /* note : limited by `jobSize` type, which is `unsigned` */
#define ZSTDMT_OVERLAPLOG_DEFAULT 6
@@ -22,11 +23,18 @@
/* ====== Dependencies ====== */
#include <string.h> /* memcpy, memset */
+#include <limits.h> /* INT_MAX */
#include "pool.h" /* threadpool */
#include "threading.h" /* mutex */
#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_ldm.h"
#include "zstdmt_compress.h"
+/* Guards code to support resizing the SeqPool.
+ * We will want to resize the SeqPool to save memory in the future.
+ * Until then, comment the code out since it is unused.
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
/* ====== Debug ====== */
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
@@ -81,7 +89,7 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
typedef struct buffer_s {
void* start;
- size_t size;
+ size_t capacity;
} buffer_t;
static const buffer_t g_nullBuffer = { NULL, 0 };
@@ -95,9 +103,9 @@ typedef struct ZSTDMT_bufferPool_s {
buffer_t bTable[1]; /* variable size */
} ZSTDMT_bufferPool;
-static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbThreads, ZSTD_customMem cMem)
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
{
- unsigned const maxNbBuffers = 2*nbThreads + 3;
+ unsigned const maxNbBuffers = 2*nbWorkers + 3;
ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_calloc(
sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
if (bufPool==NULL) return NULL;
@@ -129,17 +137,21 @@ static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
{
size_t const poolSize = sizeof(*bufPool)
- + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
unsigned u;
size_t totalBufferSize = 0;
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
for (u=0; u<bufPool->totalBuffers; u++)
- totalBufferSize += bufPool->bTable[u].size;
+ totalBufferSize += bufPool->bTable[u].capacity;
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return poolSize + totalBufferSize;
}
+/* ZSTDMT_setBufferSize() :
+ * all future buffers provided by this buffer pool will have _at least_ this size
+ * note : it's better for all buffers to have same size,
+ * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
{
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
@@ -149,7 +161,9 @@ static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const
}
/** ZSTDMT_getBuffer() :
- * assumption : bufPool must be valid */
+ * assumption : bufPool must be valid
+ * @return : a buffer, with start pointer and size
+ * note: allocation may fail, in this case, start==NULL and size==0 */
static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
{
size_t const bSize = bufPool->bufferSize;
@@ -157,12 +171,12 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
if (bufPool->nbBuffers) { /* try to use an existing buffer */
buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
- size_t const availBufferSize = buf.size;
+ size_t const availBufferSize = buf.capacity;
bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
/* large enough, but not too much */
DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
- bufPool->nbBuffers, (U32)buf.size);
+ bufPool->nbBuffers, (U32)buf.capacity);
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return buf;
}
@@ -176,12 +190,42 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
{ buffer_t buffer;
void* const start = ZSTD_malloc(bSize, bufPool->cMem);
buffer.start = start; /* note : start can be NULL if malloc fails ! */
- buffer.size = (start==NULL) ? 0 : bSize;
- DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+ buffer.capacity = (start==NULL) ? 0 : bSize;
+ if (start==NULL) {
+ DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!");
+ } else {
+ DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize);
+ }
return buffer;
}
}
+#if ZSTD_RESIZE_SEQPOOL
+/** ZSTDMT_resizeBuffer() :
+ * assumption : bufPool must be valid
+ * @return : a buffer that is at least the buffer pool buffer size.
+ * If a reallocation happens, the data in the input buffer is copied.
+ */
+static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer)
+{
+ size_t const bSize = bufPool->bufferSize;
+ if (buffer.capacity < bSize) {
+ void* const start = ZSTD_malloc(bSize, bufPool->cMem);
+ buffer_t newBuffer;
+ newBuffer.start = start;
+ newBuffer.capacity = start == NULL ? 0 : bSize;
+ if (start != NULL) {
+ assert(newBuffer.capacity >= buffer.capacity);
+ memcpy(newBuffer.start, buffer.start, buffer.capacity);
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
+ return newBuffer;
+ }
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
+ }
+ return buffer;
+}
+#endif
+
/* store buffer for later re-use, up to pool capacity */
static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
{
@@ -191,7 +235,7 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
if (bufPool->nbBuffers < bufPool->totalBuffers) {
bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
- (U32)buf.size, (U32)(bufPool->nbBuffers-1));
+ (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
return;
}
@@ -201,21 +245,73 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
ZSTD_free(buf.start, bufPool->cMem);
}
-/* Sets parameters relevant to the compression job, initializing others to
- * default values. Notably, nbThreads should probably be zero. */
-static ZSTD_CCtx_params ZSTDMT_makeJobCCtxParams(ZSTD_CCtx_params const params)
+
+/* ===== Seq Pool Wrapper ====== */
+
+static rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0};
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+ return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
{
- ZSTD_CCtx_params jobParams;
- memset(&jobParams, 0, sizeof(jobParams));
+ rawSeqStore_t seq = {NULL, 0, 0, 0};
+ seq.seq = (rawSeq*)buffer.start;
+ seq.capacity = buffer.capacity / sizeof(rawSeq);
+ return seq;
+}
- jobParams.cParams = params.cParams;
- jobParams.fParams = params.fParams;
- jobParams.compressionLevel = params.compressionLevel;
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+ buffer_t buffer;
+ buffer.start = seq.seq;
+ buffer.capacity = seq.capacity * sizeof(rawSeq);
+ return buffer;
+}
- jobParams.ldmParams = params.ldmParams;
- return jobParams;
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+ if (seqPool->bufferSize == 0) {
+ return kNullRawSeqStore;
+ }
+ return bufferToSeq(ZSTDMT_getBuffer(seqPool));
}
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+ ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+ ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ ZSTDMT_setNbSeq(seqPool, 0);
+ return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+ ZSTDMT_freeBufferPool(seqPool);
+}
+
+
+
/* ===== CCtx Pool ===== */
/* a single CCtx Pool can be invoked from multiple threads in parallel */
@@ -238,23 +334,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
}
/* ZSTDMT_createCCtxPool() :
- * implies nbThreads >= 1 , checked by caller ZSTDMT_createCCtx() */
-static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbWorkers,
ZSTD_customMem cMem)
{
ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_calloc(
- sizeof(ZSTDMT_CCtxPool) + (nbThreads-1)*sizeof(ZSTD_CCtx*), cMem);
+ sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ assert(nbWorkers > 0);
if (!cctxPool) return NULL;
if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
ZSTD_free(cctxPool, cMem);
return NULL;
}
cctxPool->cMem = cMem;
- cctxPool->totalCCtx = nbThreads;
+ cctxPool->totalCCtx = nbWorkers;
cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
- DEBUGLOG(3, "cctxPool created, with %u threads", nbThreads);
+ DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
return cctxPool;
}
@@ -262,15 +359,16 @@ static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(unsigned nbThreads,
static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
{
ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
- { unsigned const nbThreads = cctxPool->totalCCtx;
+ { unsigned const nbWorkers = cctxPool->totalCCtx;
size_t const poolSize = sizeof(*cctxPool)
- + (nbThreads-1)*sizeof(ZSTD_CCtx*);
+ + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
unsigned u;
size_t totalCCtxSize = 0;
- for (u=0; u<nbThreads; u++) {
+ for (u=0; u<nbWorkers; u++) {
totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
}
ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ assert(nbWorkers > 0);
return poolSize + totalCCtxSize;
}
}
@@ -297,111 +395,318 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
if (pool->availCCtx < pool->totalCCtx)
pool->cctx[pool->availCCtx++] = cctx;
else {
- /* pool overflow : should not happen, since totalCCtx==nbThreads */
- DEBUGLOG(5, "CCtx pool overflow : free cctx");
+ /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+ DEBUGLOG(4, "CCtx pool overflow : free cctx");
ZSTD_freeCCtx(cctx);
}
ZSTD_pthread_mutex_unlock(&pool->poolMutex);
}
+/* ==== Serial State ==== */
-/* ===== Thread worker ===== */
+typedef struct {
+ void const* start;
+ size_t size;
+} range_t;
typedef struct {
- buffer_t src;
- const void* srcStart;
- size_t prefixSize;
- size_t srcSize;
- buffer_t dstBuff;
- size_t cSize;
- size_t dstFlushed;
- unsigned firstChunk;
- unsigned lastChunk;
- unsigned jobCompleted;
- unsigned jobScanned;
- ZSTD_pthread_mutex_t* jobCompleted_mutex;
- ZSTD_pthread_cond_t* jobCompleted_cond;
+ /* All variables in the struct are protected by mutex. */
+ ZSTD_pthread_mutex_t mutex;
+ ZSTD_pthread_cond_t cond;
ZSTD_CCtx_params params;
- const ZSTD_CDict* cdict;
- ZSTDMT_CCtxPool* cctxPool;
- ZSTDMT_bufferPool* bufPool;
- unsigned long long fullFrameSize;
+ ldmState_t ldmState;
+ XXH64_state_t xxhState;
+ unsigned nextJobID;
+ /* Protects ldmWindow.
+ * Must be acquired after the main mutex when acquiring both.
+ */
+ ZSTD_pthread_mutex_t ldmWindowMutex;
+ ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is udpated */
+ ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
+{
+ /* Adjust parameters */
+ if (params.ldmParams.enableLdm) {
+ DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+ params.ldmParams.windowLog = params.cParams.windowLog;
+ ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+ assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+ assert(params.ldmParams.hashEveryLog < 32);
+ serialState->ldmState.hashPower =
+ ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
+ } else {
+ memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+ }
+ serialState->nextJobID = 0;
+ if (params.fParams.checksumFlag)
+ XXH64_reset(&serialState->xxhState, 0);
+ if (params.ldmParams.enableLdm) {
+ ZSTD_customMem cMem = params.customMem;
+ unsigned const hashLog = params.ldmParams.hashLog;
+ size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+ unsigned const bucketLog =
+ params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+ size_t const bucketSize = (size_t)1 << bucketLog;
+ unsigned const prevBucketLog =
+ serialState->params.ldmParams.hashLog -
+ serialState->params.ldmParams.bucketSizeLog;
+ /* Size the seq pool tables */
+ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
+ /* Reset the window */
+ ZSTD_window_clear(&serialState->ldmState.window);
+ serialState->ldmWindow = serialState->ldmState.window;
+ /* Resize tables and output space if necessary. */
+ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+ ZSTD_free(serialState->ldmState.hashTable, cMem);
+ serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_malloc(hashSize, cMem);
+ }
+ if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+ ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_malloc(bucketSize, cMem);
+ }
+ if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+ return 1;
+ /* Zero the tables */
+ memset(serialState->ldmState.hashTable, 0, hashSize);
+ memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
+ }
+ serialState->params = params;
+ return 0;
+}
+
+static int ZSTDMT_serialState_init(serialState_t* serialState)
+{
+ int initError = 0;
+ memset(serialState, 0, sizeof(*serialState));
+ initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL);
+ initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL);
+ return initError;
+}
+
+static void ZSTDMT_serialState_free(serialState_t* serialState)
+{
+ ZSTD_customMem cMem = serialState->params.customMem;
+ ZSTD_pthread_mutex_destroy(&serialState->mutex);
+ ZSTD_pthread_cond_destroy(&serialState->cond);
+ ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex);
+ ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond);
+ ZSTD_free(serialState->ldmState.hashTable, cMem);
+ ZSTD_free(serialState->ldmState.bucketOffsets, cMem);
+}
+
+static void ZSTDMT_serialState_update(serialState_t* serialState,
+ ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore,
+ range_t src, unsigned jobID)
+{
+ /* Wait for our turn */
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+ while (serialState->nextJobID < jobID) {
+ ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
+ }
+ /* A future job may error and skip our job */
+ if (serialState->nextJobID == jobID) {
+ /* It is now our turn, do any processing necessary */
+ if (serialState->params.ldmParams.enableLdm) {
+ size_t error;
+ assert(seqStore.seq != NULL && seqStore.pos == 0 &&
+ seqStore.size == 0 && seqStore.capacity > 0);
+ ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
+ error = ZSTD_ldm_generateSequences(
+ &serialState->ldmState, &seqStore,
+ &serialState->params.ldmParams, src.start, src.size);
+ /* We provide a large enough buffer to never fail. */
+ assert(!ZSTD_isError(error)); (void)error;
+ /* Update ldmWindow to match the ldmState.window and signal the main
+ * thread if it is waiting for a buffer.
+ */
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+ serialState->ldmWindow = serialState->ldmState.window;
+ ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
+ ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+ }
+ if (serialState->params.fParams.checksumFlag && src.size > 0)
+ XXH64_update(&serialState->xxhState, src.start, src.size);
+ }
+ /* Now it is the next jobs turn */
+ serialState->nextJobID++;
+ ZSTD_pthread_cond_broadcast(&serialState->cond);
+ ZSTD_pthread_mutex_unlock(&serialState->mutex);
+
+ if (seqStore.size > 0) {
+ size_t const err = ZSTD_referenceExternalSequences(
+ jobCCtx, seqStore.seq, seqStore.size);
+ assert(serialState->params.ldmParams.enableLdm);
+ assert(!ZSTD_isError(err));
+ (void)err;
+ }
+}
+
+static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState,
+ unsigned jobID, size_t cSize)
+{
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+ if (serialState->nextJobID <= jobID) {
+ assert(ZSTD_isError(cSize)); (void)cSize;
+ DEBUGLOG(5, "Skipping past job %u because of error", jobID);
+ serialState->nextJobID = jobID + 1;
+ ZSTD_pthread_cond_broadcast(&serialState->cond);
+
+ ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+ ZSTD_window_clear(&serialState->ldmWindow);
+ ZSTD_pthread_cond_signal(&serialState->ldmWindowCond);
+ ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+ }
+ ZSTD_pthread_mutex_unlock(&serialState->mutex);
+
+}
+
+
+/* ------------------------------------------ */
+/* ===== Worker thread ===== */
+/* ------------------------------------------ */
+
+static const range_t kNullRange = { NULL, 0 };
+
+typedef struct {
+ size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */
+ size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */
+ ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */
+ ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */
+ ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */
+ ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */
+ ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */
+ serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */
+ buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */
+ range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */
+ range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */
+ unsigned jobID; /* set by mtctx, then read by worker => no barrier */
+ unsigned firstJob; /* set by mtctx, then read by worker => no barrier */
+ unsigned lastJob; /* set by mtctx, then read by worker => no barrier */
+ ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */
+ const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */
+ unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */
+ size_t dstFlushed; /* used only by mtctx */
+ unsigned frameChecksumNeeded; /* used only by mtctx */
} ZSTDMT_jobDescription;
-/* ZSTDMT_compressChunk() : POOL_function type */
-void ZSTDMT_compressChunk(void* jobDescription)
+/* ZSTDMT_compressionJob() is a POOL_function type */
+void ZSTDMT_compressionJob(void* jobDescription)
{
ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
+ ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
- const void* const src = (const char*)job->srcStart + job->prefixSize;
+ rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
buffer_t dstBuff = job->dstBuff;
- DEBUGLOG(5, "ZSTDMT_compressChunk: job (first:%u) (last:%u) : prefixSize %u, srcSize %u ",
- job->firstChunk, job->lastChunk, (U32)job->prefixSize, (U32)job->srcSize);
+ /* Don't compute the checksum for chunks, since we compute it externally,
+ * but write it in the header.
+ */
+ if (job->jobID != 0) jobParams.fParams.checksumFlag = 0;
+ /* Don't run LDM for the chunks, since we handle it externally */
+ jobParams.ldmParams.enableLdm = 0;
+
+ /* ressources */
if (cctx==NULL) {
job->cSize = ERROR(memory_allocation);
goto _endJob;
}
-
- if (dstBuff.start == NULL) {
+ if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
dstBuff = ZSTDMT_getBuffer(job->bufPool);
if (dstBuff.start==NULL) {
job->cSize = ERROR(memory_allocation);
goto _endJob;
}
- job->dstBuff = dstBuff;
- DEBUGLOG(5, "ZSTDMT_compressChunk: received dstBuff of size %u", (U32)dstBuff.size);
+ job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
}
+ /* init */
if (job->cdict) {
- size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dm_auto, job->cdict, job->params, job->fullFrameSize);
- DEBUGLOG(4, "ZSTDMT_compressChunk: init using CDict (windowLog=%u)", job->params.cParams.windowLog);
- assert(job->firstChunk); /* only allowed for first job */
+ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
+ assert(job->firstJob); /* only allowed for first job */
if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
} else { /* srcStart points at reloaded section */
- U64 const pledgedSrcSize = job->firstChunk ? job->fullFrameSize : ZSTD_CONTENTSIZE_UNKNOWN;
- ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
- size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstChunk);
- if (ZSTD_isError(forceWindowError)) {
- DEBUGLOG(5, "ZSTD_CCtxParam_setParameter error : %s ", ZSTD_getErrorName(forceWindowError));
- job->cSize = forceWindowError;
- goto _endJob;
- }
- DEBUGLOG(5, "ZSTDMT_compressChunk: invoking ZSTD_compressBegin_advanced_internal with windowLog = %u ", jobParams.cParams.windowLog);
+ U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
+ { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
+ if (ZSTD_isError(forceWindowError)) {
+ job->cSize = forceWindowError;
+ goto _endJob;
+ } }
{ size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
- job->srcStart, job->prefixSize, ZSTD_dm_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
- NULL,
+ job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
+ NULL, /*cdict*/
jobParams, pledgedSrcSize);
if (ZSTD_isError(initError)) {
- DEBUGLOG(5, "ZSTD_compressBegin_advanced_internal error : %s ", ZSTD_getErrorName(initError));
job->cSize = initError;
goto _endJob;
- } }
- }
- if (!job->firstChunk) { /* flush and overwrite frame header when it's not first job */
- size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, 0);
+ } } }
+
+ /* Perform serial step as early as possible, but after CCtx initialization */
+ ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
+
+ if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
+ size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
+ DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
ZSTD_invalidateRepCodes(cctx);
}
- DEBUGLOG(5, "Compressing into dstBuff of size %u", (U32)dstBuff.size);
- DEBUG_PRINTHEX(6, job->srcStart, 12);
- job->cSize = (job->lastChunk) ?
- ZSTD_compressEnd (cctx, dstBuff.start, dstBuff.size, src, job->srcSize) :
- ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.size, src, job->srcSize);
- DEBUGLOG(5, "compressed %u bytes into %u bytes (first:%u) (last:%u) ",
- (unsigned)job->srcSize, (unsigned)job->cSize, job->firstChunk, job->lastChunk);
- DEBUGLOG(5, "dstBuff.size : %u ; => %s ", (U32)dstBuff.size, ZSTD_getErrorName(job->cSize));
+ /* compress */
+ { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX;
+ int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize);
+ const BYTE* ip = (const BYTE*) job->src.start;
+ BYTE* const ostart = (BYTE*)dstBuff.start;
+ BYTE* op = ostart;
+ BYTE* oend = op + dstBuff.capacity;
+ int chunkNb;
+ if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */
+ DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks);
+ assert(job->cSize == 0);
+ for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
+ size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
+ if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
+ ip += chunkSize;
+ op += cSize; assert(op < oend);
+ /* stats */
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+ job->cSize += cSize;
+ job->consumed = chunkSize * chunkNb;
+ DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
+ (U32)cSize, (U32)job->cSize);
+ ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */
+ ZSTD_pthread_mutex_unlock(&job->job_mutex);
+ }
+ /* last block */
+ assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
+ if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
+ size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
+ size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
+ size_t const cSize = (job->lastJob) ?
+ ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
+ ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
+ if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
+ /* stats */
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+ job->cSize += cSize;
+ ZSTD_pthread_mutex_unlock(&job->job_mutex);
+ } }
_endJob:
+ ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
+ if (job->prefix.size > 0)
+ DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start);
+ DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start);
+ /* release resources */
+ ZSTDMT_releaseSeq(job->seqPool, rawSeqStore);
ZSTDMT_releaseCCtx(job->cctxPool, cctx);
- ZSTDMT_releaseBuffer(job->bufPool, job->src);
- job->src = g_nullBuffer; job->srcStart = NULL;
- ZSTD_PTHREAD_MUTEX_LOCK(job->jobCompleted_mutex);
- job->jobCompleted = 1;
- job->jobScanned = 0;
- ZSTD_pthread_cond_signal(job->jobCompleted_cond);
- ZSTD_pthread_mutex_unlock(job->jobCompleted_mutex);
+ /* report */
+ ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+ job->consumed = job->src.size;
+ ZSTD_pthread_cond_signal(&job->job_cond);
+ ZSTD_pthread_mutex_unlock(&job->job_mutex);
}
@@ -410,109 +715,141 @@ _endJob:
/* ------------------------------------------ */
typedef struct {
+ range_t prefix; /* read-only non-owned prefix buffer */
buffer_t buffer;
size_t filled;
} inBuff_t;
+typedef struct {
+ BYTE* buffer; /* The round input buffer. All jobs get references
+ * to pieces of the buffer. ZSTDMT_tryGetInputRange()
+ * handles handing out job input buffers, and makes
+ * sure it doesn't overlap with any pieces still in use.
+ */
+ size_t capacity; /* The capacity of buffer. */
+ size_t pos; /* The position of the current inBuff in the round
+ * buffer. Updated past the end if the inBuff once
+ * the inBuff is sent to the worker thread.
+ * pos <= capacity.
+ */
+} roundBuff_t;
+
+static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
+
struct ZSTDMT_CCtx_s {
POOL_ctx* factory;
ZSTDMT_jobDescription* jobs;
ZSTDMT_bufferPool* bufPool;
ZSTDMT_CCtxPool* cctxPool;
- ZSTD_pthread_mutex_t jobCompleted_mutex;
- ZSTD_pthread_cond_t jobCompleted_cond;
+ ZSTDMT_seqPool* seqPool;
ZSTD_CCtx_params params;
size_t targetSectionSize;
- size_t inBuffSize;
- size_t dictSize;
- size_t targetDictSize;
+ size_t targetPrefixSize;
+ roundBuff_t roundBuff;
inBuff_t inBuff;
- XXH64_state_t xxhState;
- unsigned singleThreaded;
+ int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
+ serialState_t serial;
+ unsigned singleBlockingThread;
unsigned jobIDMask;
unsigned doneJobID;
unsigned nextJobID;
unsigned frameEnded;
unsigned allJobsCompleted;
unsigned long long frameContentSize;
+ unsigned long long consumed;
+ unsigned long long produced;
ZSTD_customMem cMem;
ZSTD_CDict* cdictLocal;
const ZSTD_CDict* cdict;
};
-static ZSTDMT_jobDescription* ZSTDMT_allocJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
+{
+ U32 jobNb;
+ if (jobTable == NULL) return;
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
+ ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
+ }
+ ZSTD_free(jobTable, cMem);
+}
+
+/* ZSTDMT_allocJobsTable()
+ * allocate and init a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table */
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
{
U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
U32 const nbJobs = 1 << nbJobsLog2;
+ U32 jobNb;
+ ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+ ZSTD_calloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+ int initError = 0;
+ if (jobTable==NULL) return NULL;
*nbJobsPtr = nbJobs;
- return (ZSTDMT_jobDescription*) ZSTD_calloc(
- nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
+ }
+ if (initError != 0) {
+ ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
+ return NULL;
+ }
+ return jobTable;
}
-/* ZSTDMT_CCtxParam_setNbThreads():
+/* ZSTDMT_CCtxParam_setNbWorkers():
* Internal use only */
-size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads)
+size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
{
- if (nbThreads > ZSTDMT_NBTHREADS_MAX) nbThreads = ZSTDMT_NBTHREADS_MAX;
- if (nbThreads < 1) nbThreads = 1;
- params->nbThreads = nbThreads;
+ if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX;
+ params->nbWorkers = nbWorkers;
params->overlapSizeLog = ZSTDMT_OVERLAPLOG_DEFAULT;
params->jobSize = 0;
- return nbThreads;
-}
-
-/* ZSTDMT_getNbThreads():
- * @return nb threads currently active in mtctx.
- * mtctx must be valid */
-size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx)
-{
- assert(mtctx != NULL);
- return mtctx->params.nbThreads;
+ return nbWorkers;
}
-ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads, ZSTD_customMem cMem)
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem)
{
ZSTDMT_CCtx* mtctx;
- U32 nbJobs = nbThreads + 2;
- DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbThreads = %u)", nbThreads);
+ U32 nbJobs = nbWorkers + 2;
+ int initError;
+ DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
- if (nbThreads < 1) return NULL;
- nbThreads = MIN(nbThreads , ZSTDMT_NBTHREADS_MAX);
+ if (nbWorkers < 1) return NULL;
+ nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
/* invalid custom allocator */
return NULL;
mtctx = (ZSTDMT_CCtx*) ZSTD_calloc(sizeof(ZSTDMT_CCtx), cMem);
if (!mtctx) return NULL;
- ZSTDMT_CCtxParam_setNbThreads(&mtctx->params, nbThreads);
+ ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
mtctx->cMem = cMem;
mtctx->allJobsCompleted = 1;
- mtctx->factory = POOL_create_advanced(nbThreads, 0, cMem);
- mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, cMem);
+ mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+ mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
+ assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
mtctx->jobIDMask = nbJobs - 1;
- mtctx->bufPool = ZSTDMT_createBufferPool(nbThreads, cMem);
- mtctx->cctxPool = ZSTDMT_createCCtxPool(nbThreads, cMem);
- if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool) {
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
+ mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
+ initError = ZSTDMT_serialState_init(&mtctx->serial);
+ mtctx->roundBuff = kNullRoundBuff;
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
ZSTDMT_freeCCtx(mtctx);
return NULL;
}
- if (ZSTD_pthread_mutex_init(&mtctx->jobCompleted_mutex, NULL)) {
- ZSTDMT_freeCCtx(mtctx);
- return NULL;
- }
- if (ZSTD_pthread_cond_init(&mtctx->jobCompleted_cond, NULL)) {
- ZSTDMT_freeCCtx(mtctx);
- return NULL;
- }
- DEBUGLOG(3, "mt_cctx created, for %u threads", nbThreads);
+ DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
return mtctx;
}
-ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads)
+ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers)
{
- return ZSTDMT_createCCtx_advanced(nbThreads, ZSTD_defaultCMem);
+ return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem);
}
+
/* ZSTDMT_releaseAllJobResources() :
* note : ensure all workers are killed first ! */
static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
@@ -523,29 +860,26 @@ static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx)
DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start);
ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
mtctx->jobs[jobID].dstBuff = g_nullBuffer;
- DEBUGLOG(4, "job%02u: release src address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].src.start);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].src);
- mtctx->jobs[jobID].src = g_nullBuffer;
+ mtctx->jobs[jobID].cSize = 0;
}
memset(mtctx->jobs, 0, (mtctx->jobIDMask+1)*sizeof(ZSTDMT_jobDescription));
- DEBUGLOG(4, "input: release address %08X", (U32)(size_t)mtctx->inBuff.buffer.start);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer);
mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
mtctx->allJobsCompleted = 1;
}
-static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* zcs)
+static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx)
{
DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted");
- while (zcs->doneJobID < zcs->nextJobID) {
- unsigned const jobID = zcs->doneJobID & zcs->jobIDMask;
- ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
- while (zcs->jobs[jobID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", zcs->doneJobID); /* we want to block when waiting for data to flush */
- ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex);
+ while (mtctx->doneJobID < mtctx->nextJobID) {
+ unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
+ while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
+ DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */
+ ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
}
- ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
- zcs->doneJobID++;
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+ mtctx->doneJobID++;
}
}
@@ -554,12 +888,14 @@ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx)
if (mtctx==NULL) return 0; /* compatible with free on NULL */
POOL_free(mtctx->factory); /* stop and free worker threads */
ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */
- ZSTD_free(mtctx->jobs, mtctx->cMem);
+ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
ZSTDMT_freeBufferPool(mtctx->bufPool);
ZSTDMT_freeCCtxPool(mtctx->cctxPool);
+ ZSTDMT_freeSeqPool(mtctx->seqPool);
+ ZSTDMT_serialState_free(&mtctx->serial);
ZSTD_freeCDict(mtctx->cdictLocal);
- ZSTD_pthread_mutex_destroy(&mtctx->jobCompleted_mutex);
- ZSTD_pthread_cond_destroy(&mtctx->jobCompleted_cond);
+ if (mtctx->roundBuff.buffer)
+ ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
ZSTD_free(mtctx, mtctx->cMem);
return 0;
}
@@ -572,7 +908,9 @@ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx)
+ ZSTDMT_sizeof_bufferPool(mtctx->bufPool)
+ (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription)
+ ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool)
- + ZSTD_sizeof_CDict(mtctx->cdictLocal);
+ + ZSTDMT_sizeof_seqPool(mtctx->seqPool)
+ + ZSTD_sizeof_CDict(mtctx->cdictLocal)
+ + mtctx->roundBuff.capacity;
}
/* Internal only */
@@ -612,133 +950,224 @@ size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter,
}
}
+/* Sets parameters relevant to the compression job,
+ * initializing others to default values. */
+static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
+{
+ ZSTD_CCtx_params jobParams;
+ memset(&jobParams, 0, sizeof(jobParams));
+
+ jobParams.cParams = params.cParams;
+ jobParams.fParams = params.fParams;
+ jobParams.compressionLevel = params.compressionLevel;
+ jobParams.disableLiteralCompression = params.disableLiteralCompression;
+
+ return jobParams;
+}
+
+/*! ZSTDMT_updateCParams_whileCompressing() :
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
+ * New parameters will be applied to next compression job. */
+void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
+{
+ U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */
+ int const compressionLevel = cctxParams->compressionLevel;
+ DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)",
+ compressionLevel);
+ mtctx->params.compressionLevel = compressionLevel;
+ { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0);
+ cParams.windowLog = saved_wlog;
+ mtctx->params.cParams = cParams;
+ }
+}
+
+/* ZSTDMT_getNbWorkers():
+ * @return nb threads currently active in mtctx.
+ * mtctx must be valid */
+unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
+{
+ assert(mtctx != NULL);
+ return mtctx->params.nbWorkers;
+}
+
+/* ZSTDMT_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads.
+ * Note : mutex will be acquired during statistics collection. */
+ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
+{
+ ZSTD_frameProgression fps;
+ DEBUGLOG(6, "ZSTDMT_getFrameProgression");
+ fps.consumed = mtctx->consumed;
+ fps.produced = mtctx->produced;
+ fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
+ { unsigned jobNb;
+ unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
+ DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
+ mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+ for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
+ unsigned const wJobID = jobNb & mtctx->jobIDMask;
+ ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex);
+ { size_t const cResult = mtctx->jobs[wJobID].cSize;
+ size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
+ fps.consumed += mtctx->jobs[wJobID].consumed;
+ fps.ingested += mtctx->jobs[wJobID].src.size;
+ fps.produced += produced;
+ }
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+ }
+ }
+ return fps;
+}
+
+
/* ------------------------------------------ */
/* ===== Multi-threaded compression ===== */
/* ------------------------------------------ */
-static unsigned computeNbChunks(size_t srcSize, unsigned windowLog, unsigned nbThreads) {
- size_t const chunkSizeTarget = (size_t)1 << (windowLog + 2);
- size_t const chunkMaxSize = chunkSizeTarget << 2;
- size_t const passSizeMax = chunkMaxSize * nbThreads;
- unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
- unsigned const nbChunksLarge = multiplier * nbThreads;
- unsigned const nbChunksMax = (unsigned)(srcSize / chunkSizeTarget) + 1;
- unsigned const nbChunksSmall = MIN(nbChunksMax, nbThreads);
- return (multiplier>1) ? nbChunksLarge : nbChunksSmall;
+static size_t ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params)
+{
+ if (params.ldmParams.enableLdm)
+ return MAX(21, params.cParams.chainLog + 4);
+ return MAX(20, params.cParams.windowLog + 2);
+}
+
+static size_t ZSTDMT_computeOverlapLog(ZSTD_CCtx_params const params)
+{
+ unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
+ if (params.ldmParams.enableLdm)
+ return (MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog);
+ return overlapRLog >= 9 ? 0 : (params.cParams.windowLog - overlapRLog);
}
+static unsigned ZSTDMT_computeNbJobs(ZSTD_CCtx_params params, size_t srcSize, unsigned nbWorkers) {
+ assert(nbWorkers>0);
+ { size_t const jobSizeTarget = (size_t)1 << ZSTDMT_computeTargetJobLog(params);
+ size_t const jobMaxSize = jobSizeTarget << 2;
+ size_t const passSizeMax = jobMaxSize * nbWorkers;
+ unsigned const multiplier = (unsigned)(srcSize / passSizeMax) + 1;
+ unsigned const nbJobsLarge = multiplier * nbWorkers;
+ unsigned const nbJobsMax = (unsigned)(srcSize / jobSizeTarget) + 1;
+ unsigned const nbJobsSmall = MIN(nbJobsMax, nbWorkers);
+ return (multiplier>1) ? nbJobsLarge : nbJobsSmall;
+} }
+
+/* ZSTDMT_compress_advanced_internal() :
+ * This is a blocking function : it will only give back control to caller after finishing its compression job.
+ */
static size_t ZSTDMT_compress_advanced_internal(
ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
- ZSTD_CCtx_params const params)
+ ZSTD_CCtx_params params)
{
- ZSTD_CCtx_params const jobParams = ZSTDMT_makeJobCCtxParams(params);
- unsigned const overlapRLog = (params.overlapSizeLog>9) ? 0 : 9-params.overlapSizeLog;
- size_t const overlapSize = (overlapRLog>=9) ? 0 : (size_t)1 << (params.cParams.windowLog - overlapRLog);
- unsigned nbChunks = computeNbChunks(srcSize, params.cParams.windowLog, params.nbThreads);
- size_t const proposedChunkSize = (srcSize + (nbChunks-1)) / nbChunks;
- size_t const avgChunkSize = (((proposedChunkSize-1) & 0x1FFFF) < 0x7FFF) ? proposedChunkSize + 0xFFFF : proposedChunkSize; /* avoid too small last block */
+ ZSTD_CCtx_params const jobParams = ZSTDMT_initJobCCtxParams(params);
+ size_t const overlapSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
+ unsigned const nbJobs = ZSTDMT_computeNbJobs(params, srcSize, params.nbWorkers);
+ size_t const proposedJobSize = (srcSize + (nbJobs-1)) / nbJobs;
+ size_t const avgJobSize = (((proposedJobSize-1) & 0x1FFFF) < 0x7FFF) ? proposedJobSize + 0xFFFF : proposedJobSize; /* avoid too small last block */
const char* const srcStart = (const char*)src;
size_t remainingSrcSize = srcSize;
- unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbChunks : (unsigned)(dstCapacity / ZSTD_compressBound(avgChunkSize)); /* presumes avgChunkSize >= 256 KB, which should be the case */
+ unsigned const compressWithinDst = (dstCapacity >= ZSTD_compressBound(srcSize)) ? nbJobs : (unsigned)(dstCapacity / ZSTD_compressBound(avgJobSize)); /* presumes avgJobSize >= 256 KB, which should be the case */
size_t frameStartPos = 0, dstBufferPos = 0;
- XXH64_state_t xxh64;
- assert(jobParams.nbThreads == 0);
- assert(mtctx->cctxPool->totalCCtx == params.nbThreads);
+ assert(jobParams.nbWorkers == 0);
+ assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
- DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbChunks=%2u (rawSize=%u bytes; fixedSize=%u) ",
- nbChunks, (U32)proposedChunkSize, (U32)avgChunkSize);
- if (nbChunks==1) { /* fallback to single-thread mode */
+ params.jobSize = (U32)avgJobSize;
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: nbJobs=%2u (rawSize=%u bytes; fixedSize=%u) ",
+ nbJobs, (U32)proposedJobSize, (U32)avgJobSize);
+
+ if ((nbJobs==1) | (params.nbWorkers<=1)) { /* fallback to single-thread mode : this is a blocking invocation anyway */
ZSTD_CCtx* const cctx = mtctx->cctxPool->cctx[0];
+ DEBUGLOG(4, "ZSTDMT_compress_advanced_internal: fallback to single-thread mode");
if (cdict) return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, jobParams.fParams);
return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, NULL, 0, jobParams);
}
- assert(avgChunkSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), which is required for compressWithinDst */
- ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgChunkSize) );
- XXH64_reset(&xxh64, 0);
- if (nbChunks > mtctx->jobIDMask+1) { /* enlarge job table */
- U32 nbJobs = nbChunks;
- ZSTD_free(mtctx->jobs, mtctx->cMem);
+ assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+ return ERROR(memory_allocation);
+
+ if (nbJobs > mtctx->jobIDMask+1) { /* enlarge job table */
+ U32 jobsTableSize = nbJobs;
+ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
mtctx->jobIDMask = 0;
- mtctx->jobs = ZSTDMT_allocJobsTable(&nbJobs, mtctx->cMem);
+ mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
if (mtctx->jobs==NULL) return ERROR(memory_allocation);
- mtctx->jobIDMask = nbJobs - 1;
+ assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0)); /* ensure jobsTableSize is a power of 2 */
+ mtctx->jobIDMask = jobsTableSize - 1;
}
{ unsigned u;
- for (u=0; u<nbChunks; u++) {
- size_t const chunkSize = MIN(remainingSrcSize, avgChunkSize);
- size_t const dstBufferCapacity = ZSTD_compressBound(chunkSize);
+ for (u=0; u<nbJobs; u++) {
+ size_t const jobSize = MIN(remainingSrcSize, avgJobSize);
+ size_t const dstBufferCapacity = ZSTD_compressBound(jobSize);
buffer_t const dstAsBuffer = { (char*)dst + dstBufferPos, dstBufferCapacity };
buffer_t const dstBuffer = u < compressWithinDst ? dstAsBuffer : g_nullBuffer;
size_t dictSize = u ? overlapSize : 0;
- mtctx->jobs[u].src = g_nullBuffer;
- mtctx->jobs[u].srcStart = srcStart + frameStartPos - dictSize;
- mtctx->jobs[u].prefixSize = dictSize;
- mtctx->jobs[u].srcSize = chunkSize;
+ mtctx->jobs[u].prefix.start = srcStart + frameStartPos - dictSize;
+ mtctx->jobs[u].prefix.size = dictSize;
+ mtctx->jobs[u].src.start = srcStart + frameStartPos;
+ mtctx->jobs[u].src.size = jobSize; assert(jobSize > 0); /* avoid job.src.size == 0 */
+ mtctx->jobs[u].consumed = 0;
+ mtctx->jobs[u].cSize = 0;
mtctx->jobs[u].cdict = (u==0) ? cdict : NULL;
mtctx->jobs[u].fullFrameSize = srcSize;
mtctx->jobs[u].params = jobParams;
/* do not calculate checksum within sections, but write it in header for first section */
- if (u!=0) mtctx->jobs[u].params.fParams.checksumFlag = 0;
mtctx->jobs[u].dstBuff = dstBuffer;
mtctx->jobs[u].cctxPool = mtctx->cctxPool;
mtctx->jobs[u].bufPool = mtctx->bufPool;
- mtctx->jobs[u].firstChunk = (u==0);
- mtctx->jobs[u].lastChunk = (u==nbChunks-1);
- mtctx->jobs[u].jobCompleted = 0;
- mtctx->jobs[u].jobCompleted_mutex = &mtctx->jobCompleted_mutex;
- mtctx->jobs[u].jobCompleted_cond = &mtctx->jobCompleted_cond;
-
- if (params.fParams.checksumFlag) {
- XXH64_update(&xxh64, srcStart + frameStartPos, chunkSize);
- }
+ mtctx->jobs[u].seqPool = mtctx->seqPool;
+ mtctx->jobs[u].serial = &mtctx->serial;
+ mtctx->jobs[u].jobID = u;
+ mtctx->jobs[u].firstJob = (u==0);
+ mtctx->jobs[u].lastJob = (u==nbJobs-1);
- DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)chunkSize);
- DEBUG_PRINTHEX(6, mtctx->jobs[u].srcStart, 12);
- POOL_add(mtctx->factory, ZSTDMT_compressChunk, &mtctx->jobs[u]);
+ DEBUGLOG(5, "ZSTDMT_compress_advanced_internal: posting job %u (%u bytes)", u, (U32)jobSize);
+ DEBUG_PRINTHEX(6, mtctx->jobs[u].prefix.start, 12);
+ POOL_add(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[u]);
- frameStartPos += chunkSize;
+ frameStartPos += jobSize;
dstBufferPos += dstBufferCapacity;
- remainingSrcSize -= chunkSize;
+ remainingSrcSize -= jobSize;
} }
/* collect result */
{ size_t error = 0, dstPos = 0;
- unsigned chunkID;
- for (chunkID=0; chunkID<nbChunks; chunkID++) {
- DEBUGLOG(5, "waiting for chunk %u ", chunkID);
- ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobCompleted_mutex);
- while (mtctx->jobs[chunkID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from chunk %u", chunkID);
- ZSTD_pthread_cond_wait(&mtctx->jobCompleted_cond, &mtctx->jobCompleted_mutex);
+ unsigned jobID;
+ for (jobID=0; jobID<nbJobs; jobID++) {
+ DEBUGLOG(5, "waiting for job %u ", jobID);
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
+ while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
+ DEBUGLOG(5, "waiting for jobCompleted signal from job %u", jobID);
+ ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
}
- ZSTD_pthread_mutex_unlock(&mtctx->jobCompleted_mutex);
- DEBUGLOG(5, "ready to write chunk %u ", chunkID);
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
+ DEBUGLOG(5, "ready to write job %u ", jobID);
- mtctx->jobs[chunkID].srcStart = NULL;
- { size_t const cSize = mtctx->jobs[chunkID].cSize;
+ { size_t const cSize = mtctx->jobs[jobID].cSize;
if (ZSTD_isError(cSize)) error = cSize;
if ((!error) && (dstPos + cSize > dstCapacity)) error = ERROR(dstSize_tooSmall);
- if (chunkID) { /* note : chunk 0 is written directly at dst, which is correct position */
+ if (jobID) { /* note : job 0 is written directly at dst, which is correct position */
if (!error)
- memmove((char*)dst + dstPos, mtctx->jobs[chunkID].dstBuff.start, cSize); /* may overlap when chunk compressed within dst */
- if (chunkID >= compressWithinDst) { /* chunk compressed into its own buffer, which must be released */
- DEBUGLOG(5, "releasing buffer %u>=%u", chunkID, compressWithinDst);
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[chunkID].dstBuff);
+ memmove((char*)dst + dstPos, mtctx->jobs[jobID].dstBuff.start, cSize); /* may overlap when job compressed within dst */
+ if (jobID >= compressWithinDst) { /* job compressed into its own buffer, which must be released */
+ DEBUGLOG(5, "releasing buffer %u>=%u", jobID, compressWithinDst);
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff);
} }
- mtctx->jobs[chunkID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].cSize = 0;
dstPos += cSize ;
}
- } /* for (chunkID=0; chunkID<nbChunks; chunkID++) */
+ } /* for (jobID=0; jobID<nbJobs; jobID++) */
DEBUGLOG(4, "checksumFlag : %u ", params.fParams.checksumFlag);
if (params.fParams.checksumFlag) {
- U32 const checksum = (U32)XXH64_digest(&xxh64);
+ U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
if (dstPos + 4 > dstCapacity) {
error = ERROR(dstSize_tooSmall);
} else {
@@ -756,7 +1185,7 @@ size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
- ZSTD_parameters const params,
+ ZSTD_parameters params,
unsigned overlapLog)
{
ZSTD_CCtx_params cctxParams = mtctx->params;
@@ -787,66 +1216,104 @@ size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
/* ====================================== */
size_t ZSTDMT_initCStream_internal(
- ZSTDMT_CCtx* zcs,
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+ ZSTDMT_CCtx* mtctx,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
unsigned long long pledgedSrcSize)
{
- DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
+ (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
/* params are supposed to be fully validated at this point */
assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
assert(!((dict) && (cdict))); /* either dict or cdict, not both */
- assert(zcs->cctxPool->totalCCtx == params.nbThreads);
- zcs->singleThreaded = (params.nbThreads==1) | (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
-
- if (zcs->singleThreaded) {
- ZSTD_CCtx_params const singleThreadParams = ZSTDMT_makeJobCCtxParams(params);
- DEBUGLOG(4, "single thread mode");
- assert(singleThreadParams.nbThreads == 0);
- return ZSTD_initCStream_internal(zcs->cctxPool->cctx[0],
+ assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
+
+ /* init */
+ if (params.jobSize == 0) {
+ params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params);
+ }
+ if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
+
+ mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
+ if (mtctx->singleBlockingThread) {
+ ZSTD_CCtx_params const singleThreadParams = ZSTDMT_initJobCCtxParams(params);
+ DEBUGLOG(5, "ZSTDMT_initCStream_internal: switch to single blocking thread mode");
+ assert(singleThreadParams.nbWorkers == 0);
+ return ZSTD_initCStream_internal(mtctx->cctxPool->cctx[0],
dict, dictSize, cdict,
singleThreadParams, pledgedSrcSize);
}
- DEBUGLOG(4, "multi-threading mode (%u threads)", params.nbThreads);
- if (zcs->allJobsCompleted == 0) { /* previous compression not correctly finished */
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- zcs->allJobsCompleted = 1;
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
+
+ if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ mtctx->allJobsCompleted = 1;
}
- zcs->params = params;
- zcs->frameContentSize = pledgedSrcSize;
+ mtctx->params = params;
+ mtctx->frameContentSize = pledgedSrcSize;
if (dict) {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
- ZSTD_dlm_byCopy, dictMode, /* note : a loadPrefix becomes an internal CDict */
- params.cParams, zcs->cMem);
- zcs->cdict = zcs->cdictLocal;
- if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
+ params.cParams, mtctx->cMem);
+ mtctx->cdict = mtctx->cdictLocal;
+ if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
} else {
- ZSTD_freeCDict(zcs->cdictLocal);
- zcs->cdictLocal = NULL;
- zcs->cdict = cdict;
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = NULL;
+ mtctx->cdict = cdict;
}
- assert(params.overlapSizeLog <= 9);
- zcs->targetDictSize = (params.overlapSizeLog==0) ? 0 : (size_t)1 << (params.cParams.windowLog - (9 - params.overlapSizeLog));
- DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(zcs->targetDictSize>>10));
- zcs->targetSectionSize = params.jobSize ? params.jobSize : (size_t)1 << (params.cParams.windowLog + 2);
- if (zcs->targetSectionSize < ZSTDMT_JOBSIZE_MIN) zcs->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
- if (zcs->targetSectionSize < zcs->targetDictSize) zcs->targetSectionSize = zcs->targetDictSize; /* job size must be >= overlap size */
- DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(zcs->targetSectionSize>>10), params.jobSize);
- zcs->inBuffSize = zcs->targetDictSize + zcs->targetSectionSize;
- DEBUGLOG(4, "inBuff Size : %u KB", (U32)(zcs->inBuffSize>>10));
- ZSTDMT_setBufferSize(zcs->bufPool, MAX(zcs->inBuffSize, ZSTD_compressBound(zcs->targetSectionSize)) );
- zcs->inBuff.buffer = g_nullBuffer;
- zcs->dictSize = 0;
- zcs->doneJobID = 0;
- zcs->nextJobID = 0;
- zcs->frameEnded = 0;
- zcs->allJobsCompleted = 0;
- if (params.fParams.checksumFlag) XXH64_reset(&zcs->xxhState, 0);
+ mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
+ DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
+ mtctx->targetSectionSize = params.jobSize;
+ if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
+ if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
+ DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
+ {
+ /* If ldm is enabled we need windowSize space. */
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ /* Two buffers of slack, plus extra space for the overlap
+ * This is the minimum slack that LDM works with. One extra because
+ * flush might waste up to targetSectionSize-1 bytes. Another extra
+ * for the overlap (if > 0), then one to fill which doesn't overlap
+ * with the LDM window.
+ */
+ size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0);
+ size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers;
+ /* Compute the total size, and always have enough slack */
+ size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1);
+ size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers;
+ size_t const capacity = MAX(windowSize, sectionsSize) + slackSize;
+ if (mtctx->roundBuff.capacity < capacity) {
+ if (mtctx->roundBuff.buffer)
+ ZSTD_free(mtctx->roundBuff.buffer, mtctx->cMem);
+ mtctx->roundBuff.buffer = (BYTE*)ZSTD_malloc(capacity, mtctx->cMem);
+ if (mtctx->roundBuff.buffer == NULL) {
+ mtctx->roundBuff.capacity = 0;
+ return ERROR(memory_allocation);
+ }
+ mtctx->roundBuff.capacity = capacity;
+ }
+ }
+ DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10));
+ mtctx->roundBuff.pos = 0;
+ mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
+ mtctx->inBuff.prefix = kNullRange;
+ mtctx->doneJobID = 0;
+ mtctx->nextJobID = 0;
+ mtctx->frameEnded = 0;
+ mtctx->allJobsCompleted = 0;
+ mtctx->consumed = 0;
+ mtctx->produced = 0;
+ if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
+ return ERROR(memory_allocation);
return 0;
}
@@ -855,11 +1322,11 @@ size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
ZSTD_parameters params,
unsigned long long pledgedSrcSize)
{
- ZSTD_CCtx_params cctxParams = mtctx->params;
- DEBUGLOG(5, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
+ ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
+ DEBUGLOG(4, "ZSTDMT_initCStream_advanced (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
cctxParams.cParams = params.cParams;
cctxParams.fParams = params.fParams;
- return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dm_auto, NULL,
+ return ZSTDMT_initCStream_internal(mtctx, dict, dictSize, ZSTD_dct_auto, NULL,
cctxParams, pledgedSrcSize);
}
@@ -869,10 +1336,10 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
unsigned long long pledgedSrcSize)
{
ZSTD_CCtx_params cctxParams = mtctx->params;
+ if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
cctxParams.cParams = ZSTD_getCParamsFromCDict(cdict);
cctxParams.fParams = fParams;
- if (cdict==NULL) return ERROR(dictionary_wrong); /* method incompatible with NULL cdict */
- return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dm_auto, cdict,
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0 /*dictSize*/, ZSTD_dct_auto, cdict,
cctxParams, pledgedSrcSize);
}
@@ -881,148 +1348,358 @@ size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
* pledgedSrcSize can be zero == unknown (for the time being)
* prefer using ZSTD_CONTENTSIZE_UNKNOWN,
* as `0` might mean "empty" in the future */
-size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* zcs, unsigned long long pledgedSrcSize)
+size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize)
{
if (!pledgedSrcSize) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
- if (zcs->params.nbThreads==1)
- return ZSTD_resetCStream(zcs->cctxPool->cctx[0], pledgedSrcSize);
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, 0, zcs->params,
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, 0, mtctx->params,
pledgedSrcSize);
}
-size_t ZSTDMT_initCStream(ZSTDMT_CCtx* zcs, int compressionLevel) {
- ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, 0);
- ZSTD_CCtx_params cctxParams = zcs->params;
+size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel) {
+ ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0);
+ ZSTD_CCtx_params cctxParams = mtctx->params; /* retrieve sticky params */
+ DEBUGLOG(4, "ZSTDMT_initCStream (cLevel=%i)", compressionLevel);
cctxParams.cParams = params.cParams;
cctxParams.fParams = params.fParams;
- return ZSTDMT_initCStream_internal(zcs, NULL, 0, ZSTD_dm_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
-}
-
-
-static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* zcs, size_t srcSize, unsigned endFrame)
-{
- unsigned const jobID = zcs->nextJobID & zcs->jobIDMask;
-
- DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
- zcs->nextJobID, (U32)srcSize, (U32)zcs->dictSize);
- zcs->jobs[jobID].src = zcs->inBuff.buffer;
- zcs->jobs[jobID].srcStart = zcs->inBuff.buffer.start;
- zcs->jobs[jobID].srcSize = srcSize;
- zcs->jobs[jobID].prefixSize = zcs->dictSize;
- assert(zcs->inBuff.filled >= srcSize + zcs->dictSize);
- zcs->jobs[jobID].params = zcs->params;
- /* do not calculate checksum within sections, but write it in header for first section */
- if (zcs->nextJobID) zcs->jobs[jobID].params.fParams.checksumFlag = 0;
- zcs->jobs[jobID].cdict = zcs->nextJobID==0 ? zcs->cdict : NULL;
- zcs->jobs[jobID].fullFrameSize = zcs->frameContentSize;
- zcs->jobs[jobID].dstBuff = g_nullBuffer;
- zcs->jobs[jobID].cctxPool = zcs->cctxPool;
- zcs->jobs[jobID].bufPool = zcs->bufPool;
- zcs->jobs[jobID].firstChunk = (zcs->nextJobID==0);
- zcs->jobs[jobID].lastChunk = endFrame;
- zcs->jobs[jobID].jobCompleted = 0;
- zcs->jobs[jobID].dstFlushed = 0;
- zcs->jobs[jobID].jobCompleted_mutex = &zcs->jobCompleted_mutex;
- zcs->jobs[jobID].jobCompleted_cond = &zcs->jobCompleted_cond;
-
- if (zcs->params.fParams.checksumFlag)
- XXH64_update(&zcs->xxhState, (const char*)zcs->inBuff.buffer.start + zcs->dictSize, srcSize);
-
- /* get a new buffer for next input */
- if (!endFrame) {
- size_t const newDictSize = MIN(srcSize + zcs->dictSize, zcs->targetDictSize);
- zcs->inBuff.buffer = ZSTDMT_getBuffer(zcs->bufPool);
- if (zcs->inBuff.buffer.start == NULL) { /* not enough memory to allocate next input buffer */
- zcs->jobs[jobID].jobCompleted = 1;
- zcs->nextJobID++;
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- return ERROR(memory_allocation);
+ return ZSTDMT_initCStream_internal(mtctx, NULL, 0, ZSTD_dct_auto, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+
+/* ZSTDMT_writeLastEmptyBlock()
+ * Write a single empty block with an end-of-frame to finish a frame.
+ * Job must be created from streaming variant.
+ * This function is always successfull if expected conditions are fulfilled.
+ */
+static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job)
+{
+ assert(job->lastJob == 1);
+ assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */
+ assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */
+ assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */
+ job->dstBuff = ZSTDMT_getBuffer(job->bufPool);
+ if (job->dstBuff.start == NULL) {
+ job->cSize = ERROR(memory_allocation);
+ return;
+ }
+ assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */
+ job->src = kNullRange;
+ job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity);
+ assert(!ZSTD_isError(job->cSize));
+ assert(job->consumed == 0);
+}
+
+static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp)
+{
+ unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask;
+ int const endFrame = (endOp == ZSTD_e_end);
+
+ if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) {
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full");
+ assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask));
+ return 0;
+ }
+
+ if (!mtctx->jobReady) {
+ BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start;
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ",
+ mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size);
+ mtctx->jobs[jobID].src.start = src;
+ mtctx->jobs[jobID].src.size = srcSize;
+ assert(mtctx->inBuff.filled >= srcSize);
+ mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix;
+ mtctx->jobs[jobID].consumed = 0;
+ mtctx->jobs[jobID].cSize = 0;
+ mtctx->jobs[jobID].params = mtctx->params;
+ mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? mtctx->cdict : NULL;
+ mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize;
+ mtctx->jobs[jobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[jobID].cctxPool = mtctx->cctxPool;
+ mtctx->jobs[jobID].bufPool = mtctx->bufPool;
+ mtctx->jobs[jobID].seqPool = mtctx->seqPool;
+ mtctx->jobs[jobID].serial = &mtctx->serial;
+ mtctx->jobs[jobID].jobID = mtctx->nextJobID;
+ mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
+ mtctx->jobs[jobID].lastJob = endFrame;
+ mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
+ mtctx->jobs[jobID].dstFlushed = 0;
+
+ /* Update the round buffer pos and clear the input buffer to be reset */
+ mtctx->roundBuff.pos += srcSize;
+ mtctx->inBuff.buffer = g_nullBuffer;
+ mtctx->inBuff.filled = 0;
+ /* Set the prefix */
+ if (!endFrame) {
+ size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize);
+ mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize;
+ mtctx->inBuff.prefix.size = newPrefixSize;
+ } else { /* endFrame==1 => no need for another input buffer */
+ mtctx->inBuff.prefix = kNullRange;
+ mtctx->frameEnded = endFrame;
+ if (mtctx->nextJobID == 0) {
+ /* single job exception : checksum is already calculated directly within worker thread */
+ mtctx->params.fParams.checksumFlag = 0;
+ } }
+
+ if ( (srcSize == 0)
+ && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) {
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame");
+ assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */
+ ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID);
+ mtctx->nextJobID++;
+ return 0;
}
- zcs->inBuff.filled -= srcSize + zcs->dictSize - newDictSize;
- memmove(zcs->inBuff.buffer.start,
- (const char*)zcs->jobs[jobID].srcStart + zcs->dictSize + srcSize - newDictSize,
- zcs->inBuff.filled);
- zcs->dictSize = newDictSize;
- } else { /* if (endFrame==1) */
- zcs->inBuff.buffer = g_nullBuffer;
- zcs->inBuff.filled = 0;
- zcs->dictSize = 0;
- zcs->frameEnded = 1;
- if (zcs->nextJobID == 0) {
- /* single chunk exception : checksum is calculated directly within worker thread */
- zcs->params.fParams.checksumFlag = 0;
- } }
+ }
- DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u) (note : doneJob = %u=>%u)",
- zcs->nextJobID,
- (U32)zcs->jobs[jobID].srcSize,
- zcs->jobs[jobID].lastChunk,
- zcs->doneJobID,
- zcs->doneJobID & zcs->jobIDMask);
- POOL_add(zcs->factory, ZSTDMT_compressChunk, &zcs->jobs[jobID]); /* this call is blocking when thread worker pool is exhausted */
- zcs->nextJobID++;
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))",
+ mtctx->nextJobID,
+ (U32)mtctx->jobs[jobID].src.size,
+ mtctx->jobs[jobID].lastJob,
+ mtctx->nextJobID,
+ jobID);
+ if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) {
+ mtctx->nextJobID++;
+ mtctx->jobReady = 0;
+ } else {
+ DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID);
+ mtctx->jobReady = 1;
+ }
return 0;
}
-/* ZSTDMT_flushNextJob() :
- * output : will be updated with amount of data flushed .
- * blockToFlush : if >0, the function will block and wait if there is no data available to flush .
- * @return : amount of data remaining within internal buffer, 1 if unknown but > 0, 0 if no more, or an error code */
-static size_t ZSTDMT_flushNextJob(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, unsigned blockToFlush)
+/*! ZSTDMT_flushProduced() :
+ * `output` : `pos` will be updated with amount of data flushed .
+ * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
+static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end)
{
- unsigned const wJobID = zcs->doneJobID & zcs->jobIDMask;
- DEBUGLOG(5, "ZSTDMT_flushNextJob");
- if (zcs->doneJobID == zcs->nextJobID) return 0; /* all flushed ! */
- ZSTD_PTHREAD_MUTEX_LOCK(&zcs->jobCompleted_mutex);
- while (zcs->jobs[wJobID].jobCompleted==0) {
- DEBUGLOG(5, "waiting for jobCompleted signal from job %u", zcs->doneJobID);
- if (!blockToFlush) { ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex); return 0; } /* nothing ready to be flushed => skip */
- ZSTD_pthread_cond_wait(&zcs->jobCompleted_cond, &zcs->jobCompleted_mutex); /* block when nothing available to flush */
+ unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask;
+ DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)",
+ blockToFlush, mtctx->doneJobID, mtctx->nextJobID);
+ assert(output->size >= output->pos);
+
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
+ if ( blockToFlush
+ && (mtctx->doneJobID < mtctx->nextJobID) ) {
+ assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize);
+ while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */
+ if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) {
+ DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none",
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size);
+ break;
+ }
+ DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)",
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
+ ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */
+ } }
+
+ /* try to flush something */
+ { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */
+ size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */
+ size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+ if (ZSTD_isError(cSize)) {
+ DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
+ mtctx->doneJobID, ZSTD_getErrorName(cSize));
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ return cSize;
+ }
+ /* add frame checksum if necessary (can only happen once) */
+ assert(srcConsumed <= srcSize);
+ if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */
+ && mtctx->jobs[wJobID].frameChecksumNeeded ) {
+ U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState);
+ DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum);
+ MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum);
+ cSize += 4;
+ mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */
+ mtctx->jobs[wJobID].frameChecksumNeeded = 0;
+ }
+ if (cSize > 0) { /* compression is ongoing or completed */
+ size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
+ DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
+ (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize);
+ assert(mtctx->doneJobID < mtctx->nextJobID);
+ assert(cSize >= mtctx->jobs[wJobID].dstFlushed);
+ assert(mtctx->jobs[wJobID].dstBuff.start != NULL);
+ memcpy((char*)output->dst + output->pos,
+ (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed,
+ toFlush);
+ output->pos += toFlush;
+ mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
+
+ if ( (srcConsumed == srcSize) /* job completed */
+ && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */
+ DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
+ mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
+ ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
+ mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
+ mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */
+ mtctx->consumed += srcSize;
+ mtctx->produced += cSize;
+ mtctx->doneJobID++;
+ } }
+
+ /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */
+ if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed);
+ if (srcSize > srcConsumed) return 1; /* current job not completely compressed */
}
- ZSTD_pthread_mutex_unlock(&zcs->jobCompleted_mutex);
- /* compression job completed : output can be flushed */
- { ZSTDMT_jobDescription job = zcs->jobs[wJobID];
- if (!job.jobScanned) {
- if (ZSTD_isError(job.cSize)) {
- DEBUGLOG(5, "job %u : compression error detected : %s",
- zcs->doneJobID, ZSTD_getErrorName(job.cSize));
- ZSTDMT_waitForAllJobsCompleted(zcs);
- ZSTDMT_releaseAllJobResources(zcs);
- return job.cSize;
+ if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */
+ if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */
+ if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */
+ mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */
+ if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */
+ return 0; /* internal buffers fully flushed */
+}
+
+/**
+ * Returns the range of data used by the earliest job that is not yet complete.
+ * If the data of the first job is broken up into two segments, we cover both
+ * sections.
+ */
+static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx)
+{
+ unsigned const firstJobID = mtctx->doneJobID;
+ unsigned const lastJobID = mtctx->nextJobID;
+ unsigned jobID;
+
+ for (jobID = firstJobID; jobID < lastJobID; ++jobID) {
+ unsigned const wJobID = jobID & mtctx->jobIDMask;
+ size_t consumed;
+
+ ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex);
+ consumed = mtctx->jobs[wJobID].consumed;
+ ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
+
+ if (consumed < mtctx->jobs[wJobID].src.size) {
+ range_t range = mtctx->jobs[wJobID].prefix;
+ if (range.size == 0) {
+ /* Empty prefix */
+ range = mtctx->jobs[wJobID].src;
}
- DEBUGLOG(5, "zcs->params.fParams.checksumFlag : %u ", zcs->params.fParams.checksumFlag);
- if (zcs->params.fParams.checksumFlag) {
- if (zcs->frameEnded && (zcs->doneJobID+1 == zcs->nextJobID)) { /* write checksum at end of last section */
- U32 const checksum = (U32)XXH64_digest(&zcs->xxhState);
- DEBUGLOG(5, "writing checksum : %08X \n", checksum);
- MEM_writeLE32((char*)job.dstBuff.start + job.cSize, checksum);
- job.cSize += 4;
- zcs->jobs[wJobID].cSize += 4;
- } }
- zcs->jobs[wJobID].jobScanned = 1;
+ /* Job source in multiple segments not supported yet */
+ assert(range.start <= mtctx->jobs[wJobID].src.start);
+ return range;
}
- { size_t const toWrite = MIN(job.cSize - job.dstFlushed, output->size - output->pos);
- DEBUGLOG(5, "Flushing %u bytes from job %u ", (U32)toWrite, zcs->doneJobID);
- memcpy((char*)output->dst + output->pos, (const char*)job.dstBuff.start + job.dstFlushed, toWrite);
- output->pos += toWrite;
- job.dstFlushed += toWrite;
+ }
+ return kNullRange;
+}
+
+/**
+ * Returns non-zero iff buffer and range overlap.
+ */
+static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range)
+{
+ BYTE const* const bufferStart = (BYTE const*)buffer.start;
+ BYTE const* const bufferEnd = bufferStart + buffer.capacity;
+ BYTE const* const rangeStart = (BYTE const*)range.start;
+ BYTE const* const rangeEnd = rangeStart + range.size;
+
+ if (rangeStart == NULL || bufferStart == NULL)
+ return 0;
+ /* Empty ranges cannot overlap */
+ if (bufferStart == bufferEnd || rangeStart == rangeEnd)
+ return 0;
+
+ return bufferStart < rangeEnd && rangeStart < bufferEnd;
+}
+
+static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window)
+{
+ range_t extDict;
+ range_t prefix;
+
+ extDict.start = window.dictBase + window.lowLimit;
+ extDict.size = window.dictLimit - window.lowLimit;
+
+ prefix.start = window.base + window.dictLimit;
+ prefix.size = window.nextSrc - (window.base + window.dictLimit);
+ DEBUGLOG(5, "extDict [0x%zx, 0x%zx)",
+ (size_t)extDict.start,
+ (size_t)extDict.start + extDict.size);
+ DEBUGLOG(5, "prefix [0x%zx, 0x%zx)",
+ (size_t)prefix.start,
+ (size_t)prefix.start + prefix.size);
+
+ return ZSTDMT_isOverlapped(buffer, extDict)
+ || ZSTDMT_isOverlapped(buffer, prefix);
+}
+
+static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer)
+{
+ if (mtctx->params.ldmParams.enableLdm) {
+ ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
+ DEBUGLOG(5, "source [0x%zx, 0x%zx)",
+ (size_t)buffer.start,
+ (size_t)buffer.start + buffer.capacity);
+ ZSTD_PTHREAD_MUTEX_LOCK(mutex);
+ while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
+ DEBUGLOG(6, "Waiting for LDM to finish...");
+ ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
}
- if (job.dstFlushed == job.cSize) { /* output buffer fully flushed => move to next one */
- ZSTDMT_releaseBuffer(zcs->bufPool, job.dstBuff);
- zcs->jobs[wJobID].dstBuff = g_nullBuffer;
- zcs->jobs[wJobID].jobCompleted = 0;
- zcs->doneJobID++;
- } else {
- zcs->jobs[wJobID].dstFlushed = job.dstFlushed;
+ DEBUGLOG(6, "Done waiting for LDM to finish");
+ ZSTD_pthread_mutex_unlock(mutex);
+ }
+}
+
+/**
+ * Attempts to set the inBuff to the next section to fill.
+ * If any part of the new section is still in use we give up.
+ * Returns non-zero if the buffer is filled.
+ */
+static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx)
+{
+ range_t const inUse = ZSTDMT_getInputDataInUse(mtctx);
+ size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos;
+ size_t const target = mtctx->targetSectionSize;
+ buffer_t buffer;
+
+ assert(mtctx->inBuff.buffer.start == NULL);
+ assert(mtctx->roundBuff.capacity >= target);
+
+ if (spaceLeft < target) {
+ /* ZSTD_invalidateRepCodes() doesn't work for extDict variants.
+ * Simply copy the prefix to the beginning in that case.
+ */
+ BYTE* const start = (BYTE*)mtctx->roundBuff.buffer;
+ size_t const prefixSize = mtctx->inBuff.prefix.size;
+
+ buffer.start = start;
+ buffer.capacity = prefixSize;
+ if (ZSTDMT_isOverlapped(buffer, inUse)) {
+ DEBUGLOG(6, "Waiting for buffer...");
+ return 0;
}
- /* return value : how many bytes left in buffer ; fake it to 1 if unknown but >0 */
- if (job.cSize > job.dstFlushed) return (job.cSize - job.dstFlushed);
- if (zcs->doneJobID < zcs->nextJobID) return 1; /* still some buffer to flush */
- zcs->allJobsCompleted = zcs->frameEnded; /* frame completed and entirely flushed */
- return 0; /* everything flushed */
-} }
+ ZSTDMT_waitForLdmComplete(mtctx, buffer);
+ memmove(start, mtctx->inBuff.prefix.start, prefixSize);
+ mtctx->inBuff.prefix.start = start;
+ mtctx->roundBuff.pos = prefixSize;
+ }
+ buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos;
+ buffer.capacity = target;
+
+ if (ZSTDMT_isOverlapped(buffer, inUse)) {
+ DEBUGLOG(6, "Waiting for buffer...");
+ return 0;
+ }
+ assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
+
+ ZSTDMT_waitForLdmComplete(mtctx, buffer);
+
+ DEBUGLOG(5, "Using prefix range [%zx, %zx)",
+ (size_t)mtctx->inBuff.prefix.start,
+ (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size);
+ DEBUGLOG(5, "Using source range [%zx, %zx)",
+ (size_t)buffer.start,
+ (size_t)buffer.start + buffer.capacity);
+
+
+ mtctx->inBuff.buffer = buffer;
+ mtctx->inBuff.filled = 0;
+ assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity);
+ return 1;
+}
/** ZSTDMT_compressStream_generic() :
@@ -1034,13 +1711,13 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp)
{
- size_t const newJobThreshold = mtctx->dictSize + mtctx->targetSectionSize;
unsigned forwardInputProgress = 0;
- DEBUGLOG(5, "ZSTDMT_compressStream_generic ");
+ DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)",
+ (U32)endOp, (U32)(input->size - input->pos));
assert(output->pos <= output->size);
assert(input->pos <= input->size);
- if (mtctx->singleThreaded) { /* delegate to single-thread (synchronous) */
+ if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */
return ZSTD_compressStream_generic(mtctx->cctxPool->cctx[0], output, input, endOp);
}
@@ -1050,10 +1727,11 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
}
/* single-pass shortcut (note : synchronous-mode) */
- if ( (mtctx->nextJobID == 0) /* just started */
- && (mtctx->inBuff.filled == 0) /* nothing buffered */
- && (endOp == ZSTD_e_end) /* end order */
- && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough room */
+ if ( (mtctx->nextJobID == 0) /* just started */
+ && (mtctx->inBuff.filled == 0) /* nothing buffered */
+ && (!mtctx->jobReady) /* no job already created */
+ && (endOp == ZSTD_e_end) /* end order */
+ && (output->size - output->pos >= ZSTD_compressBound(input->size - input->pos)) ) { /* enough space in dst */
size_t const cSize = ZSTDMT_compress_advanced_internal(mtctx,
(char*)output->dst + output->pos, output->size - output->pos,
(const char*)input->src + input->pos, input->size - input->pos,
@@ -1061,89 +1739,93 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
if (ZSTD_isError(cSize)) return cSize;
input->pos = input->size;
output->pos += cSize;
- ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->inBuff.buffer); /* was allocated in initStream */
mtctx->allJobsCompleted = 1;
mtctx->frameEnded = 1;
return 0;
}
/* fill input buffer */
- if (input->size > input->pos) { /* support NULL input */
+ if ( (!mtctx->jobReady)
+ && (input->size > input->pos) ) { /* support NULL input */
if (mtctx->inBuff.buffer.start == NULL) {
- mtctx->inBuff.buffer = ZSTDMT_getBuffer(mtctx->bufPool); /* note : may fail, in which case, no forward input progress */
- mtctx->inBuff.filled = 0;
+ assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */
+ if (!ZSTDMT_tryGetInputRange(mtctx)) {
+ /* It is only possible for this operation to fail if there are
+ * still compression jobs ongoing.
+ */
+ assert(mtctx->doneJobID != mtctx->nextJobID);
+ }
}
- if (mtctx->inBuff.buffer.start) {
- size_t const toLoad = MIN(input->size - input->pos, mtctx->inBuffSize - mtctx->inBuff.filled);
- DEBUGLOG(5, "inBuff:%08X; inBuffSize=%u; ToCopy=%u", (U32)(size_t)mtctx->inBuff.buffer.start, (U32)mtctx->inBuffSize, (U32)toLoad);
+ if (mtctx->inBuff.buffer.start != NULL) {
+ size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
+ assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize);
+ DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u",
+ (U32)toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize);
memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, toLoad);
input->pos += toLoad;
mtctx->inBuff.filled += toLoad;
forwardInputProgress = toLoad>0;
- } }
+ }
+ if ((input->pos < input->size) && (endOp == ZSTD_e_end))
+ endOp = ZSTD_e_flush; /* can't end now : not all input consumed */
+ }
- if ( (mtctx->inBuff.filled >= newJobThreshold) /* filled enough : let's compress */
- && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) { /* avoid overwriting job round buffer */
- CHECK_F( ZSTDMT_createCompressionJob(mtctx, mtctx->targetSectionSize, 0 /* endFrame */) );
+ if ( (mtctx->jobReady)
+ || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */
+ || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */
+ || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */
+ size_t const jobSize = mtctx->inBuff.filled;
+ assert(mtctx->inBuff.filled <= mtctx->targetSectionSize);
+ CHECK_F( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) );
}
/* check for potential compressed data ready to be flushed */
- CHECK_F( ZSTDMT_flushNextJob(mtctx, output, !forwardInputProgress /* blockToFlush */) ); /* block if there was no forward input progress */
-
- if (input->pos < input->size) /* input not consumed : do not flush yet */
- endOp = ZSTD_e_continue;
-
- switch(endOp)
- {
- case ZSTD_e_flush:
- return ZSTDMT_flushStream(mtctx, output);
- case ZSTD_e_end:
- return ZSTDMT_endStream(mtctx, output);
- case ZSTD_e_continue:
- return 1;
- default:
- return ERROR(GENERIC); /* invalid endDirective */
+ { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
+ if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */
+ return remainingToFlush;
}
}
-size_t ZSTDMT_compressStream(ZSTDMT_CCtx* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
{
- CHECK_F( ZSTDMT_compressStream_generic(zcs, output, input, ZSTD_e_continue) );
+ CHECK_F( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) );
/* recommended next input size : fill current input buffer */
- return zcs->inBuffSize - zcs->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
+ return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */
}
-static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned endFrame)
+static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_EndDirective endFrame)
{
- size_t const srcSize = mtctx->inBuff.filled - mtctx->dictSize;
+ size_t const srcSize = mtctx->inBuff.filled;
DEBUGLOG(5, "ZSTDMT_flushStream_internal");
- if ( ((srcSize > 0) || (endFrame && !mtctx->frameEnded))
- && (mtctx->nextJobID <= mtctx->doneJobID + mtctx->jobIDMask) ) {
- DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job");
+ if ( mtctx->jobReady /* one job ready for a worker to pick up */
+ || (srcSize > 0) /* still some data within input buffer */
+ || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */
+ DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)",
+ (U32)srcSize, (U32)endFrame);
CHECK_F( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) );
}
/* check if there is any data available to flush */
- return ZSTDMT_flushNextJob(mtctx, output, 1 /* blockToFlush */);
+ return ZSTDMT_flushProduced(mtctx, output, 1 /* blockToFlush */, endFrame);
}
size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
{
DEBUGLOG(5, "ZSTDMT_flushStream");
- if (mtctx->singleThreaded)
+ if (mtctx->singleBlockingThread)
return ZSTD_flushStream(mtctx->cctxPool->cctx[0], output);
- return ZSTDMT_flushStream_internal(mtctx, output, 0 /* endFrame */);
+ return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_flush);
}
size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output)
{
DEBUGLOG(4, "ZSTDMT_endStream");
- if (mtctx->singleThreaded)
+ if (mtctx->singleBlockingThread)
return ZSTD_endStream(mtctx->cctxPool->cctx[0], output);
- return ZSTDMT_flushStream_internal(mtctx, output, 1 /* endFrame */);
+ return ZSTDMT_flushStream_internal(mtctx, output, ZSTD_e_end);
}
diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h
index d12f0adb8d..f79e3b4418 100644
--- a/thirdparty/zstd/compress/zstdmt_compress.h
+++ b/thirdparty/zstd/compress/zstdmt_compress.h
@@ -30,15 +30,15 @@
/* === Memory management === */
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
-ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
-ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
ZSTD_customMem cMem);
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
-/* === Simple buffer-to-butter one-pass function === */
+/* === Simple one-pass compression function === */
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
@@ -50,7 +50,7 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
/* === Streaming functions === */
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it may change in the future, to mean "empty" */
+ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
@@ -68,7 +68,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
const ZSTD_CDict* cdict,
- ZSTD_parameters const params,
+ ZSTD_parameters params,
unsigned overlapLog);
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
@@ -85,7 +85,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
typedef enum {
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
- ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+ ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
} ZSTDMT_parameter;
/* ZSTDMT_setMTCtxParameter() :
@@ -97,30 +97,46 @@ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
/*! ZSTDMT_compressStream_generic() :
- * Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
* depending on flush directive.
* @return : minimum amount of data still to be flushed
* 0 if fully flushed
- * or an error code */
+ * or an error code
+ * note : needs to be init using any ZSTD_initCStream*() variant */
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
ZSTD_outBuffer* output,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
-/* === Private definitions; never ever use directly === */
+/* ========================================================
+ * === Private interface, for use by ZSTD_compress.c ===
+ * === Not exposed in libzstd. Never invoke directly ===
+ * ======================================================== */
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
-/* ZSTDMT_CCtxParam_setNbThreads()
- * Set nbThreads, and clamp it correctly,
- * also reset jobSize and overlapLog */
-size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads);
+/* ZSTDMT_CCtxParam_setNbWorkers()
+ * Set nbWorkers, and clamp it.
+ * Also reset jobSize and overlapLog */
+size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
-/* ZSTDMT_getNbThreads():
+/*! ZSTDMT_updateCParams_whileCompressing() :
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
+ * New parameters will be applied to next compression job. */
+void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
+
+/* ZSTDMT_getNbWorkers():
* @return nb threads currently active in mtctx.
* mtctx must be valid */
-size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
+unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
+
+/* ZSTDMT_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads.
+ */
+ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
+
/*! ZSTDMT_initCStream_internal() :
* Private use only. Init streaming operation.
@@ -128,7 +144,7 @@ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
* must receive dict, or cdict, or none, but not both.
* @return : 0, or an error code */
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
const ZSTD_CDict* cdict,
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c
index 79ded96bf6..73f5c46c06 100644
--- a/thirdparty/zstd/decompress/huf_decompress.c
+++ b/thirdparty/zstd/decompress/huf_decompress.c
@@ -49,18 +49,19 @@
****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
+#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
/* **************************************************************
* Byte alignment for workSpace management
****************************************************************/
-#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
/*-***************************/
/* generic DTableDesc */
/*-***************************/
-
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
@@ -74,7 +75,6 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
/*-***************************/
/* single-symbol decoding */
/*-***************************/
-
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
@@ -94,10 +94,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
- if ((spaceUsed32 << 2) > wkspSize)
- return ERROR(tableLog_tooLarge);
- workSpace = (U32 *)workSpace + spaceUsed32;
- wkspSize -= (spaceUsed32 << 2);
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
@@ -144,8 +141,10 @@ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
workSpace, sizeof(workSpace));
}
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
{
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
BYTE const c = dt[val].byte;
@@ -156,7 +155,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
*ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
@@ -164,30 +163,33 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
if (MEM_64bits()) \
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
{
BYTE* const pStart = p;
/* up to 4 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
}
- /* closer to the end */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
- /* no more data to retrieve from bitstream, hence no need to reload */
+ /* no more data to retrieve from bitstream, no need to reload */
while (p < pEnd)
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
return pEnd-pStart;
}
-static size_t HUF_decompress1X2_usingDTable_internal(
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
@@ -200,58 +202,17 @@ static size_t HUF_decompress1X2_usingDTable_internal(
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode; }
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
- /* check */
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
return dstSize;
}
-size_t HUF_decompress1X2_usingDTable(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- DTableDesc dtd = HUF_getDTableDesc(DTable);
- if (dtd.tableType != 0) return ERROR(GENERIC);
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- void* workSpace, size_t wkspSize)
-{
- const BYTE* ip = (const BYTE*) cSrc;
-
- size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
- if (HUF_isError(hSize)) return hSize;
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
- ip += hSize; cSrcSize -= hSize;
-
- return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-
-size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize)
-{
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
- workSpace, sizeof(workSpace));
-}
-
-size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
-{
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
- return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
-}
-
-
-static size_t HUF_decompress4X2_usingDTable_internal(
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
@@ -286,23 +247,19 @@ static size_t HUF_decompress4X2_usingDTable_internal(
BYTE* op2 = opStart2;
BYTE* op3 = opStart3;
BYTE* op4 = opStart4;
- U32 endSignal;
+ U32 endSignal = BIT_DStream_unfinished;
DTableDesc const dtd = HUF_getDTableDesc(DTable);
U32 const dtLog = dtd.tableLog;
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode; }
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
- /* 16-32 symbols per loop (4-8 symbols per stream) */
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
+ while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -319,10 +276,15 @@ static size_t HUF_decompress4X2_usingDTable_internal(
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ BIT_reloadDStream(&bitD1);
+ BIT_reloadDStream(&bitD2);
+ BIT_reloadDStream(&bitD3);
+ BIT_reloadDStream(&bitD4);
}
/* check corruption */
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
if (op1 > opStart2) return ERROR(corruption_detected);
if (op2 > opStart3) return ERROR(corruption_detected);
if (op3 > opStart4) return ERROR(corruption_detected);
@@ -335,8 +297,8 @@ static size_t HUF_decompress4X2_usingDTable_internal(
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
/* check */
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endSignal) return ERROR(corruption_detected);
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
/* decoded size */
return dstSize;
@@ -344,30 +306,309 @@ static size_t HUF_decompress4X2_usingDTable_internal(
}
-size_t HUF_decompress4X2_usingDTable(
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+ } }
+ return 1;
+}
+
+#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+ const HUF_DEltX4* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+ }
+
+ /* closer to end : up to 2 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X4_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BIT_DStream_t bitD;
+
+ /* Init */
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ /* decode */
+ { BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
+ const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
+ }
+
+ /* check */
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+}
+
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X4_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+}
+
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+ const void *cSrc,
+ size_t cSrcSize,
+ const HUF_DTable *DTable);
+#if DYNAMIC_BMI2
+
+#define X(fn) \
+ \
+ static size_t fn##_default( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ if (bmi2) { \
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#else
+
+#define X(fn) \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ (void)bmi2; \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+#endif
+
+X(HUF_decompress1X2_usingDTable_internal)
+X(HUF_decompress4X2_usingDTable_internal)
+X(HUF_decompress1X4_usingDTable_internal)
+X(HUF_decompress4X4_usingDTable_internal)
+
+#undef X
+
+
+size_t HUF_decompress1X2_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
const HUF_DTable* DTable)
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 0) return ERROR(GENERIC);
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
-
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
void* workSpace, size_t wkspSize)
{
const BYTE* ip = (const BYTE*) cSrc;
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+{
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+}
+
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+}
+
+size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
workSpace, wkspSize);
if (HUF_isError(hSize)) return hSize;
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
}
@@ -387,8 +628,6 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
/* *************************/
/* double-symbols decoding */
/* *************************/
-typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
-
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* HUF_fillDTableX4Level2() :
@@ -508,10 +747,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
- if ((spaceUsed32 << 2) > wkspSize)
- return ERROR(tableLog_tooLarge);
- workSpace = (U32 *)workSpace + spaceUsed32;
- wkspSize -= (spaceUsed32 << 2);
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
rankStart = rankStart0 + 1;
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
@@ -588,95 +824,6 @@ size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
workSpace, sizeof(workSpace));
}
-static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 2);
- BIT_skipBits(DStream, dt[val].nbBits);
- return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
-{
- size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
- memcpy(op, dt+val, 1);
- if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
- else {
- if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
- BIT_skipBits(DStream, dt[val].nbBits);
- if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
- /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
- DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
- } }
- return 1;
-}
-
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
- if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
- if (MEM_64bits()) \
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
-{
- BYTE* const pStart = p;
-
- /* up to 8 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
- }
-
- /* closer to end : up to 2 symbols at a time */
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
- while (p <= pEnd-2)
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
- if (p < pEnd)
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
- return p-pStart;
-}
-
-
-static size_t HUF_decompress1X4_usingDTable_internal(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- BIT_DStream_t bitD;
-
- /* Init */
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
- if (HUF_isError(errorCode)) return errorCode;
- }
-
- /* decode */
- { BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
- HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
- }
-
- /* check */
- if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
-
- /* decoded size */
- return dstSize;
-}
-
size_t HUF_decompress1X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -684,7 +831,7 @@ size_t HUF_decompress1X4_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
@@ -699,7 +846,7 @@ size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
+ return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
}
@@ -717,99 +864,6 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
}
-static size_t HUF_decompress4X4_usingDTable_internal(
- void* dst, size_t dstSize,
- const void* cSrc, size_t cSrcSize,
- const HUF_DTable* DTable)
-{
- if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
- { const BYTE* const istart = (const BYTE*) cSrc;
- BYTE* const ostart = (BYTE*) dst;
- BYTE* const oend = ostart + dstSize;
- const void* const dtPtr = DTable+1;
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
-
- /* Init */
- BIT_DStream_t bitD1;
- BIT_DStream_t bitD2;
- BIT_DStream_t bitD3;
- BIT_DStream_t bitD4;
- size_t const length1 = MEM_readLE16(istart);
- size_t const length2 = MEM_readLE16(istart+2);
- size_t const length3 = MEM_readLE16(istart+4);
- size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
- const BYTE* const istart1 = istart + 6; /* jumpTable */
- const BYTE* const istart2 = istart1 + length1;
- const BYTE* const istart3 = istart2 + length2;
- const BYTE* const istart4 = istart3 + length3;
- size_t const segmentSize = (dstSize+3) / 4;
- BYTE* const opStart2 = ostart + segmentSize;
- BYTE* const opStart3 = opStart2 + segmentSize;
- BYTE* const opStart4 = opStart3 + segmentSize;
- BYTE* op1 = ostart;
- BYTE* op2 = opStart2;
- BYTE* op3 = opStart3;
- BYTE* op4 = opStart4;
- U32 endSignal;
- DTableDesc const dtd = HUF_getDTableDesc(DTable);
- U32 const dtLog = dtd.tableLog;
-
- if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
- if (HUF_isError(errorCode)) return errorCode; }
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
- if (HUF_isError(errorCode)) return errorCode; }
-
- /* 16-32 symbols per loop (4-8 symbols per stream) */
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
- }
-
- /* check corruption */
- if (op1 > opStart2) return ERROR(corruption_detected);
- if (op2 > opStart3) return ERROR(corruption_detected);
- if (op3 > opStart4) return ERROR(corruption_detected);
- /* note : op4 already verified within main loop */
-
- /* finish bitStreams one by one */
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
- /* check */
- { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
- if (!endCheck) return ERROR(corruption_detected); }
-
- /* decoded size */
- return dstSize;
- }
-}
-
-
size_t HUF_decompress4X4_usingDTable(
void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
@@ -817,13 +871,12 @@ size_t HUF_decompress4X4_usingDTable(
{
DTableDesc dtd = HUF_getDTableDesc(DTable);
if (dtd.tableType != 1) return ERROR(GENERIC);
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
-
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
const void* cSrc, size_t cSrcSize,
- void* workSpace, size_t wkspSize)
+ void* workSpace, size_t wkspSize, int bmi2)
{
const BYTE* ip = (const BYTE*) cSrc;
@@ -833,7 +886,14 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
ip += hSize; cSrcSize -= hSize;
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
+ return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+{
+ return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
}
@@ -861,8 +921,8 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -870,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
const HUF_DTable* DTable)
{
DTableDesc const dtd = HUF_getDTableDesc(DTable);
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
}
@@ -898,21 +958,22 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
};
/** HUF_selectDecoder() :
-* Tells which decoder is likely to decode faster,
-* based on a set of pre-determined metrics.
-* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-* Assumption : 0 < cSrcSize, dstSize <= 128 KB */
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
+ * Assumption : 0 < dstSize <= 128 KB */
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
{
+ assert(dstSize > 0);
+ assert(dstSize <= 128 KB);
/* decoder timing evaluation */
- U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
- U32 const D256 = (U32)(dstSize >> 8);
- U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
- U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
- DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
-
- return DTime1 < DTime0;
-}
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
+ U32 const D256 = (U32)(dstSize >> 8);
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
+ return DTime1 < DTime0;
+} }
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
@@ -994,3 +1055,42 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
workSpace, sizeof(workSpace));
}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ }
+}
diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c
index a59d944112..3ec6a1cb32 100644
--- a/thirdparty/zstd/decompress/zstd_decompress.c
+++ b/thirdparty/zstd/decompress/zstd_decompress.c
@@ -14,8 +14,9 @@
*****************************************************************/
/*!
* HEAPMODE :
- * Select how default decompression function ZSTD_decompress() will allocate memory,
- * in memory stack (0), or in memory heap (1, requires malloc())
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
*/
#ifndef ZSTD_HEAPMODE
# define ZSTD_HEAPMODE 1
@@ -23,17 +24,18 @@
/*!
* LEGACY_SUPPORT :
-* if set to 1, ZSTD_decompress() can decode older formats (v0.1+)
+* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
*/
#ifndef ZSTD_LEGACY_SUPPORT
# define ZSTD_LEGACY_SUPPORT 0
#endif
/*!
-* MAXWINDOWSIZE_DEFAULT :
-* maximum window size accepted by DStream, by default.
-* Frames requiring more memory will be rejected.
-*/
+ * MAXWINDOWSIZE_DEFAULT :
+ * maximum window size accepted by DStream __by default__.
+ * Frames requiring more memory will be rejected.
+ * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
#endif
@@ -43,6 +45,7 @@
* Dependencies
*********************************************************/
#include <string.h> /* memcpy, memmove, memset */
+#include "cpu.h"
#include "mem.h" /* low level memory routines */
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
@@ -80,10 +83,25 @@ typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
typedef enum { zdss_init=0, zdss_loadHeader,
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef struct {
+ U32 fastMode;
+ U32 tableLog;
+} ZSTD_seqSymbol_header;
+
+typedef struct {
+ U16 nextState;
+ BYTE nbAdditionalBits;
+ BYTE nbBits;
+ U32 baseValue;
+} ZSTD_seqSymbol;
+
+#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
+
typedef struct {
- FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
- FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
- FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+ ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
+ ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
+ ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
U32 rep[ZSTD_REP_NUM];
@@ -91,9 +109,9 @@ typedef struct {
struct ZSTD_DCtx_s
{
- const FSE_DTable* LLTptr;
- const FSE_DTable* MLTptr;
- const FSE_DTable* OFTptr;
+ const ZSTD_seqSymbol* LLTptr;
+ const ZSTD_seqSymbol* MLTptr;
+ const ZSTD_seqSymbol* OFTptr;
const HUF_DTable* HUFptr;
ZSTD_entropyDTables_t entropy;
const void* previousDstEnd; /* detect continuity */
@@ -116,6 +134,7 @@ struct ZSTD_DCtx_s
size_t litSize;
size_t rleSize;
size_t staticSize;
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
/* streaming */
ZSTD_DDict* ddictLocal;
@@ -173,6 +192,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->inBuffSize = 0;
dctx->outBuffSize = 0;
dctx->streamStage = zdss_init;
+ dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
}
ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
@@ -204,6 +224,7 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
ZSTD_DCtx* ZSTD_createDCtx(void)
{
+ DEBUGLOG(3, "ZSTD_createDCtx");
return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
}
@@ -234,8 +255,8 @@ void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
/*-*************************************************************
-* Decompression section
-***************************************************************/
+ * Frame header decoding
+ ***************************************************************/
/*! ZSTD_isFrame() :
* Tells if the content of `buffer` starts with a valid Frame Identifier.
@@ -257,7 +278,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
/** ZSTD_frameHeaderSize_internal() :
* srcSize must be large enough to reach header size fields.
- * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
* @return : size of the Frame Header
* or an error code, which can be tested with ZSTD_isError() */
static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
@@ -480,6 +501,10 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he
}
+/*-*************************************************************
+ * Block decoding
+ ***************************************************************/
+
/*! ZSTD_getcBlockSize() :
* Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -566,13 +591,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
if (HUF_isError((litEncType==set_repeat) ?
( singleStream ?
- HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) :
- HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr) ) :
+ HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
+ HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
( singleStream ?
- HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) :
- HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
- dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
+ HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
+ HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
+ dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
return ERROR(corruption_detected);
dctx->litPtr = dctx->litBuffer;
@@ -647,115 +672,268 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
}
}
-
-typedef union {
- FSE_decode_t realData;
- U32 alignedBy4;
-} FSE_decode_t4;
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
/* Default FSE distribution table for Literal Lengths */
-static const FSE_decode_t4 LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
- { { LL_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
- /* base, symbol, bits */
- { { 0, 0, 4 } }, { { 16, 0, 4 } }, { { 32, 1, 5 } }, { { 0, 3, 5 } },
- { { 0, 4, 5 } }, { { 0, 6, 5 } }, { { 0, 7, 5 } }, { { 0, 9, 5 } },
- { { 0, 10, 5 } }, { { 0, 12, 5 } }, { { 0, 14, 6 } }, { { 0, 16, 5 } },
- { { 0, 18, 5 } }, { { 0, 19, 5 } }, { { 0, 21, 5 } }, { { 0, 22, 5 } },
- { { 0, 24, 5 } }, { { 32, 25, 5 } }, { { 0, 26, 5 } }, { { 0, 27, 6 } },
- { { 0, 29, 6 } }, { { 0, 31, 6 } }, { { 32, 0, 4 } }, { { 0, 1, 4 } },
- { { 0, 2, 5 } }, { { 32, 4, 5 } }, { { 0, 5, 5 } }, { { 32, 7, 5 } },
- { { 0, 8, 5 } }, { { 32, 10, 5 } }, { { 0, 11, 5 } }, { { 0, 13, 6 } },
- { { 32, 16, 5 } }, { { 0, 17, 5 } }, { { 32, 19, 5 } }, { { 0, 20, 5 } },
- { { 32, 22, 5 } }, { { 0, 23, 5 } }, { { 0, 25, 4 } }, { { 16, 25, 4 } },
- { { 32, 26, 5 } }, { { 0, 28, 6 } }, { { 0, 30, 6 } }, { { 48, 0, 4 } },
- { { 16, 1, 4 } }, { { 32, 2, 5 } }, { { 32, 3, 5 } }, { { 32, 5, 5 } },
- { { 32, 6, 5 } }, { { 32, 8, 5 } }, { { 32, 9, 5 } }, { { 32, 11, 5 } },
- { { 32, 12, 5 } }, { { 0, 15, 6 } }, { { 32, 17, 5 } }, { { 32, 18, 5 } },
- { { 32, 20, 5 } }, { { 32, 21, 5 } }, { { 32, 23, 5 } }, { { 32, 24, 5 } },
- { { 0, 35, 6 } }, { { 0, 34, 6 } }, { { 0, 33, 6 } }, { { 0, 32, 6 } },
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
}; /* LL_defaultDTable */
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
+}; /* OF_defaultDTable */
+
+
/* Default FSE distribution table for Match Lengths */
-static const FSE_decode_t4 ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
- { { ML_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
- /* base, symbol, bits */
- { { 0, 0, 6 } }, { { 0, 1, 4 } }, { { 32, 2, 5 } }, { { 0, 3, 5 } },
- { { 0, 5, 5 } }, { { 0, 6, 5 } }, { { 0, 8, 5 } }, { { 0, 10, 6 } },
- { { 0, 13, 6 } }, { { 0, 16, 6 } }, { { 0, 19, 6 } }, { { 0, 22, 6 } },
- { { 0, 25, 6 } }, { { 0, 28, 6 } }, { { 0, 31, 6 } }, { { 0, 33, 6 } },
- { { 0, 35, 6 } }, { { 0, 37, 6 } }, { { 0, 39, 6 } }, { { 0, 41, 6 } },
- { { 0, 43, 6 } }, { { 0, 45, 6 } }, { { 16, 1, 4 } }, { { 0, 2, 4 } },
- { { 32, 3, 5 } }, { { 0, 4, 5 } }, { { 32, 6, 5 } }, { { 0, 7, 5 } },
- { { 0, 9, 6 } }, { { 0, 12, 6 } }, { { 0, 15, 6 } }, { { 0, 18, 6 } },
- { { 0, 21, 6 } }, { { 0, 24, 6 } }, { { 0, 27, 6 } }, { { 0, 30, 6 } },
- { { 0, 32, 6 } }, { { 0, 34, 6 } }, { { 0, 36, 6 } }, { { 0, 38, 6 } },
- { { 0, 40, 6 } }, { { 0, 42, 6 } }, { { 0, 44, 6 } }, { { 32, 1, 4 } },
- { { 48, 1, 4 } }, { { 16, 2, 4 } }, { { 32, 4, 5 } }, { { 32, 5, 5 } },
- { { 32, 7, 5 } }, { { 32, 8, 5 } }, { { 0, 11, 6 } }, { { 0, 14, 6 } },
- { { 0, 17, 6 } }, { { 0, 20, 6 } }, { { 0, 23, 6 } }, { { 0, 26, 6 } },
- { { 0, 29, 6 } }, { { 0, 52, 6 } }, { { 0, 51, 6 } }, { { 0, 50, 6 } },
- { { 0, 49, 6 } }, { { 0, 48, 6 } }, { { 0, 47, 6 } }, { { 0, 46, 6 } },
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
+ /* nextState, nbAddBits, nbBits, baseVal */
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
}; /* ML_defaultDTable */
-/* Default FSE distribution table for Offset Codes */
-static const FSE_decode_t4 OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
- { { OF_DEFAULTNORMLOG, 1, 1 } }, /* header : tableLog, fastMode, fastMode */
- /* base, symbol, bits */
- { { 0, 0, 5 } }, { { 0, 6, 4 } },
- { { 0, 9, 5 } }, { { 0, 15, 5 } },
- { { 0, 21, 5 } }, { { 0, 3, 5 } },
- { { 0, 7, 4 } }, { { 0, 12, 5 } },
- { { 0, 18, 5 } }, { { 0, 23, 5 } },
- { { 0, 5, 5 } }, { { 0, 8, 4 } },
- { { 0, 14, 5 } }, { { 0, 20, 5 } },
- { { 0, 2, 5 } }, { { 16, 7, 4 } },
- { { 0, 11, 5 } }, { { 0, 17, 5 } },
- { { 0, 22, 5 } }, { { 0, 4, 5 } },
- { { 16, 8, 4 } }, { { 0, 13, 5 } },
- { { 0, 19, 5 } }, { { 0, 1, 5 } },
- { { 16, 6, 4 } }, { { 0, 10, 5 } },
- { { 0, 16, 5 } }, { { 0, 28, 5 } },
- { { 0, 27, 5 } }, { { 0, 26, 5 } },
- { { 0, 25, 5 } }, { { 0, 24, 5 } },
-}; /* OF_defaultDTable */
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+{
+ void* ptr = dt;
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+ ZSTD_seqSymbol* const cell = dt + 1;
+
+ DTableH->tableLog = 0;
+ DTableH->fastMode = 0;
+
+ cell->nbBits = 0;
+ cell->nextState = 0;
+ assert(nbAddBits < 255);
+ cell->nbAdditionalBits = (BYTE)nbAddBits;
+ cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off) */
+static void
+ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+ const short* normalizedCounter, unsigned maxSymbolValue,
+ const U32* baseValue, const U32* nbAdditionalBits,
+ unsigned tableLog)
+{
+ ZSTD_seqSymbol* const tableDecode = dt+1;
+ U16 symbolNext[MaxSeq+1];
+
+ U32 const maxSV1 = maxSymbolValue + 1;
+ U32 const tableSize = 1 << tableLog;
+ U32 highThreshold = tableSize-1;
+
+ /* Sanity Checks */
+ assert(maxSymbolValue <= MaxSeq);
+ assert(tableLog <= MaxFSELog);
+
+ /* Init, lay down lowprob symbols */
+ { ZSTD_seqSymbol_header DTableH;
+ DTableH.tableLog = tableLog;
+ DTableH.fastMode = 1;
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
+ U32 s;
+ for (s=0; s<maxSV1; s++) {
+ if (normalizedCounter[s]==-1) {
+ tableDecode[highThreshold--].baseValue = s;
+ symbolNext[s] = 1;
+ } else {
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+ symbolNext[s] = normalizedCounter[s];
+ } } }
+ memcpy(dt, &DTableH, sizeof(DTableH));
+ }
+
+ /* Spread symbols */
+ { U32 const tableMask = tableSize-1;
+ U32 const step = FSE_TABLESTEP(tableSize);
+ U32 s, position = 0;
+ for (s=0; s<maxSV1; s++) {
+ int i;
+ for (i=0; i<normalizedCounter[s]; i++) {
+ tableDecode[position].baseValue = s;
+ position = (position + step) & tableMask;
+ while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
+ } }
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+ }
+
+ /* Build Decoding table */
+ { U32 u;
+ for (u=0; u<tableSize; u++) {
+ U32 const symbol = tableDecode[u].baseValue;
+ U32 const nextState = symbolNext[symbol]++;
+ tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+ assert(nbAdditionalBits[symbol] < 255);
+ tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+ tableDecode[u].baseValue = baseValue[symbol];
+ } }
+}
+
/*! ZSTD_buildSeqTable() :
* @return : nb bytes read from src,
- * or an error code if it fails, testable with ZSTD_isError()
- */
-static size_t ZSTD_buildSeqTable(FSE_DTable* DTableSpace, const FSE_DTable** DTablePtr,
+ * or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
symbolEncodingType_e type, U32 max, U32 maxLog,
const void* src, size_t srcSize,
- const FSE_decode_t4* defaultTable, U32 flagRepeatTable)
+ const U32* baseValue, const U32* nbAdditionalBits,
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
{
- const void* const tmpPtr = defaultTable; /* bypass strict aliasing */
switch(type)
{
case set_rle :
if (!srcSize) return ERROR(srcSize_wrong);
if ( (*(const BYTE*)src) > max) return ERROR(corruption_detected);
- FSE_buildDTable_rle(DTableSpace, *(const BYTE*)src);
+ { U32 const symbol = *(const BYTE*)src;
+ U32 const baseline = baseValue[symbol];
+ U32 const nbBits = nbAdditionalBits[symbol];
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+ }
*DTablePtr = DTableSpace;
return 1;
case set_basic :
- *DTablePtr = (const FSE_DTable*)tmpPtr;
+ *DTablePtr = defaultTable;
return 0;
case set_repeat:
if (!flagRepeatTable) return ERROR(corruption_detected);
return 0;
- default : /* impossible */
case set_compressed :
{ U32 tableLog;
S16 norm[MaxSeq+1];
size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
if (FSE_isError(headerSize)) return ERROR(corruption_detected);
if (tableLog > maxLog) return ERROR(corruption_detected);
- FSE_buildDTable(DTableSpace, norm, max, tableLog);
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
*DTablePtr = DTableSpace;
return headerSize;
- } }
+ }
+ default : /* impossible */
+ assert(0);
+ return ERROR(GENERIC);
+ }
}
+static const U32 LL_base[MaxLL+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 18, 20, 22, 24, 28, 32, 40,
+ 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+ 0x2000, 0x4000, 0x8000, 0x10000 };
+
+static const U32 OF_base[MaxOff+1] = {
+ 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
+ 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
+ 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+ 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static const U32 OF_bits[MaxOff+1] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31 };
+
+static const U32 ML_base[MaxML+1] = {
+ 3, 4, 5, 6, 7, 8, 9, 10,
+ 11, 12, 13, 14, 15, 16, 17, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 37, 39, 41, 43, 47, 51, 59,
+ 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+ 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
const void* src, size_t srcSize)
{
@@ -792,19 +970,27 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
/* Build DTables */
{ size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
LLtype, MaxLL, LLFSELog,
- ip, iend-ip, LL_defaultDTable, dctx->fseEntropy);
+ ip, iend-ip,
+ LL_base, LL_bits,
+ LL_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
ip += llhSize;
}
+
{ size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
OFtype, MaxOff, OffFSELog,
- ip, iend-ip, OF_defaultDTable, dctx->fseEntropy);
+ ip, iend-ip,
+ OF_base, OF_bits,
+ OF_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
ip += ofhSize;
}
+
{ size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
MLtype, MaxML, MLFSELog,
- ip, iend-ip, ML_defaultDTable, dctx->fseEntropy);
+ ip, iend-ip,
+ ML_base, ML_bits,
+ ML_defaultDTable, dctx->fseEntropy);
if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
ip += mlhSize;
}
@@ -822,10 +1008,15 @@ typedef struct {
} seq_t;
typedef struct {
+ size_t state;
+ const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
BIT_DStream_t DStream;
- FSE_DState_t stateLL;
- FSE_DState_t stateOffb;
- FSE_DState_t stateML;
+ ZSTD_fseState stateLL;
+ ZSTD_fseState stateOffb;
+ ZSTD_fseState stateML;
size_t prevOffset[ZSTD_REP_NUM];
const BYTE* prefixStart;
const BYTE* dictEnd;
@@ -880,118 +1071,6 @@ size_t ZSTD_execSequenceLast7(BYTE* op,
}
-typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-
-/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
- * bits before reloading. This value is the maximum number of bytes we read
- * after reloading when we are decoding long offets.
- */
-#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
- (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
- ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
- : 0)
-
-static seq_t ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
-{
- seq_t seq;
-
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
-
- U32 const llBits = LL_bits[llCode];
- U32 const mlBits = ML_bits[mlCode];
- U32 const ofBits = ofCode;
- U32 const totalBits = llBits+mlBits+ofBits;
-
- static const U32 LL_base[MaxLL+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
- 0x2000, 0x4000, 0x8000, 0x10000 };
-
- static const U32 ML_base[MaxML+1] = {
- 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26,
- 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 37, 39, 41, 43, 47, 51, 59,
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
- static const U32 OF_base[MaxOff+1] = {
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
-
- /* sequence */
- { size_t offset;
- if (!ofCode)
- offset = 0;
- else {
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
- assert(ofBits <= MaxOff);
- if (MEM_32bits() && longOffsets) {
- U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
- if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
- } else {
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
- }
- }
-
- if (ofCode <= 1) {
- offset += (llCode==0);
- if (offset) {
- size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset = temp;
- } else {
- offset = seqState->prevOffset[0];
- }
- } else {
- seqState->prevOffset[2] = seqState->prevOffset[1];
- seqState->prevOffset[1] = seqState->prevOffset[0];
- seqState->prevOffset[0] = offset;
- }
- seq.offset = offset;
- }
-
- seq.matchLength = ML_base[mlCode]
- + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
- BIT_reloadDStream(&seqState->DStream);
- if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
- BIT_reloadDStream(&seqState->DStream);
- /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
- seq.litLength = LL_base[llCode]
- + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
- if (MEM_32bits())
- BIT_reloadDStream(&seqState->DStream);
-
- DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-
- /* ANS state update */
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
-
- return seq;
-}
-
-
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
BYTE* const oend, seq_t sequence,
@@ -1073,10 +1152,199 @@ size_t ZSTD_execSequence(BYTE* op,
}
-static size_t ZSTD_decompressSequences(
- ZSTD_DCtx* dctx,
+HINT_INLINE
+size_t ZSTD_execSequenceLong(BYTE* op,
+ BYTE* const oend, seq_t sequence,
+ const BYTE** litPtr, const BYTE* const litLimit,
+ const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
+{
+ BYTE* const oLitEnd = op + sequence.litLength;
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+ const BYTE* match = sequence.match;
+
+ /* check */
+ if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
+ if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
+ if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
+
+ /* copy Literals */
+ ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
+ if (sequence.litLength > 8)
+ ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
+ op = oLitEnd;
+ *litPtr = iLitEnd; /* update for next sequence */
+
+ /* copy Match */
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+ /* offset beyond prefix */
+ if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
+ if (match + sequence.matchLength <= dictEnd) {
+ memmove(oLitEnd, match, sequence.matchLength);
+ return sequenceLength;
+ }
+ /* span extDict & currentPrefixSegment */
+ { size_t const length1 = dictEnd - match;
+ memmove(oLitEnd, match, length1);
+ op = oLitEnd + length1;
+ sequence.matchLength -= length1;
+ match = prefixStart;
+ if (op > oend_w || sequence.matchLength < MINMATCH) {
+ U32 i;
+ for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
+ return sequenceLength;
+ }
+ } }
+ assert(op <= oend_w);
+ assert(sequence.matchLength >= MINMATCH);
+
+ /* match within prefix */
+ if (sequence.offset < 8) {
+ /* close range match, overlap */
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
+ int const sub2 = dec64table[sequence.offset];
+ op[0] = match[0];
+ op[1] = match[1];
+ op[2] = match[2];
+ op[3] = match[3];
+ match += dec32table[sequence.offset];
+ ZSTD_copy4(op+4, match);
+ match -= sub2;
+ } else {
+ ZSTD_copy8(op, match);
+ }
+ op += 8; match += 8;
+
+ if (oMatchEnd > oend-(16-MINMATCH)) {
+ if (op < oend_w) {
+ ZSTD_wildcopy(op, match, oend_w - op);
+ match += oend_w - op;
+ op = oend_w;
+ }
+ while (op < oMatchEnd) *op++ = *match++;
+ } else {
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
+ }
+ return sequenceLength;
+}
+
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+ const void* ptr = dt;
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+ (U32)DStatePtr->state, DTableH->tableLog);
+ BIT_reloadDStream(bitD);
+ DStatePtr->table = dt + 1;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
+{
+ ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
+ U32 const nbBits = DInfo.nbBits;
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
+ DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bytes we read
+ * after reloading when we are decoding long offets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
+ : 0)
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+{
+ seq_t seq;
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
+ U32 const totalBits = llBits+mlBits+ofBits;
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
+
+ /* sequence */
+ { size_t offset;
+ if (!ofBits)
+ offset = 0;
+ else {
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+ assert(ofBits <= MaxOff);
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+ U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+ BIT_reloadDStream(&seqState->DStream);
+ if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+ assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
+ } else {
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+ }
+ }
+
+ if (ofBits <= 1) {
+ offset += (llBase==0);
+ if (offset) {
+ size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset = temp;
+ } else { /* offset == 0 */
+ offset = seqState->prevOffset[0];
+ }
+ } else {
+ seqState->prevOffset[2] = seqState->prevOffset[1];
+ seqState->prevOffset[1] = seqState->prevOffset[0];
+ seqState->prevOffset[0] = offset;
+ }
+ seq.offset = offset;
+ }
+
+ seq.matchLength = mlBase
+ + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+ BIT_reloadDStream(&seqState->DStream);
+ if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+ BIT_reloadDStream(&seqState->DStream);
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+ seq.litLength = llBase
+ + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */
+ if (MEM_32bits())
+ BIT_reloadDStream(&seqState->DStream);
+
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+ /* ANS state update */
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+
+ return seq;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
@@ -1089,26 +1357,17 @@ static size_t ZSTD_decompressSequences(
const BYTE* const base = (const BYTE*) (dctx->base);
const BYTE* const vBase = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- int nbSeq;
DEBUGLOG(5, "ZSTD_decompressSequences");
- /* Build Decoding Tables */
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
- DEBUGLOG(5, "ZSTD_decodeSeqHeaders: size=%u, nbSeq=%i",
- (U32)seqHSize, nbSeq);
- if (ZSTD_isError(seqHSize)) return seqHSize;
- ip += seqHSize;
- }
-
/* Regen sequences */
if (nbSeq) {
seqState_t seqState;
dctx->fseEntropy = 1;
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
nbSeq--;
@@ -1120,7 +1379,7 @@ static size_t ZSTD_decompressSequences(
} }
/* check if reached exact end */
- DEBUGLOG(5, "after decode loop, remaining nbSeq : %i", nbSeq);
+ DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
if (nbSeq) return ERROR(corruption_detected);
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
@@ -1136,46 +1395,32 @@ static size_t ZSTD_decompressSequences(
return op-ostart;
}
-
-HINT_INLINE
-seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
{
- seq_t seq;
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
- U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
- U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
- U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= MaxOff, by table construction */
- U32 const llBits = LL_bits[llCode];
- U32 const mlBits = ML_bits[mlCode];
- U32 const ofBits = ofCode;
- U32 const totalBits = llBits+mlBits+ofBits;
- static const U32 LL_base[MaxLL+1] = {
- 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
- 0x2000, 0x4000, 0x8000, 0x10000 };
-
- static const U32 ML_base[MaxML+1] = {
- 3, 4, 5, 6, 7, 8, 9, 10,
- 11, 12, 13, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26,
- 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 37, 39, 41, 43, 47, 51, 59,
- 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
- 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
-
- static const U32 OF_base[MaxOff+1] = {
- 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
- 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
- 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
- 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets)
+{
+ seq_t seq;
+ U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits;
+ U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits;
+ U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits;
+ U32 const totalBits = llBits+mlBits+ofBits;
+ U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue;
+ U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue;
+ U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue;
/* sequence */
{ size_t offset;
- if (!ofCode)
+ if (!ofBits)
offset = 0;
else {
ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
@@ -1183,17 +1428,17 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
assert(ofBits <= MaxOff);
if (MEM_32bits() && longOffsets) {
U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1);
- offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream);
if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
} else {
- offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
}
}
- if (ofCode <= 1) {
- offset += (llCode==0);
+ if (ofBits <= 1) {
+ offset += (llBase==0);
if (offset) {
size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
@@ -1211,7 +1456,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
seq.offset = offset;
}
- seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
+ seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */
if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
BIT_reloadDStream(&seqState->DStream);
if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
@@ -1219,7 +1464,7 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
/* Verify that there is enough bits to read the rest of the data in 64-bit mode. */
ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
- seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
+ seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */
if (MEM_32bits())
BIT_reloadDStream(&seqState->DStream);
@@ -1231,98 +1476,19 @@ seq_t ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const long
}
/* ANS state update */
- FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
- FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
+ ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
- FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
+ ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
return seq;
}
-
-HINT_INLINE
-size_t ZSTD_execSequenceLong(BYTE* op,
- BYTE* const oend, seq_t sequence,
- const BYTE** litPtr, const BYTE* const litLimit,
- const BYTE* const prefixStart, const BYTE* const dictStart, const BYTE* const dictEnd)
-{
- BYTE* const oLitEnd = op + sequence.litLength;
- size_t const sequenceLength = sequence.litLength + sequence.matchLength;
- BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
- BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
- const BYTE* const iLitEnd = *litPtr + sequence.litLength;
- const BYTE* match = sequence.match;
-
- /* check */
- if (oMatchEnd > oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
- if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
- if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd);
-
- /* copy Literals */
- ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */
- if (sequence.litLength > 8)
- ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
- op = oLitEnd;
- *litPtr = iLitEnd; /* update for next sequence */
-
- /* copy Match */
- if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
- /* offset beyond prefix */
- if (sequence.offset > (size_t)(oLitEnd - dictStart)) return ERROR(corruption_detected);
- if (match + sequence.matchLength <= dictEnd) {
- memmove(oLitEnd, match, sequence.matchLength);
- return sequenceLength;
- }
- /* span extDict & currentPrefixSegment */
- { size_t const length1 = dictEnd - match;
- memmove(oLitEnd, match, length1);
- op = oLitEnd + length1;
- sequence.matchLength -= length1;
- match = prefixStart;
- if (op > oend_w || sequence.matchLength < MINMATCH) {
- U32 i;
- for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
- return sequenceLength;
- }
- } }
- assert(op <= oend_w);
- assert(sequence.matchLength >= MINMATCH);
-
- /* match within prefix */
- if (sequence.offset < 8) {
- /* close range match, overlap */
- static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
- static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
- int const sub2 = dec64table[sequence.offset];
- op[0] = match[0];
- op[1] = match[1];
- op[2] = match[2];
- op[3] = match[3];
- match += dec32table[sequence.offset];
- ZSTD_copy4(op+4, match);
- match -= sub2;
- } else {
- ZSTD_copy8(op, match);
- }
- op += 8; match += 8;
-
- if (oMatchEnd > oend-(16-MINMATCH)) {
- if (op < oend_w) {
- ZSTD_wildcopy(op, match, oend_w - op);
- match += oend_w - op;
- op = oend_w;
- }
- while (op < oMatchEnd) *op++ = *match++;
- } else {
- ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */
- }
- return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequencesLong(
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
ZSTD_DCtx* dctx,
void* dst, size_t maxDstSize,
- const void* seqStart, size_t seqSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
const ZSTD_longOffset_e isLongOffset)
{
const BYTE* ip = (const BYTE*)seqStart;
@@ -1335,13 +1501,6 @@ static size_t ZSTD_decompressSequencesLong(
const BYTE* const prefixStart = (const BYTE*) (dctx->base);
const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
- int nbSeq;
-
- /* Build Decoding Tables */
- { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
- if (ZSTD_isError(seqHSize)) return seqHSize;
- ip += seqHSize;
- }
/* Regen sequences */
if (nbSeq) {
@@ -1358,18 +1517,18 @@ static size_t ZSTD_decompressSequencesLong(
seqState.pos = (size_t)(op-prefixStart);
seqState.dictEnd = dictEnd;
CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
- FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
- FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
- FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
/* prepare in advance */
- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) {
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
}
if (seqNb<seqAdvance) return ERROR(corruption_detected);
/* decode and decompress */
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) {
+ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
@@ -1389,6 +1548,9 @@ static size_t ZSTD_decompressSequencesLong(
/* save reps for next block */
{ U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+#undef STORED_SEQS
+#undef STOSEQ_MASK
+#undef ADVANCED_SEQS
}
/* last literal segment */
@@ -1401,6 +1563,96 @@ static size_t ZSTD_decompressSequencesLong(
return op-ostart;
}
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+#endif
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+ ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
+ const void *seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset);
+
+static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+ if (dctx->bmi2) {
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+#endif
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+ void* dst, size_t maxDstSize,
+ const void* seqStart, size_t seqSize, int nbSeq,
+ const ZSTD_longOffset_e isLongOffset)
+{
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+ if (dctx->bmi2) {
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+ }
+#endif
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
+}
+
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ * compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+ const void* ptr = offTable;
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+ const ZSTD_seqSymbol* table = offTable + 1;
+ U32 const max = 1 << tableLog;
+ U32 u, total = 0;
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+ assert(max <= (1 << OffFSELog)); /* max not too large */
+ for (u=0; u<max; u++) {
+ if (table[u].nbAdditionalBits > 22) total += 1;
+ }
+
+ assert(tableLog <= OffFSELog);
+ total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
+
+ return total;
+}
+
static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
@@ -1410,13 +1662,9 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
/* isLongOffset must be true if there are long offsets.
* Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
* We don't expect that to be the case in 64-bit mode.
- * If we are in block mode we don't know the window size, so we have to be
- * conservative.
+ * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
*/
ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
- /* windowSize could be any value at this point, since it is only validated
- * in the streaming API.
- */
DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
@@ -1428,9 +1676,24 @@ static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
ip += litCSize;
srcSize -= litCSize;
}
- if (frame && dctx->fParams.windowSize > (1<<23))
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, isLongOffset);
+
+ /* Build Decoding Tables */
+ { int nbSeq;
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+ if (ZSTD_isError(seqHSize)) return seqHSize;
+ ip += seqHSize;
+ srcSize -= seqHSize;
+
+ if ( (!frame || dctx->fParams.windowSize > (1<<24))
+ && (nbSeq>0) ) { /* could probably use a larger nbSeq limit */
+ U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+ if (shareLongOffsets >= minShare)
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ }
+
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
+ }
}
@@ -1758,7 +2021,7 @@ static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skip
* or an error code, which can be tested using ZSTD_isError() */
size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
- DEBUGLOG(5, "ZSTD_decompressContinue");
+ DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (U32)srcSize);
/* Sanity check */
if (srcSize != dctx->expected) return ERROR(srcSize_wrong); /* not allowed */
if (dstCapacity) ZSTD_checkContinuity(dctx, dst);
@@ -1819,12 +2082,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decompressLastBlock:
case ZSTDds_decompressBlock:
- DEBUGLOG(5, "case ZSTDds_decompressBlock");
+ DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
{ size_t rSize;
switch(dctx->bType)
{
case bt_compressed:
- DEBUGLOG(5, "case bt_compressed");
+ DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
break;
case bt_raw :
@@ -1838,12 +2101,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
return ERROR(corruption_detected);
}
if (ZSTD_isError(rSize)) return rSize;
- DEBUGLOG(5, "decoded size from block : %u", (U32)rSize);
+ DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (U32)rSize);
dctx->decodedSize += rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
- DEBUGLOG(4, "decoded size from frame : %u", (U32)dctx->decodedSize);
+ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (U32)dctx->decodedSize);
if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
if (dctx->decodedSize != dctx->fParams.frameContentSize) {
return ERROR(corruption_detected);
@@ -1867,7 +2130,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
assert(srcSize == 4); /* guaranteed by dctx->expected */
{ U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
U32 const check32 = MEM_readLE32(src);
- DEBUGLOG(4, "checksum : calculated %08X :: %08X read", h32, check32);
+ DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", h32, check32);
if (check32 != h32) return ERROR(checksum_wrong);
dctx->expected = 0;
dctx->stage = ZSTDds_getFrameHeaderSize;
@@ -1925,8 +2188,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
U32 offcodeMaxValue = MaxOff, offcodeLog;
size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
+ if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
- CHECK_E(FSE_buildDTable(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
+ ZSTD_buildFSETable(entropy->OFTable,
+ offcodeNCount, offcodeMaxValue,
+ OF_base, OF_bits,
+ offcodeLog);
dictPtr += offcodeHeaderSize;
}
@@ -1934,8 +2201,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
unsigned matchlengthMaxValue = MaxML, matchlengthLog;
size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
- CHECK_E(FSE_buildDTable(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
+ ZSTD_buildFSETable(entropy->MLTable,
+ matchlengthNCount, matchlengthMaxValue,
+ ML_base, ML_bits,
+ matchlengthLog);
dictPtr += matchlengthHeaderSize;
}
@@ -1943,8 +2214,12 @@ static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const
unsigned litlengthMaxValue = MaxLL, litlengthLog;
size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
+ if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
- CHECK_E(FSE_buildDTable(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
+ ZSTD_buildFSETable(entropy->LLTable,
+ litlengthNCount, litlengthMaxValue,
+ LL_base, LL_bits,
+ litlengthLog);
dictPtr += litlengthHeaderSize;
}
@@ -2062,13 +2337,23 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddi
return 0;
}
-static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
+static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType)
{
ddict->dictID = 0;
ddict->entropyPresent = 0;
- if (ddict->dictSize < 8) return 0;
+ if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+ if (ddict->dictSize < 8) {
+ if (dictContentType == ZSTD_dct_fullDict)
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
+ return 0; /* pure content mode */
+ }
{ U32 const magic = MEM_readLE32(ddict->dictContent);
- if (magic != ZSTD_MAGIC_DICTIONARY) return 0; /* pure content mode */
+ if (magic != ZSTD_MAGIC_DICTIONARY) {
+ if (dictContentType == ZSTD_dct_fullDict)
+ return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
+ return 0; /* pure content mode */
+ }
}
ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize);
@@ -2079,7 +2364,10 @@ static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict)
}
-static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
{
if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
ddict->dictBuffer = NULL;
@@ -2095,12 +2383,15 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, const void* dict, size_
ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
/* parse dictionary content */
- CHECK_F( ZSTD_loadEntropy_inDDict(ddict) );
+ CHECK_F( ZSTD_loadEntropy_inDDict(ddict, dictContentType) );
return 0;
}
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_customMem customMem)
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_customMem customMem)
{
if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
@@ -2108,7 +2399,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
if (!ddict) return NULL;
ddict->cMem = customMem;
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod) )) {
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) {
ZSTD_freeDDict(ddict);
return NULL;
}
@@ -2124,7 +2415,7 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_di
ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
- return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, allocator);
+ return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
}
/*! ZSTD_createDDict_byReference() :
@@ -2134,13 +2425,15 @@ ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
{
ZSTD_customMem const allocator = { NULL, NULL, NULL };
- return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, allocator);
+ return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
}
-ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
- const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod)
+const ZSTD_DDict* ZSTD_initStaticDDict(
+ void* workspace, size_t workspaceSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType)
{
size_t const neededSpace =
sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
@@ -2153,7 +2446,7 @@ ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
memcpy(ddict+1, dict, dictSize); /* local copy */
dict = ddict+1;
}
- if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef) ))
+ if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) ))
return NULL;
return ddict;
}
@@ -2247,6 +2540,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
ZSTD_DStream* ZSTD_createDStream(void)
{
+ DEBUGLOG(3, "ZSTD_createDStream");
return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
}
@@ -2271,58 +2565,99 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds)
size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
-size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
- zds->streamStage = zdss_loadHeader;
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
- ZSTD_freeDDict(zds->ddictLocal);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
+ ZSTD_freeDDict(dctx->ddictLocal);
if (dict && dictSize >= 8) {
- zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
- if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
- } else zds->ddictLocal = NULL;
- zds->ddict = zds->ddictLocal;
- zds->legacyVersion = 0;
- zds->hostageByte = 0;
+ dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+ if (dctx->ddictLocal == NULL) return ERROR(memory_allocation);
+ } else {
+ dctx->ddictLocal = NULL;
+ }
+ dctx->ddict = dctx->ddictLocal;
+ return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+ return ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType);
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+ return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+ DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+ zds->streamStage = zdss_init;
+ CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
return ZSTD_frameHeaderSize_prefix;
}
/* note : this variant can't fail */
size_t ZSTD_initDStream(ZSTD_DStream* zds)
{
+ DEBUGLOG(4, "ZSTD_initDStream");
return ZSTD_initDStream_usingDict(zds, NULL, 0);
}
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
+ dctx->ddict = ddict;
+ return 0;
+}
+
/* ZSTD_initDStream_usingDDict() :
* ddict will just be referenced, and must outlive decompression session
* this function cannot fail */
-size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict)
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
{
- size_t const initResult = ZSTD_initDStream(zds);
- zds->ddict = ddict;
+ size_t const initResult = ZSTD_initDStream(dctx);
+ dctx->ddict = ddict;
return initResult;
}
-size_t ZSTD_resetDStream(ZSTD_DStream* zds)
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_frameHeaderSize_prefix.
+ * this function cannot fail */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
{
- zds->streamStage = zdss_loadHeader;
- zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
- zds->legacyVersion = 0;
- zds->hostageByte = 0;
+ DEBUGLOG(4, "ZSTD_resetDStream");
+ dctx->streamStage = zdss_loadHeader;
+ dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0;
+ dctx->legacyVersion = 0;
+ dctx->hostageByte = 0;
return ZSTD_frameHeaderSize_prefix;
}
-size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
+size_t ZSTD_setDStreamParameter(ZSTD_DStream* dctx,
ZSTD_DStreamParameter_e paramType, unsigned paramValue)
{
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
- if ((unsigned)zds->streamStage > (unsigned)zdss_loadHeader)
- return ERROR(stage_wrong);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
switch(paramType)
{
default : return ERROR(parameter_unsupported);
case DStream_p_maxWindowSize :
DEBUGLOG(4, "setting maxWindowSize = %u KB", paramValue >> 10);
- zds->maxWindowSize = paramValue ? paramValue : (U32)(-1);
+ dctx->maxWindowSize = paramValue ? paramValue : (U32)(-1);
break;
}
return 0;
@@ -2330,9 +2665,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
{
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
- return ERROR(stage_wrong);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
dctx->maxWindowSize = maxWindowSize;
return 0;
}
@@ -2340,17 +2673,15 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
{
DEBUGLOG(4, "ZSTD_DCtx_setFormat : %u", (unsigned)format);
- ZSTD_STATIC_ASSERT((unsigned)zdss_loadHeader >= (unsigned)zdss_init);
- if ((unsigned)dctx->streamStage > (unsigned)zdss_loadHeader)
- return ERROR(stage_wrong);
+ if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
dctx->format = format;
return 0;
}
-size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds)
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
{
- return ZSTD_sizeof_DCtx(zds);
+ return ZSTD_sizeof_DCtx(dctx);
}
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
@@ -2417,23 +2748,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
}
DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
-#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
- if (zds->legacyVersion) {
- /* legacy support is incompatible with static dctx */
- if (zds->staticSize) return ERROR(memory_allocation);
- return ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
- }
-#endif
-
while (someMoreWork) {
switch(zds->streamStage)
{
case zdss_init :
+ DEBUGLOG(5, "stage zdss_init => transparent reset ");
ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
/* fall-through */
case zdss_loadHeader :
DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
+ if (zds->legacyVersion) {
+ /* legacy support is incompatible with static dctx */
+ if (zds->staticSize) return ERROR(memory_allocation);
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input);
+ if (hint==0) zds->streamStage = zdss_init;
+ return hint;
+ } }
+#endif
{ size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
DEBUGLOG(5, "header size : %u", (U32)hSize);
if (ZSTD_isError(hSize)) {
@@ -2442,14 +2775,17 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (legacyVersion) {
const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
+ DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion);
/* legacy support is incompatible with static dctx */
if (zds->staticSize) return ERROR(memory_allocation);
CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext,
zds->previousLegacyVersion, legacyVersion,
dict, dictSize));
zds->legacyVersion = zds->previousLegacyVersion = legacyVersion;
- return ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
- }
+ { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input);
+ if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */
+ return hint;
+ } }
#endif
return hSize; /* error */
}
@@ -2559,6 +2895,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
if (ip==iend) { someMoreWork = 0; break; } /* no more input */
zds->streamStage = zdss_load;
/* fall-through */
+
case zdss_load:
{ size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
size_t const toLoad = neededInSize - zds->inPos;
@@ -2585,6 +2922,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
} }
zds->streamStage = zdss_flush;
/* fall-through */
+
case zdss_flush:
{ size_t const toFlushSize = zds->outEnd - zds->outStart;
size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize);
@@ -2631,8 +2969,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
return 1;
} /* nextSrcSizeHint==0 */
nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */
- if (zds->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */
- nextSrcSizeHint -= zds->inPos; /* already loaded*/
+ assert(zds->inPos <= nextSrcSizeHint);
+ nextSrcSizeHint -= zds->inPos; /* part already loaded*/
return nextSrcSizeHint;
}
}
diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h
index 9ac0a73dce..6405da602e 100644
--- a/thirdparty/zstd/zstd.h
+++ b/thirdparty/zstd/zstd.h
@@ -45,11 +45,11 @@ extern "C" {
Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
Compression can be done in:
- a single step (described as Simple API)
- - a single step, reusing a context (described as Explicit memory management)
+ - a single step, reusing a context (described as Explicit context)
- unbounded multiple steps (described as Streaming compression)
The compression ratio achievable on small data can be highly improved using a dictionary in:
- a single step (described as Simple dictionary API)
- - a single step, reusing a dictionary (described as Fast dictionary API)
+ - a single step, reusing a dictionary (described as Bulk-processing dictionary API)
Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
Advanced experimental APIs shall never be used with a dynamic library.
@@ -59,7 +59,7 @@ extern "C" {
/*------ Version ------*/
#define ZSTD_VERSION_MAJOR 1
#define ZSTD_VERSION_MINOR 3
-#define ZSTD_VERSION_RELEASE 3
+#define ZSTD_VERSION_RELEASE 4
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
@@ -68,7 +68,7 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll versio
#define ZSTD_QUOTE(str) #str
#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
-ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0 */
+ZSTDLIB_API const char* ZSTD_versionString(void); /* added in v1.3.0 */
/***************************************
@@ -92,7 +92,7 @@ ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
const void* src, size_t compressedSize);
-/*! ZSTD_getFrameContentSize() : v1.3.0
+/*! ZSTD_getFrameContentSize() : added in v1.3.0
* `src` should point to the start of a ZSTD encoded frame.
* `srcSize` must be at least as large as the frame header.
* hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
@@ -120,26 +120,24 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
/*! ZSTD_getDecompressedSize() :
* NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
- * Both functions work the same way,
- * but ZSTD_getDecompressedSize() blends
- * "empty", "unknown" and "error" results in the same return value (0),
- * while ZSTD_getFrameContentSize() distinguishes them.
- *
- * 'src' is the start of a zstd compressed frame.
- * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
+ * Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ * "empty", "unknown" and "error" results to the same return value (0),
+ * while ZSTD_getFrameContentSize() gives them separate return values.
+ * `src` is the start of a zstd compressed frame.
+ * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
/*====== Helper functions ======*/
#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
-ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case scenario */
+ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */
ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */
ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */
/***************************************
-* Explicit memory management
+* Explicit context
***************************************/
/*= Compression context
* When compressing many times,
@@ -345,7 +343,7 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output
* *******************************************************************************/
typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */
- /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
+ /* For compatibility with versions <= v1.2.0, continue to consider them separated. */
/*===== ZSTD_DStream management functions =====*/
ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);
@@ -375,23 +373,24 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output
/* --- Constants ---*/
#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
-#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */
+#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* >= v0.7.0 */
#define ZSTD_WINDOWLOG_MAX_32 30
#define ZSTD_WINDOWLOG_MAX_64 31
#define ZSTD_WINDOWLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX MIN(ZSTD_WINDOWLOG_MAX, 30)
+#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
#define ZSTD_HASHLOG_MIN 6
-#define ZSTD_CHAINLOG_MAX MIN(ZSTD_WINDOWLOG_MAX+1, 30)
+#define ZSTD_CHAINLOG_MAX_32 29
+#define ZSTD_CHAINLOG_MAX_64 30
+#define ZSTD_CHAINLOG_MAX ((unsigned)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
#define ZSTD_HASHLOG3_MAX 17
#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
#define ZSTD_SEARCHLOG_MIN 1
#define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
#define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_TARGETLENGTH_MIN 4 /* only useful for btopt */
-#define ZSTD_TARGETLENGTH_MAX 999 /* only useful for btopt */
+#define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */
#define ZSTD_LDM_MINMATCH_MIN 4
#define ZSTD_LDM_MINMATCH_MAX 4096
#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
@@ -432,12 +431,17 @@ typedef struct {
typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
-/*--- Custom memory allocation functions ---*/
-typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
-typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
-typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
-/* use this constant to defer to stdlib's functions */
-static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };
+typedef enum {
+ ZSTD_dct_auto=0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+ ZSTD_dct_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+ ZSTD_dct_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+ ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
+ ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
/***************************************
@@ -483,12 +487,12 @@ ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
/***************************************
-* Context memory usage
+* Memory management
***************************************/
/*! ZSTD_sizeof_*() :
* These functions give the current memory usage of selected object.
- * Object memory usage can evolve when re-used multiple times. */
+ * Object memory usage can evolve when re-used. */
ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
@@ -503,8 +507,8 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* It will also consider src size to be arbitrarily "large", which is worst case.
* If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is > 1.
- * Note : CCtx estimation is only correct for single-threaded compression */
+ * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbWorkers is >= 1.
+ * Note : CCtx size estimation is only correct for single-threaded compression. */
ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
@@ -515,8 +519,8 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
* It will also consider src size to be arbitrarily "large", which is worst case.
* If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
* ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbThreads is set to a value > 1.
- * Note : CStream estimation is only correct for single-threaded compression.
+ * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_p_nbWorkers is >= 1.
+ * Note : CStream size estimation is only correct for single-threaded compression.
* ZSTD_DStream memory budget depends on window Size.
* This information can be passed manually, using ZSTD_estimateDStreamSize,
* or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
@@ -529,84 +533,92 @@ ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_para
ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
-typedef enum {
- ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
- ZSTD_dlm_byRef, /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
-} ZSTD_dictLoadMethod_e;
-
/*! ZSTD_estimate?DictSize() :
* ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
- * ZSTD_estimateCStreamSize_advanced_usingCParams() makes it possible to control precisely compression parameters, like ZSTD_createCDict_advanced().
- * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller
+ * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
*/
ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
-
-/***************************************
-* Advanced compression functions
-***************************************/
-/*! ZSTD_createCCtx_advanced() :
- * Create a ZSTD compression context using external alloc and free functions */
-ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
-
-/*! ZSTD_initStaticCCtx() : initialize a fixed-size zstd compression context
- * workspace: The memory area to emplace the context into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive context usage.
- * workspaceSize: Use ZSTD_estimateCCtxSize() or ZSTD_estimateCStreamSize()
- * to determine how large workspace must be to support scenario.
- * @return : pointer to ZSTD_CCtx* (same address as workspace, but different type),
- * or NULL if error (typically size too small)
- * Note : zstd will never resize nor malloc() when using a static cctx.
- * If it needs more memory than available, it will simply error out.
+/*! ZSTD_initStatic*() :
+ * Initialize an object using a pre-allocated fixed-size buffer.
+ * workspace: The memory area to emplace the object into.
+ * Provided pointer *must be 8-bytes aligned*.
+ * Buffer must outlive object.
+ * workspaceSize: Use ZSTD_estimate*Size() to determine
+ * how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ * or NULL if error (size too small, incorrect alignment, etc.)
+ * Note : zstd will never resize nor malloc() when using a static buffer.
+ * If the object requires more memory than available,
+ * zstd will just error out (typically ZSTD_error_memory_allocation).
* Note 2 : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally too.
- * Limitation 1 : currently not compatible with internal CDict creation, such as
- * ZSTD_CCtx_loadDictionary() or ZSTD_initCStream_usingDict().
- * Limitation 2 : currently not compatible with multi-threading
+ * Since workspace is allocated externally, it must be freed externally too.
+ * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ * into its associated cParams.
+ * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ * Limitation 2 : static cctx currently not compatible with multi-threading.
+ * Limitation 3 : static dctx is incompatible with legacy support.
*/
-ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
+ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
-/*! ZSTD_createCDict_byReference() :
- * Create a digested dictionary for compression
- * Dictionary content is simply referenced, and therefore stays in dictBuffer.
- * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+ void* workspace, size_t workspaceSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+ void* workspace, size_t workspaceSize,
+ const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType);
+
+/*! Custom memory allocation :
+ * These prototypes make it possible to pass your own allocation/free functions.
+ * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ * All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-typedef enum { ZSTD_dm_auto=0, /* dictionary is "full" if it starts with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
- ZSTD_dm_rawContent, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
- ZSTD_dm_fullDict /* refuses to load a dictionary if it does not respect Zstandard's specification */
-} ZSTD_dictMode_e;
-/*! ZSTD_createCDict_advanced() :
- * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */
ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_dictMode_e dictMode,
+ ZSTD_dictContentType_e dictContentType,
ZSTD_compressionParameters cParams,
ZSTD_customMem customMem);
-/*! ZSTD_initStaticCDict() :
- * Generate a digested dictionary in provided memory area.
- * workspace: The memory area to emplace the dictionary into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive dictionary usage.
- * workspaceSize: Use ZSTD_estimateCDictSize()
- * to determine how large workspace must be.
- * cParams : use ZSTD_getCParams() to transform a compression level
- * into its relevants cParams.
- * @return : pointer to ZSTD_CDict* (same address as workspace, but different type),
- * or NULL if error (typically, size too small).
- * Note : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally.
- */
-ZSTDLIB_API ZSTD_CDict* ZSTD_initStaticCDict(
- void* workspace, size_t workspaceSize,
- const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode,
- ZSTD_compressionParameters cParams);
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+ ZSTD_dictLoadMethod_e dictLoadMethod,
+ ZSTD_dictContentType_e dictContentType,
+ ZSTD_customMem customMem);
+
+
+
+/***************************************
+* Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ * Create a digested dictionary for compression
+ * Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ * It is important that dictBuffer outlives CDict, it must remain read accessible throughout the lifetime of CDict */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
/*! ZSTD_getCParams() :
* @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
@@ -652,28 +664,6 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
* Note 3 : Skippable Frame Identifiers are considered valid. */
ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
-/*! ZSTD_createDCtx_advanced() :
- * Create a ZSTD decompression context using external alloc and free functions */
-ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
-
-/*! ZSTD_initStaticDCtx() : initialize a fixed-size zstd decompression context
- * workspace: The memory area to emplace the context into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive context usage.
- * workspaceSize: Use ZSTD_estimateDCtxSize() or ZSTD_estimateDStreamSize()
- * to determine how large workspace must be to support scenario.
- * @return : pointer to ZSTD_DCtx* (same address as workspace, but different type),
- * or NULL if error (typically size too small)
- * Note : zstd will never resize nor malloc() when using a static dctx.
- * If it needs more memory than available, it will simply error out.
- * Note 2 : static dctx is incompatible with legacy support
- * Note 3 : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally.
- * Limitation : currently not compatible with internal DDict creation,
- * such as ZSTD_initDStream_usingDict().
- */
-ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
-
/*! ZSTD_createDDict_byReference() :
* Create a digested dictionary, ready to start decompression operation without startup delay.
* Dictionary content is referenced, and therefore stays in dictBuffer.
@@ -681,26 +671,6 @@ ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize
* it must remain read accessible throughout the lifetime of DDict */
ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
-/*! ZSTD_createDDict_advanced() :
- * Create a ZSTD_DDict using external alloc and free, optionally by reference */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod,
- ZSTD_customMem customMem);
-
-/*! ZSTD_initStaticDDict() :
- * Generate a digested dictionary in provided memory area.
- * workspace: The memory area to emplace the dictionary into.
- * Provided pointer must 8-bytes aligned.
- * It must outlive dictionary usage.
- * workspaceSize: Use ZSTD_estimateDDictSize()
- * to determine how large workspace must be.
- * @return : pointer to ZSTD_DDict*, or NULL if error (size too small)
- * Note : there is no corresponding "free" function.
- * Since workspace was allocated externally, it must be freed externally.
- */
-ZSTDLIB_API ZSTD_DDict* ZSTD_initStaticDDict(void* workspace, size_t workspaceSize,
- const void* dict, size_t dictSize,
- ZSTD_dictLoadMethod_e dictLoadMethod);
/*! ZSTD_getDictID_fromDict() :
* Provides the dictID stored within dictionary.
@@ -732,8 +702,6 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
********************************************************************/
/*===== Advanced Streaming compression functions =====*/
-ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */
ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */
ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/
ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize,
@@ -748,14 +716,28 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const
* If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
* If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
* For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
- * but it may change to mean "empty" in some future version, so prefer using macro ZSTD_CONTENTSIZE_UNKNOWN.
+ * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
* @return : 0, or an error code (which can be tested using ZSTD_isError()) */
ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+typedef struct {
+ unsigned long long ingested;
+ unsigned long long consumed;
+ unsigned long long produced;
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been ingested (read from input)
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Therefore, (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Can report progression inside worker threads (multi-threading and non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+
+
/*===== Advanced Streaming decompression functions =====*/
-ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */
typedef enum { DStream_p_maxWindowSize } ZSTD_DStreamParameter_e;
ZSTDLIB_API size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds, ZSTD_DStreamParameter_e paramType, unsigned paramValue); /* obsolete : this API will be removed in a future version */
ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */
@@ -924,10 +906,8 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
* and then applied on all subsequent compression jobs.
* When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT.
*
- * This API is intended to replace all others experimental API.
- * It can basically do all other use cases, and even new ones.
- * In constrast with _advanced() variants, it stands a reasonable chance to become "stable",
- * after a good testing period.
+ * This API is intended to replace all others advanced / experimental API entry points.
+ * But it stands a reasonable chance to become "stable", after a reasonable testing period.
*/
/* note on naming convention :
@@ -944,12 +924,12 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
* All enum will be pinned to explicit values before reaching "stable API" status */
typedef enum {
- /* Question : should we have a format ZSTD_f_auto ?
- * For the time being, it would mean exactly the same as ZSTD_f_zstd1.
- * But, in the future, should several formats be supported,
+ /* Opened question : should we have a format ZSTD_f_auto ?
+ * Today, it would mean exactly the same as ZSTD_f_zstd1.
+ * But, in the future, should several formats become supported,
* on the compression side, it would mean "default format".
- * On the decompression side, it would mean "multi format",
- * and ZSTD_f_zstd1 could be reserved to mean "accept *only* zstd frames".
+ * On the decompression side, it would mean "automatic format detection",
+ * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
* Since meaning is a little different, another option could be to define different enums for compression and decompression.
* This question could be kept for later, when there are actually multiple formats to support,
* but there is also the question of pinning enum values, and pinning value `0` is especially important */
@@ -967,42 +947,76 @@ typedef enum {
/* compression parameters */
ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
* Default level is ZSTD_CLEVEL_DEFAULT==3.
- * Special: value 0 means "do not change cLevel". */
+ * Special: value 0 means "do not change cLevel".
+ * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
+ * Note 2 : setting a level sets all default values of other compression parameters.
+ * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
ZSTD_p_windowLog, /* Maximum allowed back-reference distance, expressed as power of 2.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
- * Special: value 0 means "do not change windowLog".
+ * Special: value 0 means "use default windowLog".
* Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
- * requires setting the maximum window size at least as large during decompression. */
+ * requires explicitly allowing such window size during decompression stage. */
ZSTD_p_hashLog, /* Size of the probe table, as a power of 2.
* Resulting table size is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
* Larger tables improve compression ratio of strategies <= dFast,
* and improve speed of strategies > dFast.
- * Special: value 0 means "do not change hashLog". */
+ * Special: value 0 means "use default hashLog". */
ZSTD_p_chainLog, /* Size of the full-search table, as a power of 2.
* Resulting table size is (1 << (chainLog+2)).
* Larger tables result in better and slower compression.
* This parameter is useless when using "fast" strategy.
- * Special: value 0 means "do not change chainLog". */
+ * Special: value 0 means "use default chainLog". */
ZSTD_p_searchLog, /* Number of search attempts, as a power of 2.
* More attempts result in better and slower compression.
* This parameter is useless when using "fast" and "dFast" strategies.
- * Special: value 0 means "do not change searchLog". */
+ * Special: value 0 means "use default searchLog". */
ZSTD_p_minMatch, /* Minimum size of searched matches (note : repCode matches can be smaller).
* Larger values make faster compression and decompression, but decrease ratio.
* Must be clamped between ZSTD_SEARCHLENGTH_MIN and ZSTD_SEARCHLENGTH_MAX.
* Note that currently, for all strategies < btopt, effective minimum is 4.
- * Note that currently, for all strategies > fast, effective maximum is 6.
- * Special: value 0 means "do not change minMatchLength". */
- ZSTD_p_targetLength, /* Only useful for strategies >= btopt.
- * Length of Match considered "good enough" to stop search.
- * Larger values make compression stronger and slower.
- * Special: value 0 means "do not change targetLength". */
+ * , for all strategies > fast, effective maximum is 6.
+ * Special: value 0 means "use default minMatchLength". */
+ ZSTD_p_targetLength, /* Impact of this field depends on strategy.
+ * For strategies btopt & btultra:
+ * Length of Match considered "good enough" to stop search.
+ * Larger values make compression stronger, and slower.
+ * For strategy fast:
+ * Distance between match sampling.
+ * Larger values make compression faster, and weaker.
+ * Special: value 0 means "use default targetLength". */
ZSTD_p_compressionStrategy, /* See ZSTD_strategy enum definition.
* Cast selected strategy as unsigned for ZSTD_CCtx_setParameter() compatibility.
* The higher the value of selected strategy, the more complex it is,
* resulting in stronger and slower compression.
- * Special: value 0 means "do not change strategy". */
+ * Special: value 0 means "use default strategy". */
+
+ ZSTD_p_enableLongDistanceMatching=160, /* Enable long distance matching.
+ * This parameter is designed to improve compression ratio
+ * for large inputs, by finding large matches at long distance.
+ * It increases memory usage and window size.
+ * Note: enabling this parameter increases ZSTD_p_windowLog to 128 MB
+ * except when expressly set to a different value. */
+ ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2.
+ * Larger values increase memory usage and compression ratio,
+ * but decrease compression speed.
+ * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+ * default: windowlog - 7.
+ * Special: value 0 means "automatically determine hashlog". */
+ ZSTD_p_ldmMinMatch, /* Minimum match size for long distance matcher.
+ * Larger/too small values usually decrease compression ratio.
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+ * Special: value 0 means "use default value" (default: 64). */
+ ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
+ * Larger values improve collision resolution but decrease compression speed.
+ * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX .
+ * Special: value 0 means "use default value" (default: 3). */
+ ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table.
+ * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+ * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+ * Larger values improve compression speed.
+ * Deviating far from default value will likely result in a compression ratio decrease.
+ * Special: value 0 means "automatically determine hashEveryLog". */
/* frame parameters */
ZSTD_p_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
@@ -1012,58 +1026,45 @@ typedef enum {
ZSTD_p_dictIDFlag, /* When applicable, dictionary's ID is written into frame header (default:1) */
/* multi-threading parameters */
- ZSTD_p_nbThreads=400, /* Select how many threads a compression job can spawn (default:1)
- * More threads improve speed, but also increase memory usage.
- * Can only receive a value > 1 if ZSTD_MULTITHREAD is enabled.
- * Special: value 0 means "do not change nbThreads" */
- ZSTD_p_jobSize, /* Size of a compression job. This value is only enforced in streaming (non-blocking) mode.
- * Each compression job is completed in parallel, so indirectly controls the nb of active threads.
+ /* These parameters are only useful if multi-threading is enabled (ZSTD_MULTITHREAD).
+ * They return an error otherwise. */
+ ZSTD_p_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel.
+ * When nbWorkers >= 1, triggers asynchronous mode :
+ * ZSTD_compress_generic() consumes some input, flush some output if possible, and immediately gives back control to caller,
+ * while compression work is performed in parallel, within worker threads.
+ * (note : a strong exception to this rule is when first invocation sets ZSTD_e_end : it becomes a blocking call).
+ * More workers improve speed, but also increase memory usage.
+ * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */
+ ZSTD_p_jobSize, /* Size of a compression job. This value is enforced only in non-blocking mode.
+ * Each compression job is completed in parallel, so this value indirectly controls the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
- * Job size must be a minimum of overlapSize, or 1 KB, whichever is largest
+ * Job size must be a minimum of overlapSize, or 1 MB, whichever is largest.
* The minimum size is automatically and transparently enforced */
ZSTD_p_overlapSizeLog, /* Size of previous input reloaded at the beginning of each job.
* 0 => no overlap, 6(default) => use 1/8th of windowSize, >=9 => use full windowSize */
- /* advanced parameters - may not remain available after API update */
+ /* =================================================================== */
+ /* experimental parameters - no stability guaranteed */
+ /* =================================================================== */
+
+ ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default.
+ * disabling it improves speed and decreases compression ratio by a large amount.
+ * note : this setting is automatically updated when changing compression level.
+ * positive compression levels set ZSTD_p_compressLiterals to 1.
+ * negative compression levels set ZSTD_p_compressLiterals to 0. */
+
ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
- ZSTD_p_enableLongDistanceMatching=1200, /* Enable long distance matching.
- * This parameter is designed to improve the compression
- * ratio for large inputs with long distance matches.
- * This increases the memory usage as well as window size.
- * Note: setting this parameter sets all the LDM parameters
- * as well as ZSTD_p_windowLog. It should be set after
- * ZSTD_p_compressionLevel and before ZSTD_p_windowLog and
- * other LDM parameters. Setting the compression level
- * after this parameter overrides the window log, though LDM
- * will remain enabled until explicitly disabled. */
- ZSTD_p_ldmHashLog, /* Size of the table for long distance matching, as a power of 2.
- * Larger values increase memory usage and compression ratio, but decrease
- * compression speed.
- * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
- * (default: windowlog - 7). */
- ZSTD_p_ldmMinMatch, /* Minimum size of searched matches for long distance matcher.
- * Larger/too small values usually decrease compression ratio.
- * Must be clamped between ZSTD_LDM_MINMATCH_MIN
- * and ZSTD_LDM_MINMATCH_MAX (default: 64). */
- ZSTD_p_ldmBucketSizeLog, /* Log size of each bucket in the LDM hash table for collision resolution.
- * Larger values usually improve collision resolution but may decrease
- * compression speed.
- * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX (default: 3). */
- ZSTD_p_ldmHashEveryLog, /* Frequency of inserting/looking up entries in the LDM hash table.
- * The default is MAX(0, (windowLog - ldmHashLog)) to
- * optimize hash table usage.
- * Larger values improve compression speed. Deviating far from the
- * default value will likely result in a decrease in compression ratio.
- * Must be clamped between 0 and ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN. */
} ZSTD_cParameter;
/*! ZSTD_CCtx_setParameter() :
* Set one compression parameter, selected by enum ZSTD_cParameter.
+ * Setting a parameter is generally only possible during frame initialization (before starting compression),
+ * except for a few exceptions which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
* Note : when `value` is an enum, cast it to unsigned for proper type checking.
- * @result : informational value (typically, the one being set, possibly corrected),
+ * @result : informational value (typically, value being set clamped correctly),
* or an error code (which can be tested with ZSTD_isError()). */
ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
@@ -1079,26 +1080,24 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param
ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
/*! ZSTD_CCtx_loadDictionary() :
- * Create an internal CDict from dict buffer.
- * Decompression will have to use same buffer.
+ * Create an internal CDict from `dict` buffer.
+ * Decompression will have to use same dictionary.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
- * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
- * meaning "return to no-dictionary mode".
- * Note 1 : `dict` content will be copied internally. Use
- * ZSTD_CCtx_loadDictionary_byReference() to reference dictionary
- * content instead. The dictionary buffer must then outlive its
- * users.
+ * Special: Adding a NULL (or 0-size) dictionary invalidates previous dictionary,
+ * meaning "return to no-dictionary mode".
+ * Note 1 : Dictionary will be used for all future compression jobs.
+ * To return to "no-dictionary" situation, load a NULL dictionary
* Note 2 : Loading a dictionary involves building tables, which are dependent on compression parameters.
* For this reason, compression parameters cannot be changed anymore after loading a dictionary.
- * It's also a CPU-heavy operation, with non-negligible impact on latency.
- * Note 3 : Dictionary will be used for all future compression jobs.
- * To return to "no-dictionary" situation, load a NULL dictionary
- * Note 5 : Use ZSTD_CCtx_loadDictionary_advanced() to select how dictionary
- * content will be interpreted.
- */
+ * It's also a CPU consuming operation, with non-negligible impact on latency.
+ * Note 3 :`dict` content will be copied internally.
+ * Use ZSTD_CCtx_loadDictionary_byReference() to reference dictionary content instead.
+ * In such a case, dictionary buffer must outlive its users.
+ * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ * to precisely select how dictionary content must be interpreted. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode);
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
/*! ZSTD_CCtx_refCDict() :
@@ -1110,8 +1109,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void
* Special : adding a NULL CDict means "return to no-dictionary mode".
* Note 1 : Currently, only one dictionary can be managed.
* Adding a new dictionary effectively "discards" any previous one.
- * Note 2 : CDict is just referenced, its lifetime must outlive CCtx.
- */
+ * Note 2 : CDict is just referenced, its lifetime must outlive CCtx. */
ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
/*! ZSTD_CCtx_refPrefix() :
@@ -1121,20 +1119,29 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
* Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
* If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
- * Special : Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
* Note 1 : Prefix buffer is referenced. It must outlive compression job.
* Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
- * It's a CPU-heavy operation, with non-negligible impact on latency.
- * Note 3 : By default, the prefix is treated as raw content
- * (ZSTD_dm_rawContent). Use ZSTD_CCtx_refPrefix_advanced() to alter
- * dictMode. */
+ * It's a CPU consuming operation, with non-negligible impact on latency.
+ * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
-ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode);
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_reset() :
+ * Return a CCtx to clean state.
+ * Useful after an error, or to interrupt an ongoing compression job and start a new one.
+ * Any internal data not yet flushed is cancelled.
+ * Dictionary (if any) is dropped.
+ * All parameters are back to default values.
+ * It's possible to modify compression parameters after a reset.
+ */
+ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
typedef enum {
- ZSTD_e_continue=0, /* collect more data, encoder transparently decides when to output result, for optimal conditions */
+ ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal conditions */
ZSTD_e_flush, /* flush any data provided so far - frame will continue, future data can still reference previous data for better compression */
ZSTD_e_end /* flush any remaining data and close current frame. Any additional data starts a new frame. */
} ZSTD_EndDirective;
@@ -1150,10 +1157,11 @@ typedef enum {
* and then immediately returns, just indicating that there is some data remaining to be flushed.
* The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
* - Exception : in multi-threading mode, if the first call requests a ZSTD_e_end directive, it is blocking : it will complete compression before giving back control to caller.
- * - @return provides the minimum amount of data remaining to be flushed from internal buffers
+ * - @return provides a minimum amount of data remaining to be flushed from internal buffers
* or an error code, which can be tested using ZSTD_isError().
* if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
- * This is useful to determine if a ZSTD_e_flush or ZSTD_e_end directive is completed.
+ * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
* - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
* only ZSTD_e_end or ZSTD_e_flush operations are allowed.
* Before starting a new compression job, or changing compression parameters,
@@ -1164,16 +1172,6 @@ ZSTDLIB_API size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
ZSTD_inBuffer* input,
ZSTD_EndDirective endOp);
-/*! ZSTD_CCtx_reset() :
- * Return a CCtx to clean state.
- * Useful after an error, or to interrupt an ongoing compression job and start a new one.
- * Any internal data not yet flushed is cancelled.
- * Dictionary (if any) is dropped.
- * All parameters are back to default values.
- * It's possible to modify compression parameters after a reset.
- */
-ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx); /* Not ready yet ! */
-
/*! ZSTD_compress_generic_simpleArgs() :
* Same as ZSTD_compress_generic(),
@@ -1207,25 +1205,26 @@ ZSTDLIB_API size_t ZSTD_compress_generic_simpleArgs (
* for static allocation for single-threaded compression.
*/
ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
-/*! ZSTD_resetCCtxParams() :
- * Reset params to default, with the default compression level.
+
+/*! ZSTD_CCtxParams_reset() :
+ * Reset params to default values.
*/
-ZSTDLIB_API size_t ZSTD_resetCCtxParams(ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
-/*! ZSTD_initCCtxParams() :
+/*! ZSTD_CCtxParams_init() :
* Initializes the compression parameters of cctxParams according to
* compression level. All other parameters are reset to their default values.
*/
-ZSTDLIB_API size_t ZSTD_initCCtxParams(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
-/*! ZSTD_initCCtxParams_advanced() :
+/*! ZSTD_CCtxParams_init_advanced() :
* Initializes the compression and frame parameters of cctxParams according to
* params. All other parameters are reset to their default values.
*/
-ZSTDLIB_API size_t ZSTD_initCCtxParams_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
-ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);
/*! ZSTD_CCtxParam_setParameter() :
* Similar to ZSTD_CCtx_setParameter.
@@ -1238,9 +1237,10 @@ ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cP
/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
* Apply a set of ZSTD_CCtx_params to the compression context.
- * This must be done before the dictionary is loaded.
- * The pledgedSrcSize is treated as unknown.
- * Multithreading parameters are applied only if nbThreads > 1.
+ * This can be done even after compression is started,
+ * if nbWorkers==0, this will have no impact until a new compression is started.
+ * if nbWorkers>=1, new parameters will be picked up at next job,
+ * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
*/
ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
@@ -1267,9 +1267,9 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
* Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to select
* how dictionary content will be interpreted and loaded.
*/
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); /* not implemented */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictMode_e dictMode); /* not implemented */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
/*! ZSTD_DCtx_refDDict() :
@@ -1281,7 +1281,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void
* Special : adding a NULL DDict means "return to no-dictionary mode".
* Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
*/
-ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); /* not implemented */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
/*! ZSTD_DCtx_refPrefix() :
@@ -1295,8 +1295,8 @@ ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
* Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode.
* Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
*/
-ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize); /* not implemented */
-ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictMode_e dictMode); /* not implemented */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
/*! ZSTD_DCtx_setMaxWindowSize() :
@@ -1389,7 +1389,7 @@ ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DCtx* dctx);
ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression */
+ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */