diff options
118 files changed, 3490 insertions, 1730 deletions
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 757913f0bd..6122452b88 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -380,7 +380,7 @@ License: Zlib Files: ./thirdparty/zstd/ Comment: Zstandard -Copyright: 2016-2019, Facebook, Inc. +Copyright: 2016-2020, Facebook, Inc. License: BSD-3-clause diff --git a/core/SCsub b/core/SCsub index 16ee49d003..59fe63b4b3 100644 --- a/core/SCsub +++ b/core/SCsub @@ -121,6 +121,7 @@ if env["builtin_zstd"]: "compress/zstdmt_compress.c", "compress/zstd_compress_literals.c", "compress/zstd_compress_sequences.c", + "compress/zstd_compress_superblock.c", "decompress/huf_decompress.c", "decompress/zstd_ddict.c", "decompress/zstd_decompress_block.c", diff --git a/core/io/file_access_zip.h b/core/io/file_access_zip.h index 2cce24e878..c251b3c424 100644 --- a/core/io/file_access_zip.h +++ b/core/io/file_access_zip.h @@ -79,7 +79,7 @@ public: }; class FileAccessZip : public FileAccess { - unzFile zfile; + unzFile zfile = nullptr; unz_file_info64 file_info; mutable bool at_eof; diff --git a/core/math/math_funcs.h b/core/math/math_funcs.h index 9f8d4da5b3..24b42790f0 100644 --- a/core/math/math_funcs.h +++ b/core/math/math_funcs.h @@ -46,7 +46,8 @@ class Math { public: Math() {} // useless to instance - static const uint64_t RANDOM_MAX = 0xFFFFFFFF; + // Not using 'RANDOM_MAX' to avoid conflict with system headers on some OSes (at least NetBSD). + static const uint64_t RANDOM_32BIT_MAX = 0xFFFFFFFF; static _ALWAYS_INLINE_ double sin(double p_x) { return ::sin(p_x); } static _ALWAYS_INLINE_ float sin(float p_x) { return ::sinf(p_x); } @@ -283,8 +284,8 @@ public: static void randomize(); static uint32_t rand_from_seed(uint64_t *seed); static uint32_t rand(); - static _ALWAYS_INLINE_ double randd() { return (double)rand() / (double)Math::RANDOM_MAX; } - static _ALWAYS_INLINE_ float randf() { return (float)rand() / (float)Math::RANDOM_MAX; } + static _ALWAYS_INLINE_ double randd() { return (double)rand() / (double)Math::RANDOM_32BIT_MAX; } + static _ALWAYS_INLINE_ float randf() { return (float)rand() / (float)Math::RANDOM_32BIT_MAX; } static double random(double from, double to); static float random(float from, float to); diff --git a/core/math/random_pcg.h b/core/math/random_pcg.h index 8fd5a056fa..09b13ab74d 100644 --- a/core/math/random_pcg.h +++ b/core/math/random_pcg.h @@ -31,12 +31,12 @@ #ifndef RANDOM_PCG_H #define RANDOM_PCG_H -#include <math.h> - #include "core/math/math_defs.h" #include "thirdparty/misc/pcg.h" +#include <math.h> + #if defined(__GNUC__) #define CLZ32(x) __builtin_clz(x) #elif defined(_MSC_VER) @@ -67,7 +67,6 @@ class RandomPCG { public: static const uint64_t DEFAULT_SEED = 12047754176567800795U; static const uint64_t DEFAULT_INC = PCG_DEFAULT_INC_64; - static const uint64_t RANDOM_MAX = 0xFFFFFFFF; RandomPCG(uint64_t p_seed = DEFAULT_SEED, uint64_t p_inc = DEFAULT_INC); diff --git a/core/register_core_types.cpp b/core/register_core_types.cpp index 4f094dd6c6..a2b3f75bea 100644 --- a/core/register_core_types.cpp +++ b/core/register_core_types.cpp @@ -232,7 +232,7 @@ void register_core_types() { } void register_core_settings() { - //since in register core types, globals may not e present + // Since in register core types, globals may not be present. GLOBAL_DEF("network/limits/tcp/connect_timeout_seconds", (30)); ProjectSettings::get_singleton()->set_custom_property_info("network/limits/tcp/connect_timeout_seconds", PropertyInfo(Variant::INT, "network/limits/tcp/connect_timeout_seconds", PROPERTY_HINT_RANGE, "1,1800,1")); GLOBAL_DEF_RST("network/limits/packet_peer_stream/max_buffer_po2", (16)); diff --git a/core/translation_po.h b/core/translation_po.h index 730635f63d..88830210ef 100644 --- a/core/translation_po.h +++ b/core/translation_po.h @@ -42,7 +42,7 @@ class TranslationPO : public Translation { // TLDR: Maps context to a list of source strings and translated strings. In PO terms, maps msgctxt to a list of msgid and msgstr. // The first key corresponds to context, and the second key (of the contained HashMap) corresponds to source string. // The value Vector<StringName> in the second map stores the translated strings. Index 0, 1, 2 matches msgstr[0], msgstr[1], msgstr[2]... in the case of plurals. - // Otherwise index 0 mathes to msgstr in a singular translation. + // Otherwise index 0 matches to msgstr in a singular translation. // Strings without context have "" as first key. HashMap<StringName, HashMap<StringName, Vector<StringName>>> translation_map; diff --git a/doc/classes/AABB.xml b/doc/classes/AABB.xml index cbc04ca672..faa380ff33 100644 --- a/doc/classes/AABB.xml +++ b/doc/classes/AABB.xml @@ -4,7 +4,9 @@ Axis-Aligned Bounding Box. </brief_description> <description> - AABB consists of a position, a size, and several utility functions. It is typically used for fast overlap tests. + [AABB] consists of a position, a size, and several utility functions. It is typically used for fast overlap tests. + It uses floating-point coordinates. The 2D counterpart to [AABB] is [Rect2]. + [b]Note:[/b] Unlike [Rect2], [AABB] does not have a variant that uses integer coordinates. </description> <tutorials> <link title="Math tutorial index">https://docs.godotengine.org/en/latest/tutorials/math/index.html</link> diff --git a/doc/classes/Array.xml b/doc/classes/Array.xml index 20579e0159..8bbb0817ea 100644 --- a/doc/classes/Array.xml +++ b/doc/classes/Array.xml @@ -292,7 +292,7 @@ <return type="Variant"> </return> <description> - Removes and returns the first element of the array. Returns [code]null[/code] if the array is empty, wwithout printing an error message. + Removes and returns the first element of the array. Returns [code]null[/code] if the array is empty, without printing an error message. </description> </method> <method name="push_back"> diff --git a/doc/classes/HTTPRequest.xml b/doc/classes/HTTPRequest.xml index dc88c53810..8cc7ecfbe3 100644 --- a/doc/classes/HTTPRequest.xml +++ b/doc/classes/HTTPRequest.xml @@ -146,7 +146,7 @@ <members> <member name="accept_gzip" type="bool" setter="set_accept_gzip" getter="is_accepting_gzip" default="true"> If [code]true[/code], this header will be added to each request: [code]Accept-Encoding: gzip, deflate[/code] telling servers that it's okay to compress response bodies. - Any Reponse body declaring a [code]Content-Encoding[/code] of either [code]gzip[/code] or [code]deflate[/code] will then be automatically decompressed, and the uncompressed bytes will be delivered via [code]request_completed[/code]. + Any Response body declaring a [code]Content-Encoding[/code] of either [code]gzip[/code] or [code]deflate[/code] will then be automatically decompressed, and the uncompressed bytes will be delivered via [code]request_completed[/code]. If the user has specified their own [code]Accept-Encoding[/code] header, then no header will be added regaurdless of [code]accept_gzip[/code]. If [code]false[/code] no header will be added, and no decompression will be performed on response bodies. The raw bytes of the response body will be returned via [code]request_completed[/code]. </member> diff --git a/doc/classes/JavaScript.xml b/doc/classes/JavaScript.xml index d2cb558385..c707a72ee8 100644 --- a/doc/classes/JavaScript.xml +++ b/doc/classes/JavaScript.xml @@ -5,6 +5,7 @@ </brief_description> <description> The JavaScript singleton is implemented only in the HTML5 export. It's used to access the browser's JavaScript context. This allows interaction with embedding pages or calling third-party JavaScript APIs. + [b]Note:[/b] This singleton can be disabled at build-time to improve security. By default, the JavaScript singleton is enabled. Official export templates also have the JavaScript singleton enabled. See [url=https://docs.godotengine.org/en/latest/development/compiling/compiling_for_web.html]Compiling for the Web[/url] in the documentation for more information. </description> <tutorials> <link title="Exporting for the Web: Calling JavaScript from script">https://docs.godotengine.org/en/latest/getting_started/workflow/export/exporting_for_web.html#calling-javascript-from-script</link> diff --git a/doc/classes/NavigationMesh.xml b/doc/classes/NavigationMesh.xml index 6deca4394f..a2fa31bf65 100644 --- a/doc/classes/NavigationMesh.xml +++ b/doc/classes/NavigationMesh.xml @@ -80,14 +80,17 @@ <member name="agent/height" type="float" setter="set_agent_height" getter="get_agent_height" default="2.0"> </member> <member name="agent/max_climb" type="float" setter="set_agent_max_climb" getter="get_agent_max_climb" default="0.9"> + The maximum height difference between two areas for navigation to be generated between them. </member> <member name="agent/max_slope" type="float" setter="set_agent_max_slope" getter="get_agent_max_slope" default="45.0"> + The maximum angle a slope can be at for navigation to be generated on it. </member> <member name="agent/radius" type="float" setter="set_agent_radius" getter="get_agent_radius" default="0.6"> </member> <member name="cell/height" type="float" setter="set_cell_height" getter="get_cell_height" default="0.2"> </member> <member name="cell/size" type="float" setter="set_cell_size" getter="get_cell_size" default="0.3"> + The size of cells in the [NavigationMesh]. </member> <member name="detail/sample_distance" type="float" setter="set_detail_sample_distance" getter="get_detail_sample_distance" default="6.0"> </member> @@ -104,18 +107,25 @@ <member name="filter/low_hanging_obstacles" type="bool" setter="set_filter_low_hanging_obstacles" getter="get_filter_low_hanging_obstacles" default="false"> </member> <member name="geometry/collision_mask" type="int" setter="set_collision_mask" getter="get_collision_mask"> + The physics layers used to generate the [NavigationMesh]. </member> <member name="geometry/parsed_geometry_type" type="int" setter="set_parsed_geometry_type" getter="get_parsed_geometry_type" default="0"> + What kind of geomerty is used to generate the [NavigationMesh]. </member> <member name="geometry/source_geometry_mode" type="int" setter="set_source_geometry_mode" getter="get_source_geometry_mode" default="0"> + Which geometry is used to generate the [NavigationMesh]. </member> <member name="geometry/source_group_name" type="StringName" setter="set_source_group_name" getter="get_source_group_name"> + The name of the group that is used to generate the [NavigationMesh]. </member> <member name="polygon/verts_per_poly" type="float" setter="set_verts_per_poly" getter="get_verts_per_poly" default="6.0"> + The number of vertices to use per polygon. Higher values will improve performance at the cost of lower precision. </member> <member name="region/merge_size" type="float" setter="set_region_merge_size" getter="get_region_merge_size" default="20.0"> + If two adjacent regions' edges are separated by a distance lower than this value, the regions will be merged together. </member> <member name="region/min_size" type="float" setter="set_region_min_size" getter="get_region_min_size" default="8.0"> + The minimum size of a region for it to be created. </member> <member name="sample_partition_type/sample_partition_type" type="int" setter="set_sample_partition_type" getter="get_sample_partition_type" default="0"> </member> diff --git a/doc/classes/PackedByteArray.xml b/doc/classes/PackedByteArray.xml index 0fcfed0595..0b43522bce 100644 --- a/doc/classes/PackedByteArray.xml +++ b/doc/classes/PackedByteArray.xml @@ -66,9 +66,8 @@ </argument> <description> Returns a new [PackedByteArray] with the data decompressed. Set the compression mode using one of [enum File.CompressionMode]'s constants. [b]This method only accepts gzip and deflate compression modes.[/b] - This method is potentially slower than [code]decompress[/code], as it may have to re-allocate it's output buffer multiple times while decompressing, where as [code]decompress[/code] knows it's output buffer size from the begining. - - GZIP has a maximal compression ratio of 1032:1, meaning it's very possible for a small compressed payload to decompress to a potentially very large output. To guard against this, you may provide a maximum size this function is allowed to allocate in bytes via [code]max_output_size[/code]. Passing -1 will allow for unbounded output. If any positive value is passed, and the decompression exceeds that ammount in bytes, then an error will be returned. + This method is potentially slower than [code]decompress[/code], as it may have to re-allocate it's output buffer multiple times while decompressing, where as [code]decompress[/code] knows it's output buffer size from the beginning. + GZIP has a maximal compression ratio of 1032:1, meaning it's very possible for a small compressed payload to decompress to a potentially very large output. To guard against this, you may provide a maximum size this function is allowed to allocate in bytes via [code]max_output_size[/code]. Passing -1 will allow for unbounded output. If any positive value is passed, and the decompression exceeds that amount in bytes, then an error will be returned. </description> </method> <method name="empty"> @@ -89,21 +88,21 @@ <return type="String"> </return> <description> - Converts UTF-16 encoded array to [String]. If the BOM is missing, system endianness is assumed. Returns empty string if source array is not vaild UTF-16 string. + Converts UTF-16 encoded array to [String]. If the BOM is missing, system endianness is assumed. Returns empty string if source array is not valid UTF-16 string. </description> </method> <method name="get_string_from_utf32"> <return type="String"> </return> <description> - Converts UTF-32 encoded array to [String]. System endianness is assumed. Returns empty string if source array is not vaild UTF-32 string. + Converts UTF-32 encoded array to [String]. System endianness is assumed. Returns empty string if source array is not valid UTF-32 string. </description> </method> <method name="get_string_from_utf8"> <return type="String"> </return> <description> - Converts UTF-8 encoded array to [String]. Slower than [method get_string_from_ascii] but supports UTF-8 encoded data. Use this function if you are unsure about the source of the data. For user input this function should always be preferred. Returns empty string if source array is not vaild UTF-8 string. + Converts UTF-8 encoded array to [String]. Slower than [method get_string_from_ascii] but supports UTF-8 encoded data. Use this function if you are unsure about the source of the data. For user input this function should always be preferred. Returns empty string if source array is not valid UTF-8 string. </description> </method> <method name="has"> diff --git a/doc/classes/Rect2.xml b/doc/classes/Rect2.xml index fe05f14fa1..2d1685871d 100644 --- a/doc/classes/Rect2.xml +++ b/doc/classes/Rect2.xml @@ -5,7 +5,8 @@ </brief_description> <description> [Rect2] consists of a position, a size, and several utility functions. It is typically used for fast overlap tests. - It uses floating point coordinates. + It uses floating-point coordinates. If you need integer coordinates, use [Rect2i] instead. + The 3D counterpart to [Rect2] is [AABB]. </description> <tutorials> <link title="Math tutorial index">https://docs.godotengine.org/en/latest/tutorials/math/index.html</link> diff --git a/doc/classes/Rect2i.xml b/doc/classes/Rect2i.xml index 2fdfe7c24b..809f385f6e 100644 --- a/doc/classes/Rect2i.xml +++ b/doc/classes/Rect2i.xml @@ -5,7 +5,7 @@ </brief_description> <description> [Rect2i] consists of a position, a size, and several utility functions. It is typically used for fast overlap tests. - It uses integer coordinates. + It uses integer coordinates. If you need floating-point coordinates, use [Rect2] instead. </description> <tutorials> <link title="Math tutorial index">https://docs.godotengine.org/en/latest/tutorials/math/index.html</link> diff --git a/doc/classes/RichTextLabel.xml b/doc/classes/RichTextLabel.xml index dc3c7c7dc0..050f1aab25 100644 --- a/doc/classes/RichTextLabel.xml +++ b/doc/classes/RichTextLabel.xml @@ -44,6 +44,7 @@ </argument> <description> Parses [code]bbcode[/code] and adds tags to the tag stack as needed. Returns the result of the parsing, [constant OK] if successful. + [b]Note:[/b] Using this method, you can't close a tag that was opened in a previous [method append_bbcode] call. This is done to improve performance, especially when updating large RichTextLabels since rebuilding the whole BBCode every time would be slower. If you absolutely need to close a tag in a future method call, append the [member bbcode_text] instead of using [method append_bbcode]. </description> </method> <method name="clear"> @@ -289,7 +290,7 @@ </member> <member name="bbcode_text" type="String" setter="set_bbcode" getter="get_bbcode" default=""""> The label's text in BBCode format. Is not representative of manual modifications to the internal tag stack. Erases changes made by other methods when edited. - [b]Note:[/b] It is unadvised to use [code]+=[/code] operator with [code]bbcode_text[/code] (e.g. [code]bbcode_text += "some string"[/code]) as it replaces the whole text and can cause slowdowns. Use [method append_bbcode] for adding text instead. + [b]Note:[/b] It is unadvised to use the [code]+=[/code] operator with [code]bbcode_text[/code] (e.g. [code]bbcode_text += "some string"[/code]) as it replaces the whole text and can cause slowdowns. Use [method append_bbcode] for adding text instead, unless you absolutely need to close a tag that was opened in an earlier method call. </member> <member name="custom_effects" type="Array" setter="set_effects" getter="get_effects" default="[ ]"> The currently installed custom effects. This is an array of [RichTextEffect]s. diff --git a/doc/classes/ScriptEditor.xml b/doc/classes/ScriptEditor.xml index 20b0750431..d5a32dd20c 100644 --- a/doc/classes/ScriptEditor.xml +++ b/doc/classes/ScriptEditor.xml @@ -86,6 +86,7 @@ <argument index="1" name="base_path" type="String"> </argument> <description> + Opens the script create dialog. The script will extend [code]base_name[/code]. The file extension can be omitted from [code]base_path[/code]. It will be added based on the selected scripting language. </description> </method> <method name="register_syntax_highlighter"> diff --git a/doc/translations/classes.pot b/doc/translations/classes.pot index 0e89e02762..7f352bce28 100644 --- a/doc/translations/classes.pot +++ b/doc/translations/classes.pot @@ -41208,7 +41208,7 @@ msgid "" "result, it should not be depended upon for reproducible random streams " "across Godot versions.\n" "To generate a random float number (within a given range) based on a time-" -"dependant seed:\n" +"dependent seed:\n" "[codeblock]\n" "var rng = RandomNumberGenerator.new()\n" "func _ready():\n" diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 6b98a344dc..96c338f86b 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -48,7 +48,7 @@ #include <mach/mach_time.h> #endif -#if defined(__FreeBSD__) || defined(__OpenBSD__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) #include <sys/param.h> #include <sys/sysctl.h> #endif @@ -477,7 +477,7 @@ String OS_Unix::get_executable_path() const { return OS::get_executable_path(); } return b; -#elif defined(__OpenBSD__) +#elif defined(__OpenBSD__) || defined(__NetBSD__) char resolved_path[MAXPATHLEN]; realpath(OS::get_executable_path().utf8().get_data(), resolved_path); diff --git a/drivers/unix/thread_posix.cpp b/drivers/unix/thread_posix.cpp index aa1b5019ca..285088342b 100644 --- a/drivers/unix/thread_posix.cpp +++ b/drivers/unix/thread_posix.cpp @@ -29,17 +29,17 @@ /*************************************************************************/ #include "thread_posix.h" -#include "core/script_language.h" #if (defined(UNIX_ENABLED) || defined(PTHREAD_ENABLED)) && !defined(NO_THREADS) +#include "core/os/memory.h" +#include "core/safe_refcount.h" +#include "core/script_language.h" + #ifdef PTHREAD_BSD_SET_NAME #include <pthread_np.h> #endif -#include "core/os/memory.h" -#include "core/safe_refcount.h" - static void _thread_id_key_destr_callback(void *p_value) { memdelete(static_cast<Thread::ID *>(p_value)); } @@ -126,6 +126,8 @@ Error ThreadPosix::set_name_func_posix(const String &p_name) { #ifdef PTHREAD_BSD_SET_NAME pthread_set_name_np(running_thread, p_name.utf8().get_data()); int err = 0; // Open/FreeBSD ignore errors in this function +#elif defined(PTHREAD_NETBSD_SET_NAME) + int err = pthread_setname_np(running_thread, "%s", const_cast<char *>(p_name.utf8().get_data())); #else int err = pthread_setname_np(running_thread, p_name.utf8().get_data()); #endif // PTHREAD_BSD_SET_NAME diff --git a/editor/editor_themes.cpp b/editor/editor_themes.cpp index 11b0228fd5..79525ced51 100644 --- a/editor/editor_themes.cpp +++ b/editor/editor_themes.cpp @@ -218,8 +218,15 @@ void editor_register_and_generate_icons(Ref<Theme> p_theme, bool p_dark_theme = // Generate icons. if (!p_only_thumbs) { for (int i = 0; i < editor_icons_count; i++) { + float icon_scale = EDSCALE; + + // Always keep the DefaultProjectIcon at the default size + if (strcmp(editor_icons_names[i], "DefaultProjectIcon") == 0) { + icon_scale = 1.0f; + } + const int is_exception = exceptions.has(editor_icons_names[i]); - const Ref<ImageTexture> icon = editor_generate_icon(i, !is_exception); + const Ref<ImageTexture> icon = editor_generate_icon(i, !is_exception, icon_scale); p_theme->set_icon(editor_icons_names[i], "EditorIcons", icon); } diff --git a/editor/plugins/visual_shader_editor_plugin.cpp b/editor/plugins/visual_shader_editor_plugin.cpp index e34f0855b2..7a70f4c5b6 100644 --- a/editor/plugins/visual_shader_editor_plugin.cpp +++ b/editor/plugins/visual_shader_editor_plugin.cpp @@ -82,9 +82,10 @@ void VisualShaderGraphPlugin::_bind_methods() { ClassDB::bind_method("set_node_position", &VisualShaderGraphPlugin::set_node_position); ClassDB::bind_method("set_node_size", &VisualShaderGraphPlugin::set_node_size); ClassDB::bind_method("show_port_preview", &VisualShaderGraphPlugin::show_port_preview); - ClassDB::bind_method("update_property_editor", &VisualShaderGraphPlugin::update_property_editor); - ClassDB::bind_method("update_property_editor_deferred", &VisualShaderGraphPlugin::update_property_editor_deferred); + ClassDB::bind_method("update_node", &VisualShaderGraphPlugin::update_node); + ClassDB::bind_method("update_node_deferred", &VisualShaderGraphPlugin::update_node_deferred); ClassDB::bind_method("set_input_port_default_value", &VisualShaderGraphPlugin::set_input_port_default_value); + ClassDB::bind_method("set_uniform_name", &VisualShaderGraphPlugin::set_uniform_name); } void VisualShaderGraphPlugin::register_shader(VisualShader *p_shader) { @@ -134,11 +135,11 @@ void VisualShaderGraphPlugin::show_port_preview(VisualShader::Type p_type, int p } } -void VisualShaderGraphPlugin::update_property_editor_deferred(VisualShader::Type p_type, int p_node_id) { - call_deferred("update_property_editor", p_type, p_node_id); +void VisualShaderGraphPlugin::update_node_deferred(VisualShader::Type p_type, int p_node_id) { + call_deferred("update_node", p_type, p_node_id); } -void VisualShaderGraphPlugin::update_property_editor(VisualShader::Type p_type, int p_node_id) { +void VisualShaderGraphPlugin::update_node(VisualShader::Type p_type, int p_node_id) { if (p_type != visual_shader->get_shader_type() || !links.has(p_node_id)) { return; } @@ -176,10 +177,26 @@ void VisualShaderGraphPlugin::set_input_port_default_value(VisualShader::Type p_ } } +void VisualShaderGraphPlugin::set_uniform_name(VisualShader::Type p_type, int p_node_id, const String &p_name) { + if (visual_shader->get_shader_type() == p_type && links.has(p_node_id) && links[p_node_id].uniform_name != nullptr) { + links[p_node_id].uniform_name->set_text(p_name); + } +} + void VisualShaderGraphPlugin::register_default_input_button(int p_node_id, int p_port_id, Button *p_button) { links[p_node_id].input_ports.insert(p_port_id, { p_button }); } +void VisualShaderGraphPlugin::update_uniform_refs() { + for (Map<int, Link>::Element *E = links.front(); E; E = E->next()) { + VisualShaderNodeUniformRef *ref = Object::cast_to<VisualShaderNodeUniformRef>(E->get().visual_node); + if (ref) { + remove_node(E->get().type, E->key()); + add_node(E->get().type, E->key()); + } + } +} + VisualShader::Type VisualShaderGraphPlugin::get_shader_type() const { return visual_shader->get_shader_type(); } @@ -213,13 +230,17 @@ void VisualShaderGraphPlugin::make_dirty(bool p_enabled) { } void VisualShaderGraphPlugin::register_link(VisualShader::Type p_type, int p_id, VisualShaderNode *p_visual_node, GraphNode *p_graph_node) { - links.insert(p_id, { p_type, p_visual_node, p_graph_node, p_visual_node->get_output_port_for_preview() != -1, -1, Map<int, InputPort>(), Map<int, Port>(), nullptr }); + links.insert(p_id, { p_type, p_visual_node, p_graph_node, p_visual_node->get_output_port_for_preview() != -1, -1, Map<int, InputPort>(), Map<int, Port>(), nullptr, nullptr }); } void VisualShaderGraphPlugin::register_output_port(int p_node_id, int p_port, TextureButton *p_button) { links[p_node_id].output_ports.insert(p_port, { p_button }); } +void VisualShaderGraphPlugin::register_uniform_name(int p_node_id, LineEdit *p_uniform_name) { + links[p_node_id].uniform_name = p_uniform_name; +} + void VisualShaderGraphPlugin::add_node(VisualShader::Type p_type, int p_id) { if (p_type != visual_shader->get_shader_type()) { return; @@ -280,43 +301,22 @@ void VisualShaderGraphPlugin::add_node(VisualShader::Type p_type, int p_id) { } Ref<VisualShaderNodeUniform> uniform = vsnode; - - if (uniform.is_valid()) { - VisualShaderEditor::get_singleton()->call_deferred("_update_uniforms"); - } - - Ref<VisualShaderNodeFloatUniform> float_uniform = vsnode; - Ref<VisualShaderNodeIntUniform> int_uniform = vsnode; - Ref<VisualShaderNodeVec3Uniform> vec3_uniform = vsnode; - Ref<VisualShaderNodeColorUniform> color_uniform = vsnode; - Ref<VisualShaderNodeBooleanUniform> bool_uniform = vsnode; - Ref<VisualShaderNodeTransformUniform> transform_uniform = vsnode; if (uniform.is_valid()) { VisualShaderEditor::get_singleton()->graph->add_child(node); VisualShaderEditor::get_singleton()->_update_created_node(node); LineEdit *uniform_name = memnew(LineEdit); + register_uniform_name(p_id, uniform_name); uniform_name->set_text(uniform->get_uniform_name()); node->add_child(uniform_name); - uniform_name->connect("text_entered", callable_mp(VisualShaderEditor::get_singleton(), &VisualShaderEditor::_line_edit_changed), varray(uniform_name, p_id)); - uniform_name->connect("focus_exited", callable_mp(VisualShaderEditor::get_singleton(), &VisualShaderEditor::_line_edit_focus_out), varray(uniform_name, p_id)); - - String error = vsnode->get_warning(visual_shader->get_mode(), p_type); - if (error != String()) { - offset = memnew(Control); - offset->set_custom_minimum_size(Size2(0, 4 * EDSCALE)); - node->add_child(offset); - Label *error_label = memnew(Label); - error_label->add_theme_color_override("font_color", VisualShaderEditor::get_singleton()->get_theme_color("error_color", "Editor")); - error_label->set_text(error); - node->add_child(error_label); - } + uniform_name->connect("text_entered", callable_mp(VisualShaderEditor::get_singleton(), &VisualShaderEditor::_uniform_line_edit_changed), varray(p_id)); + uniform_name->connect("focus_exited", callable_mp(VisualShaderEditor::get_singleton(), &VisualShaderEditor::_uniform_line_edit_focus_out), varray(uniform_name, p_id)); if (vsnode->get_input_port_count() == 0 && vsnode->get_output_port_count() == 1 && vsnode->get_output_port_name(0) == "") { //shortcut VisualShaderNode::PortType port_right = vsnode->get_output_port_type(0); node->set_slot(0, false, VisualShaderNode::PORT_TYPE_SCALAR, Color(), true, port_right, type_color[port_right]); - if (!float_uniform.is_valid() && !int_uniform.is_valid() && !vec3_uniform.is_valid() && !color_uniform.is_valid() && !bool_uniform.is_valid() && !transform_uniform.is_valid()) { + if (!vsnode->is_use_prop_slots()) { return; } } @@ -330,20 +330,19 @@ void VisualShaderGraphPlugin::add_node(VisualShader::Type p_type, int p_id) { vsnode->remove_meta("id"); vsnode->remove_meta("shader_type"); if (custom_editor) { + if (vsnode->is_show_prop_names()) { + custom_editor->call_deferred("_show_prop_names", true); + } break; } } - if (custom_editor && !float_uniform.is_valid() && !int_uniform.is_valid() && !vec3_uniform.is_valid() && !bool_uniform.is_valid() && !transform_uniform.is_valid() && vsnode->get_output_port_count() > 0 && vsnode->get_output_port_name(0) == "" && (vsnode->get_input_port_count() == 0 || vsnode->get_input_port_name(0) == "")) { + if (custom_editor && !vsnode->is_use_prop_slots() && vsnode->get_output_port_count() > 0 && vsnode->get_output_port_name(0) == "" && (vsnode->get_input_port_count() == 0 || vsnode->get_input_port_name(0) == "")) { //will be embedded in first port } else if (custom_editor) { port_offset++; node->add_child(custom_editor); - if (color_uniform.is_valid()) { - custom_editor->call_deferred("_show_prop_names", true); - } - if (float_uniform.is_valid() || int_uniform.is_valid() || vec3_uniform.is_valid() || bool_uniform.is_valid() || transform_uniform.is_valid()) { - custom_editor->call_deferred("_show_prop_names", true); + if (vsnode->is_use_prop_slots()) { return; } custom_editor = nullptr; @@ -590,19 +589,13 @@ void VisualShaderGraphPlugin::add_node(VisualShader::Type p_type, int p_id) { VisualShaderEditor::get_singleton()->graph->add_child(node); VisualShaderEditor::get_singleton()->_update_created_node(node); if (is_group) { - call_deferred("_set_node_size", (int)p_type, p_id, size); + VisualShaderEditor::get_singleton()->call_deferred("_set_node_size", (int)p_type, p_id, size); } } } void VisualShaderGraphPlugin::remove_node(VisualShader::Type p_type, int p_id) { if (visual_shader->get_shader_type() == p_type && links.has(p_id)) { - Ref<VisualShaderNodeUniform> uniform = Ref<VisualShaderNode>(links[p_id].visual_node); - - if (uniform.is_valid()) { - VisualShaderEditor::get_singleton()->call_deferred("_update_uniforms"); - } - links[p_id].graph_node->get_parent()->remove_child(links[p_id].graph_node); memdelete(links[p_id].graph_node); links.erase(p_id); @@ -1015,7 +1008,7 @@ void VisualShaderEditor::_update_created_node(GraphNode *node) { } } -void VisualShaderEditor::_update_uniforms() { +void VisualShaderEditor::_update_uniforms(bool p_update_refs) { VisualShaderNodeUniformRef::clear_uniforms(); for (int t = 0; t < VisualShader::TYPE_MAX; t++) { @@ -1052,6 +1045,30 @@ void VisualShaderEditor::_update_uniforms() { } } } + if (p_update_refs) { + graph_plugin->update_uniform_refs(); + } +} + +void VisualShaderEditor::_update_uniform_refs(Set<String> &p_deleted_names) { + for (int i = 0; i < VisualShader::TYPE_MAX; i++) { + VisualShader::Type type = VisualShader::Type(i); + + Vector<int> nodes = visual_shader->get_node_list(type); + for (int j = 0; j < nodes.size(); j++) { + if (j > 0) { + Ref<VisualShaderNodeUniformRef> ref = visual_shader->get_node(type, nodes[j]); + if (ref.is_valid()) { + if (p_deleted_names.has(ref->get_uniform_name())) { + undo_redo->add_do_method(ref.ptr(), "set_uniform_name", "[None]"); + undo_redo->add_undo_method(ref.ptr(), "set_uniform_name", ref->get_uniform_name()); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", VisualShader::Type(i), nodes[j]); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", VisualShader::Type(i), nodes[j]); + } + } + } + } + } } void VisualShaderEditor::_update_graph() { @@ -1084,7 +1101,7 @@ void VisualShaderEditor::_update_graph() { Vector<int> nodes = visual_shader->get_node_list(type); - _update_uniforms(); + _update_uniforms(false); graph_plugin->clear_links(); graph_plugin->make_dirty(true); @@ -1125,10 +1142,8 @@ void VisualShaderEditor::_add_input_port(int p_node, int p_port, int p_port_type undo_redo->create_action(TTR("Add input port")); undo_redo->add_do_method(node.ptr(), "add_input_port", p_port, p_port_type, p_name); undo_redo->add_undo_method(node.ptr(), "remove_input_port", p_port); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node); undo_redo->commit_action(); } @@ -1142,10 +1157,8 @@ void VisualShaderEditor::_add_output_port(int p_node, int p_port, int p_port_typ undo_redo->create_action(TTR("Add output port")); undo_redo->add_do_method(node.ptr(), "add_output_port", p_port, p_port_type, p_name); undo_redo->add_undo_method(node.ptr(), "remove_output_port", p_port); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node); undo_redo->commit_action(); } @@ -1159,10 +1172,8 @@ void VisualShaderEditor::_change_input_port_type(int p_type, int p_node, int p_p undo_redo->create_action(TTR("Change input port type")); undo_redo->add_do_method(node.ptr(), "set_input_port_type", p_port, p_type); undo_redo->add_undo_method(node.ptr(), "set_input_port_type", p_port, node->get_input_port_type(p_port)); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node); undo_redo->commit_action(); } @@ -1176,10 +1187,8 @@ void VisualShaderEditor::_change_output_port_type(int p_type, int p_node, int p_ undo_redo->create_action(TTR("Change output port type")); undo_redo->add_do_method(node.ptr(), "set_output_port_type", p_port, p_type); undo_redo->add_undo_method(node.ptr(), "set_output_port_type", p_port, node->get_output_port_type(p_port)); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node); undo_redo->commit_action(); } @@ -1192,8 +1201,8 @@ void VisualShaderEditor::_change_input_port_name(const String &p_text, Object *l undo_redo->create_action(TTR("Change input port name")); undo_redo->add_do_method(node.ptr(), "set_input_port_name", p_port_id, p_text); undo_redo->add_undo_method(node.ptr(), "set_input_port_name", p_port_id, node->get_input_port_name(p_port_id)); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node_id); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node_id); undo_redo->commit_action(); } @@ -1206,8 +1215,8 @@ void VisualShaderEditor::_change_output_port_name(const String &p_text, Object * undo_redo->create_action(TTR("Change output port name")); undo_redo->add_do_method(node.ptr(), "set_output_port_name", p_port_id, p_text); undo_redo->add_undo_method(node.ptr(), "set_output_port_name", p_port_id, node->get_output_port_name(p_port_id)); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node_id); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node_id); undo_redo->commit_action(); } @@ -1232,12 +1241,21 @@ void VisualShaderEditor::_remove_input_port(int p_node, int p_port) { if (to_port == p_port) { undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes_forced", type, from_node, from_port, to_node, to_port); + + undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, from_node, from_port, to_node, to_port); } else if (to_port > p_port) { undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes_forced", type, from_node, from_port, to_node, to_port); + undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, from_node, from_port, to_node, to_port); + undo_redo->add_do_method(visual_shader.ptr(), "connect_nodes_forced", type, from_node, from_port, to_node, to_port - 1); undo_redo->add_undo_method(visual_shader.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port - 1); + + undo_redo->add_do_method(graph_plugin.ptr(), "connect_nodes", type, from_node, from_port, to_node, to_port - 1); + undo_redo->add_undo_method(graph_plugin.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port - 1); } } } @@ -1245,11 +1263,8 @@ void VisualShaderEditor::_remove_input_port(int p_node, int p_port) { undo_redo->add_do_method(node.ptr(), "remove_input_port", p_port); undo_redo->add_undo_method(node.ptr(), "add_input_port", p_port, (int)node->get_input_port_type(p_port), node->get_input_port_name(p_port)); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node); undo_redo->commit_action(); } @@ -1275,12 +1290,21 @@ void VisualShaderEditor::_remove_output_port(int p_node, int p_port) { if (from_port == p_port) { undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes_forced", type, from_node, from_port, to_node, to_port); + + undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, from_node, from_port, to_node, to_port); } else if (from_port > p_port) { undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes_forced", type, from_node, from_port, to_node, to_port); + undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, from_node, from_port, to_node, to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, from_node, from_port, to_node, to_port); + undo_redo->add_do_method(visual_shader.ptr(), "connect_nodes_forced", type, from_node, from_port - 1, to_node, to_port); undo_redo->add_undo_method(visual_shader.ptr(), "disconnect_nodes", type, from_node, from_port - 1, to_node, to_port); + + undo_redo->add_do_method(graph_plugin.ptr(), "connect_nodes", type, from_node, from_port - 1, to_node, to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "disconnect_nodes", type, from_node, from_port - 1, to_node, to_port); } } } @@ -1288,11 +1312,8 @@ void VisualShaderEditor::_remove_output_port(int p_node, int p_port) { undo_redo->add_do_method(node.ptr(), "remove_output_port", p_port); undo_redo->add_undo_method(node.ptr(), "add_output_port", p_port, (int)node->get_output_port_type(p_port), node->get_output_port_name(p_port)); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type, p_node); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type, p_node); undo_redo->commit_action(); } @@ -1313,18 +1334,9 @@ void VisualShaderEditor::_expression_focus_out(Object *code_edit, int p_node) { undo_redo->create_action(TTR("Set expression")); undo_redo->add_do_method(node.ptr(), "set_expression", expression_box->get_text()); undo_redo->add_undo_method(node.ptr(), "set_expression", node->get_expression()); - undo_redo->add_do_method(this, "_rebuild"); - undo_redo->add_undo_method(this, "_rebuild"); undo_redo->commit_action(); } -void VisualShaderEditor::_rebuild() { - if (visual_shader != nullptr) { - EditorNode::get_singleton()->get_log()->clear(); - visual_shader->rebuild(); - } -} - void VisualShaderEditor::_set_node_size(int p_type, int p_node, const Vector2 &p_size) { VisualShader::Type type = get_current_shader_type(); Ref<VisualShaderNode> node = visual_shader->get_node(type, p_node); @@ -1403,7 +1415,7 @@ void VisualShaderEditor::_preview_select_port(int p_node, int p_port) { undo_redo->commit_action(); } -void VisualShaderEditor::_line_edit_changed(const String &p_text, Object *line_edit, int p_node_id) { +void VisualShaderEditor::_uniform_line_edit_changed(const String &p_text, int p_node_id) { VisualShader::Type type = get_current_shader_type(); Ref<VisualShaderNodeUniform> node = visual_shader->get_node(type, p_node_id); @@ -1411,21 +1423,28 @@ void VisualShaderEditor::_line_edit_changed(const String &p_text, Object *line_e String validated_name = visual_shader->validate_uniform_name(p_text, node); - updating = true; + if (validated_name == node->get_uniform_name()) { + return; + } + undo_redo->create_action(TTR("Set Uniform Name")); undo_redo->add_do_method(node.ptr(), "set_uniform_name", validated_name); undo_redo->add_undo_method(node.ptr(), "set_uniform_name", node->get_uniform_name()); - undo_redo->add_do_method(this, "_update_graph"); - undo_redo->add_undo_method(this, "_update_graph"); - undo_redo->commit_action(); - updating = false; + undo_redo->add_do_method(graph_plugin.ptr(), "set_uniform_name", type, p_node_id, validated_name); + undo_redo->add_undo_method(graph_plugin.ptr(), "set_uniform_name", type, p_node_id, node->get_uniform_name()); + + undo_redo->add_do_method(this, "_update_uniforms", true); + undo_redo->add_undo_method(this, "_update_uniforms", true); - Object::cast_to<LineEdit>(line_edit)->set_text(validated_name); + Set<String> changed_names; + changed_names.insert(node->get_uniform_name()); + _update_uniform_refs(changed_names); + + undo_redo->commit_action(); } -void VisualShaderEditor::_line_edit_focus_out(Object *line_edit, int p_node_id) { - String text = Object::cast_to<LineEdit>(line_edit)->get_text(); - _line_edit_changed(text, line_edit, p_node_id); +void VisualShaderEditor::_uniform_line_edit_focus_out(Object *line_edit, int p_node_id) { + _uniform_line_edit_changed(Object::cast_to<LineEdit>(line_edit)->get_text(), p_node_id); } void VisualShaderEditor::_port_name_focus_out(Object *line_edit, int p_node_id, int p_port_id, bool p_output) { @@ -1704,6 +1723,12 @@ VisualShaderNode *VisualShaderEditor::_add_node(int p_idx, int p_op_idx) { } } + VisualShaderNodeUniform *uniform = Object::cast_to<VisualShaderNodeUniform>(vsnode.ptr()); + if (uniform) { + undo_redo->add_do_method(this, "_update_uniforms", true); + undo_redo->add_undo_method(this, "_update_uniforms", true); + } + undo_redo->commit_action(); return vsnode.ptr(); } @@ -1758,14 +1783,12 @@ void VisualShaderEditor::_disconnection_request(const String &p_from, int p_from int from = p_from.to_int(); int to = p_to.to_int(); - //updating = true; seems graph edit can handle this, no need to protect undo_redo->create_action(TTR("Nodes Disconnected")); undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, from, p_from_index, to, p_to_index); undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes", type, from, p_from_index, to, p_to_index); undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, from, p_from_index, to, p_to_index); undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, from, p_from_index, to, p_to_index); undo_redo->commit_action(); - //updating = false; } void VisualShaderEditor::_connection_to_empty(const String &p_from, int p_from_slot, const Vector2 &p_release_position) { @@ -1823,6 +1846,18 @@ void VisualShaderEditor::_delete_request(int which) { } // delete a node from the graph undo_redo->add_do_method(graph_plugin.ptr(), "remove_node", type, which); + + VisualShaderNodeUniform *uniform = Object::cast_to<VisualShaderNodeUniform>(node.ptr()); + if (uniform) { + undo_redo->add_do_method(this, "_update_uniforms", true); + undo_redo->add_undo_method(this, "_update_uniforms", true); + + Set<String> uniform_names; + uniform_names.insert(uniform->get_uniform_name()); + + _update_uniform_refs(uniform_names); + } + undo_redo->commit_action(); } @@ -2222,6 +2257,8 @@ void VisualShaderEditor::_delete_nodes() { } } + Set<String> uniform_names; + for (List<int>::Element *F = to_erase.front(); F; F = F->next()) { Ref<VisualShaderNode> node = visual_shader->get_node(type, F->get()); @@ -2245,6 +2282,11 @@ void VisualShaderEditor::_delete_nodes() { if (expression) { undo_redo->add_undo_method(expression, "set_expression", expression->get_expression()); } + + VisualShaderNodeUniform *uniform = Object::cast_to<VisualShaderNodeUniform>(node.ptr()); + if (uniform) { + uniform_names.insert(uniform->get_uniform_name()); + } } List<VisualShader::Connection> used_conns; @@ -2272,6 +2314,14 @@ void VisualShaderEditor::_delete_nodes() { undo_redo->add_do_method(graph_plugin.ptr(), "remove_node", type, F->get()); } + // update uniform refs if any uniform has been deleted + if (uniform_names.size() > 0) { + undo_redo->add_do_method(this, "_update_uniforms", true); + undo_redo->add_undo_method(this, "_update_uniforms", true); + + _update_uniform_refs(uniform_names); + } + undo_redo->commit_action(); } @@ -2281,37 +2331,49 @@ void VisualShaderEditor::_mode_selected(int p_id) { _update_graph(); } -void VisualShaderEditor::_input_select_item(Ref<VisualShaderNodeInput> input, String name) { - String prev_name = input->get_input_name(); +void VisualShaderEditor::_input_select_item(Ref<VisualShaderNodeInput> p_input, String p_name) { + String prev_name = p_input->get_input_name(); - if (name == prev_name) { + if (p_name == prev_name) { return; } - bool type_changed = input->get_input_type_by_name(name) != input->get_input_type_by_name(prev_name); + bool type_changed = p_input->get_input_type_by_name(p_name) != p_input->get_input_type_by_name(prev_name); UndoRedo *undo_redo = EditorNode::get_singleton()->get_undo_redo(); undo_redo->create_action(TTR("Visual Shader Input Type Changed")); - undo_redo->add_do_method(input.ptr(), "set_input_name", name); - undo_redo->add_undo_method(input.ptr(), "set_input_name", prev_name); - - if (type_changed) { - //restore connections if type changed - VisualShader::Type type = get_current_shader_type(); - int id = visual_shader->find_node_id(type, input); - List<VisualShader::Connection> conns; - visual_shader->get_node_connections(type, &conns); - for (List<VisualShader::Connection>::Element *E = conns.front(); E; E = E->next()) { - if (E->get().from_node == id) { - undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_do_method(p_input.ptr(), "set_input_name", p_name); + undo_redo->add_undo_method(p_input.ptr(), "set_input_name", prev_name); + + // update output port + for (int type_id = 0; type_id < VisualShader::TYPE_MAX; type_id++) { + VisualShader::Type type = VisualShader::Type(type_id); + int id = visual_shader->find_node_id(type, p_input); + if (id != VisualShader::NODE_ID_INVALID) { + if (type_changed) { + List<VisualShader::Connection> conns; + visual_shader->get_node_connections(type, &conns); + for (List<VisualShader::Connection>::Element *E = conns.front(); E; E = E->next()) { + if (E->get().from_node == id) { + if (visual_shader->is_port_types_compatible(p_input->get_input_type_by_name(p_name), visual_shader->get_node(type, E->get().to_node)->get_input_port_type(E->get().to_port))) { + undo_redo->add_do_method(visual_shader.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + continue; + } + undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + } + } } + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type_id, id); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type_id, id); + break; } } - undo_redo->add_do_method(VisualShaderEditor::get_singleton(), "_update_graph"); - undo_redo->add_undo_method(VisualShaderEditor::get_singleton(), "_update_graph"); - undo_redo->commit_action(); } @@ -2330,23 +2392,32 @@ void VisualShaderEditor::_uniform_select_item(Ref<VisualShaderNodeUniformRef> p_ undo_redo->add_do_method(p_uniform_ref.ptr(), "set_uniform_name", p_name); undo_redo->add_undo_method(p_uniform_ref.ptr(), "set_uniform_name", prev_name); - if (type_changed) { - //restore connections if type changed - VisualShader::Type type = get_current_shader_type(); + // update output port + for (int type_id = 0; type_id < VisualShader::TYPE_MAX; type_id++) { + VisualShader::Type type = VisualShader::Type(type_id); int id = visual_shader->find_node_id(type, p_uniform_ref); - List<VisualShader::Connection> conns; - visual_shader->get_node_connections(type, &conns); - for (List<VisualShader::Connection>::Element *E = conns.front(); E; E = E->next()) { - if (E->get().from_node == id) { - undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); - undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + if (id != VisualShader::NODE_ID_INVALID) { + if (type_changed) { + List<VisualShader::Connection> conns; + visual_shader->get_node_connections(type, &conns); + for (List<VisualShader::Connection>::Element *E = conns.front(); E; E = E->next()) { + if (E->get().from_node == id) { + if (visual_shader->is_port_types_compatible(p_uniform_ref->get_uniform_type_by_name(p_name), visual_shader->get_node(type, E->get().to_node)->get_input_port_type(E->get().to_port))) { + continue; + } + undo_redo->add_do_method(visual_shader.ptr(), "disconnect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_undo_method(visual_shader.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_do_method(graph_plugin.ptr(), "disconnect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + undo_redo->add_undo_method(graph_plugin.ptr(), "connect_nodes", type, E->get().from_node, E->get().from_port, E->get().to_node, E->get().to_port); + } + } } + undo_redo->add_do_method(graph_plugin.ptr(), "update_node", type_id, id); + undo_redo->add_undo_method(graph_plugin.ptr(), "update_node", type_id, id); + break; } } - undo_redo->add_do_method(VisualShaderEditor::get_singleton(), "_update_graph"); - undo_redo->add_undo_method(VisualShaderEditor::get_singleton(), "_update_graph"); - undo_redo->commit_action(); } @@ -2569,7 +2640,6 @@ void VisualShaderEditor::_update_preview() { } void VisualShaderEditor::_bind_methods() { - ClassDB::bind_method("_rebuild", &VisualShaderEditor::_rebuild); ClassDB::bind_method("_update_graph", &VisualShaderEditor::_update_graph); ClassDB::bind_method("_update_options_menu", &VisualShaderEditor::_update_options_menu); ClassDB::bind_method("_add_node", &VisualShaderEditor::_add_node); @@ -3434,8 +3504,8 @@ public: } } if (p_property != "constant") { - undo_redo->add_do_method(VisualShaderEditor::get_singleton()->get_graph_plugin(), "update_property_editor_deferred", shader_type, node_id); - undo_redo->add_undo_method(VisualShaderEditor::get_singleton()->get_graph_plugin(), "update_property_editor_deferred", shader_type, node_id); + undo_redo->add_do_method(VisualShaderEditor::get_singleton()->get_graph_plugin(), "update_node_deferred", shader_type, node_id); + undo_redo->add_undo_method(VisualShaderEditor::get_singleton()->get_graph_plugin(), "update_node_deferred", shader_type, node_id); } undo_redo->commit_action(); diff --git a/editor/plugins/visual_shader_editor_plugin.h b/editor/plugins/visual_shader_editor_plugin.h index 056d4c6a11..531a117156 100644 --- a/editor/plugins/visual_shader_editor_plugin.h +++ b/editor/plugins/visual_shader_editor_plugin.h @@ -71,6 +71,7 @@ private: Map<int, InputPort> input_ports; Map<int, Port> output_ports; VBoxContainer *preview_box; + LineEdit *uniform_name; }; Ref<VisualShader> visual_shader; @@ -86,11 +87,14 @@ public: void set_connections(List<VisualShader::Connection> &p_connections); void register_link(VisualShader::Type p_type, int p_id, VisualShaderNode *p_visual_node, GraphNode *p_graph_node); void register_output_port(int p_id, int p_port, TextureButton *p_button); + void register_uniform_name(int p_id, LineEdit *p_uniform_name); void clear_links(); void set_shader_type(VisualShader::Type p_type); bool is_preview_visible(int p_id) const; bool is_dirty() const; void make_dirty(bool p_enabled); + void update_node(VisualShader::Type p_type, int p_id); + void update_node_deferred(VisualShader::Type p_type, int p_node_id); void add_node(VisualShader::Type p_type, int p_id); void remove_node(VisualShader::Type p_type, int p_id); void connect_nodes(VisualShader::Type p_type, int p_from_node, int p_from_port, int p_to_node, int p_to_port); @@ -99,10 +103,10 @@ public: void set_node_position(VisualShader::Type p_type, int p_id, const Vector2 &p_position); void set_node_size(VisualShader::Type p_type, int p_id, const Vector2 &p_size); void refresh_node_ports(VisualShader::Type p_type, int p_node); - void update_property_editor(VisualShader::Type p_type, int p_node_id); - void update_property_editor_deferred(VisualShader::Type p_type, int p_node_id); void set_input_port_default_value(VisualShader::Type p_type, int p_node_id, int p_port_id, Variant p_value); void register_default_input_button(int p_node_id, int p_port_id, Button *p_button); + void update_uniform_refs(); + void set_uniform_name(VisualShader::Type p_type, int p_node_id, const String &p_name); VisualShader::Type get_shader_type() const; VisualShaderGraphPlugin(); @@ -241,6 +245,8 @@ class VisualShaderEditor : public VBoxContainer { int custom_node_option_idx; List<String> keyword_list; + List<VisualShaderNodeUniformRef> uniform_refs; + void _draw_color_over_button(Object *obj, Color p_color); void _add_custom_node(const String &p_path); @@ -282,8 +288,8 @@ class VisualShaderEditor : public VBoxContainer { void _connection_to_empty(const String &p_from, int p_from_slot, const Vector2 &p_release_position); void _connection_from_empty(const String &p_to, int p_to_slot, const Vector2 &p_release_position); - void _line_edit_changed(const String &p_text, Object *line_edit, int p_node_id); - void _line_edit_focus_out(Object *line_edit, int p_node_id); + void _uniform_line_edit_changed(const String &p_text, int p_node_id); + void _uniform_line_edit_focus_out(Object *line_edit, int p_node_id); void _port_name_focus_out(Object *line_edit, int p_node_id, int p_port_id, bool p_output); @@ -306,7 +312,6 @@ class VisualShaderEditor : public VBoxContainer { Ref<VisualShaderGraphPlugin> graph_plugin; void _mode_selected(int p_id); - void _rebuild(); void _input_select_item(Ref<VisualShaderNodeInput> input, String name); void _uniform_select_item(Ref<VisualShaderNodeUniformRef> p_uniform, String p_name); @@ -347,7 +352,8 @@ class VisualShaderEditor : public VBoxContainer { bool _is_available(int p_mode); void _update_created_node(GraphNode *node); - void _update_uniforms(); + void _update_uniforms(bool p_update_refs); + void _update_uniform_refs(Set<String> &p_names); protected: void _notification(int p_what); diff --git a/methods.py b/methods.py index 8f14de6dac..555d9fa594 100644 --- a/methods.py +++ b/methods.py @@ -534,13 +534,28 @@ def generate_vs_project(env, num_jobs): common_build_prefix = [ 'cmd /V /C set "plat=$(PlatformTarget)"', '(if "$(PlatformTarget)"=="x64" (set "plat=x86_amd64"))', - 'set "tools=yes"', + 'set "tools=%s"' % env["tools"], '(if "$(Configuration)"=="release" (set "tools=no"))', - 'set "custom_modules=%s"' % env["custom_modules"], 'call "' + batch_file + '" !plat!', ] - result = " ^& ".join(common_build_prefix + [commands]) + # windows allows us to have spaces in paths, so we need + # to double quote off the directory. However, the path ends + # in a backslash, so we need to remove this, lest it escape the + # last double quote off, confusing MSBuild + common_build_postfix = [ + "--directory=\"$(ProjectDir.TrimEnd('\\'))\"", + "platform=windows", + "target=$(Configuration)", + "progress=no", + "tools=!tools!", + "-j%s" % num_jobs, + ] + + if env["custom_modules"]: + common_build_postfix.append("custom_modules=%s" % env["custom_modules"]) + + result = " ^& ".join(common_build_prefix + [" ".join([commands] + common_build_postfix)]) return result env.AddToVSProject(env.core_sources) @@ -550,22 +565,9 @@ def generate_vs_project(env, num_jobs): env.AddToVSProject(env.servers_sources) env.AddToVSProject(env.editor_sources) - # windows allows us to have spaces in paths, so we need - # to double quote off the directory. However, the path ends - # in a backslash, so we need to remove this, lest it escape the - # last double quote off, confusing MSBuild - env["MSVSBUILDCOM"] = build_commandline( - "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" platform=windows progress=no target=$(Configuration)" - " tools=!tools! custom_modules=!custom_modules! -j" + str(num_jobs) - ) - env["MSVSREBUILDCOM"] = build_commandline( - "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" platform=windows progress=no target=$(Configuration)" - " tools=!tools! custom_modules=!custom_modules! vsproj=yes -j" + str(num_jobs) - ) - env["MSVSCLEANCOM"] = build_commandline( - "scons --directory=\"$(ProjectDir.TrimEnd('\\'))\" --clean platform=windows progress=no" - " target=$(Configuration) tools=!tools! custom_modules=!custom_modules! -j" + str(num_jobs) - ) + env["MSVSBUILDCOM"] = build_commandline("scons") + env["MSVSREBUILDCOM"] = build_commandline("scons vsproj=yes") + env["MSVSCLEANCOM"] = build_commandline("scons --clean") # This version information (Win32, x64, Debug, Release, Release_Debug seems to be # required for Visual Studio to understand that it needs to generate an NMAKE diff --git a/modules/bullet/bullet_types_converter.cpp b/modules/bullet/bullet_types_converter.cpp index c9493d8892..7ecad9b78a 100644 --- a/modules/bullet/bullet_types_converter.cpp +++ b/modules/bullet/bullet_types_converter.cpp @@ -95,12 +95,61 @@ void G_TO_B(Transform const &inVal, btTransform &outVal) { } void UNSCALE_BT_BASIS(btTransform &scaledBasis) { - btMatrix3x3 &m(scaledBasis.getBasis()); - btVector3 column0(m[0][0], m[1][0], m[2][0]); - btVector3 column1(m[0][1], m[1][1], m[2][1]); - btVector3 column2(m[0][2], m[1][2], m[2][2]); + btMatrix3x3 &basis(scaledBasis.getBasis()); + btVector3 column0 = basis.getColumn(0); + btVector3 column1 = basis.getColumn(1); + btVector3 column2 = basis.getColumn(2); + + // Check for zero scaling. + if (btFuzzyZero(column0[0])) { + if (btFuzzyZero(column1[1])) { + if (btFuzzyZero(column2[2])) { + // All dimensions are fuzzy zero. Create a default basis. + column0 = btVector3(1, 0, 0); + column1 = btVector3(0, 1, 0); + column2 = btVector3(0, 0, 1); + } else { // Column 2 scale not fuzzy zero. + // Create two vectors orthogonal to row 2. + // Ensure that a default basis is created if row 2 = <0, 0, 1> + column1 = btVector3(0, column2[2], -column2[1]); + column0 = column1.cross(column2); + } + } else { // Column 1 scale not fuzzy zero. + if (btFuzzyZero(column2[2])) { + // Create two vectors othogonal to column 1. + // Ensure that a default basis is created if column 1 = <0, 1, 0> + column0 = btVector3(column1[1], -column1[0], 0); + column2 = column0.cross(column1); + } else { // Column 1 and column 2 scales not fuzzy zero. + // Create column 0 orthogonal to column 1 and column 2. + column0 = column1.cross(column2); + } + } + } else { // Column 0 scale not fuzzy zero. + if (btFuzzyZero(column1[1])) { + if (btFuzzyZero(column2[2])) { + // Create two vectors orthogonal to column 0. + // Ensure that a default basis is created if column 0 = <1, 0, 0> + column2 = btVector3(-column0[2], 0, column0[0]); + column1 = column2.cross(column0); + } else { // Column 0 and column 2 scales not fuzzy zero. + // Create column 1 orthogonal to column 0 and column 2. + column1 = column2.cross(column0); + } + } else { // Column 0 and column 1 scales not fuzzy zero. + if (btFuzzyZero(column2[2])) { + // Create column 2 orthogonal to column 0 and column 1. + column2 = column0.cross(column1); + } + } + } + + // Normalize column0.normalize(); column1.normalize(); column2.normalize(); - m.setValue(column0[0], column1[0], column2[0], column0[1], column1[1], column2[1], column0[2], column1[2], column2[2]); + + basis.setValue(column0[0], column1[0], column2[0], + column0[1], column1[1], column2[1], + column0[2], column1[2], column2[2]); } diff --git a/modules/gdnative/gdnative_library_editor_plugin.cpp b/modules/gdnative/gdnative_library_editor_plugin.cpp index fdd755845f..f0f095ddf5 100644 --- a/modules/gdnative/gdnative_library_editor_plugin.cpp +++ b/modules/gdnative/gdnative_library_editor_plugin.cpp @@ -318,6 +318,7 @@ GDNativeLibraryEditor::GDNativeLibraryEditor() { platform_ios.name = "iOS"; platform_ios.entries.push_back("armv7"); platform_ios.entries.push_back("arm64"); + platform_ios.entries.push_back("x86_64"); // iOS can use both Static and Dynamic libraries. // Frameworks is actually a folder with files. platform_ios.library_extension = "*.framework; Framework, *.xcframework; Binary Framework, *.a; Static Library, *.dylib; Dynamic Library"; diff --git a/modules/gdscript/gdscript.cpp b/modules/gdscript/gdscript.cpp index 0263e32c5b..7f303a966d 100644 --- a/modules/gdscript/gdscript.cpp +++ b/modules/gdscript/gdscript.cpp @@ -1053,7 +1053,9 @@ GDScript::~GDScript() { memdelete(E->get()); } - GDScriptCache::remove_script(get_path()); + if (GDScriptCache::singleton) { // Cache may have been already destroyed at engine shutdown. + GDScriptCache::remove_script(get_path()); + } _save_orphaned_subclasses(); @@ -2035,7 +2037,23 @@ GDScriptLanguage::~GDScriptLanguage() { if (_call_stack) { memdelete_arr(_call_stack); } - singleton = nullptr; + + // Clear dependencies between scripts, to ensure cyclic references are broken (to avoid leaks at exit). + while (script_list.first()) { + GDScript *script = script_list.first()->self(); + for (Map<StringName, GDScriptFunction *>::Element *E = script->member_functions.front(); E; E = E->next()) { + GDScriptFunction *func = E->get(); + for (int i = 0; i < func->argument_types.size(); i++) { + func->argument_types.write[i].script_type_ref = Ref<Script>(); + } + func->return_type.script_type_ref = Ref<Script>(); + } + for (Map<StringName, GDScript::MemberInfo>::Element *E = script->member_indices.front(); E; E = E->next()) { + E->get().data_type.script_type_ref = Ref<Script>(); + } + } + + singleton = NULL; } void GDScriptLanguage::add_orphan_subclass(const String &p_qualified_name, const ObjectID &p_subclass) { diff --git a/modules/gdscript/gdscript_analyzer.cpp b/modules/gdscript/gdscript_analyzer.cpp index b4ede55f0a..943a49060f 100644 --- a/modules/gdscript/gdscript_analyzer.cpp +++ b/modules/gdscript/gdscript_analyzer.cpp @@ -1286,7 +1286,7 @@ void GDScriptAnalyzer::resolve_parameter(GDScriptParser::ParameterNode *p_parame if (p_parameter->default_value != nullptr) { if (!is_type_compatible(result, p_parameter->default_value->get_datatype())) { - push_error(vformat(R"(Type of default value for parameter "%s" (%s) is not compatible with paremeter type (%s).)", p_parameter->identifier->name, p_parameter->default_value->get_datatype().to_string(), p_parameter->datatype_specifier->get_datatype().to_string()), p_parameter->default_value); + push_error(vformat(R"(Type of default value for parameter "%s" (%s) is not compatible with parameter type (%s).)", p_parameter->identifier->name, p_parameter->default_value->get_datatype().to_string(), p_parameter->datatype_specifier->get_datatype().to_string()), p_parameter->default_value); } else if (p_parameter->default_value->get_datatype().is_variant()) { mark_node_unsafe(p_parameter); } diff --git a/modules/gdscript/gdscript_compiler.cpp b/modules/gdscript/gdscript_compiler.cpp index c3d651ee79..bad450c9f9 100644 --- a/modules/gdscript/gdscript_compiler.cpp +++ b/modules/gdscript/gdscript_compiler.cpp @@ -76,7 +76,7 @@ void GDScriptCompiler::_set_error(const String &p_error, const GDScriptParser::N } } -GDScriptDataType GDScriptCompiler::_gdtype_from_datatype(const GDScriptParser::DataType &p_datatype) const { +GDScriptDataType GDScriptCompiler::_gdtype_from_datatype(const GDScriptParser::DataType &p_datatype, GDScript *p_owner) const { if (!p_datatype.is_set() || !p_datatype.is_hard_type()) { return GDScriptDataType(); } @@ -98,7 +98,7 @@ GDScriptDataType GDScriptCompiler::_gdtype_from_datatype(const GDScriptParser::D } break; case GDScriptParser::DataType::SCRIPT: { result.kind = GDScriptDataType::SCRIPT; - result.script_type = p_datatype.script_type; + result.script_type = Ref<Script>(p_datatype.script_type).ptr(); result.native_type = result.script_type->get_instance_base_type(); } break; case GDScriptParser::DataType::CLASS: { @@ -124,11 +124,11 @@ GDScriptDataType GDScriptCompiler::_gdtype_from_datatype(const GDScriptParser::D names.pop_back(); } result.kind = GDScriptDataType::GDSCRIPT; - result.script_type = script; + result.script_type = script.ptr(); result.native_type = script->get_instance_base_type(); } else { result.kind = GDScriptDataType::GDSCRIPT; - result.script_type = GDScriptCache::get_shallow_script(p_datatype.script_path, main_script->path); + result.script_type = GDScriptCache::get_shallow_script(p_datatype.script_path, main_script->path).ptr(); result.native_type = p_datatype.native_type; } } @@ -149,6 +149,12 @@ GDScriptDataType GDScriptCompiler::_gdtype_from_datatype(const GDScriptParser::D } } + // Only hold strong reference to the script if it's not the owner of the + // element qualified with this type, to avoid cyclic references (leaks). + if (result.script_type && result.script_type != p_owner) { + result.script_type_ref = Ref<Script>(result.script_type); + } + return result; } @@ -1668,7 +1674,7 @@ Error GDScriptCompiler::_parse_function(GDScript *p_script, const GDScriptParser func_name = p_func->identifier->name; is_static = p_func->is_static; rpc_mode = p_func->rpc_mode; - return_type = _gdtype_from_datatype(p_func->get_datatype()); + return_type = _gdtype_from_datatype(p_func->get_datatype(), p_script); } else { if (p_for_ready) { func_name = "_ready"; @@ -1685,7 +1691,7 @@ Error GDScriptCompiler::_parse_function(GDScript *p_script, const GDScriptParser if (p_func) { for (int i = 0; i < p_func->parameters.size(); i++) { const GDScriptParser::ParameterNode *parameter = p_func->parameters[i]; - GDScriptDataType par_type = _gdtype_from_datatype(parameter->get_datatype()); + GDScriptDataType par_type = _gdtype_from_datatype(parameter->get_datatype(), p_script); uint32_t par_addr = codegen.generator->add_parameter(parameter->identifier->name, parameter->default_value != nullptr, par_type); codegen.parameters[parameter->identifier->name] = GDScriptCodeGenerator::Address(GDScriptCodeGenerator::Address::FUNCTION_PARAMETER, par_addr, par_type); @@ -1830,7 +1836,7 @@ Error GDScriptCompiler::_parse_setter_getter(GDScript *p_script, const GDScriptP return_type.kind = GDScriptDataType::BUILTIN; return_type.builtin_type = Variant::NIL; } else { - return_type = _gdtype_from_datatype(p_variable->get_datatype()); + return_type = _gdtype_from_datatype(p_variable->get_datatype(), p_script); } codegen.generator->write_start(p_script, func_name, false, p_variable->rpc_mode, return_type); @@ -1927,7 +1933,7 @@ Error GDScriptCompiler::_parse_class_level(GDScript *p_script, const GDScriptPar p_script->native = native; } break; case GDScriptDataType::GDSCRIPT: { - Ref<GDScript> base = base_type.script_type; + Ref<GDScript> base = Ref<GDScript>(base_type.script_type); p_script->base = base; p_script->_base = base.ptr(); @@ -1994,7 +2000,7 @@ Error GDScriptCompiler::_parse_class_level(GDScript *p_script, const GDScriptPar break; } minfo.rpc_mode = variable->rpc_mode; - minfo.data_type = _gdtype_from_datatype(variable->get_datatype()); + minfo.data_type = _gdtype_from_datatype(variable->get_datatype(), p_script); PropertyInfo prop_info = minfo.data_type; prop_info.name = name; diff --git a/modules/gdscript/gdscript_compiler.h b/modules/gdscript/gdscript_compiler.h index 80fba6a934..db02079d26 100644 --- a/modules/gdscript/gdscript_compiler.h +++ b/modules/gdscript/gdscript_compiler.h @@ -84,7 +84,7 @@ class GDScriptCompiler { Ref<Script> script = obj->get_script(); if (script.is_valid()) { - type.script_type = script; + type.script_type = script.ptr(); Ref<GDScript> gdscript = script; if (gdscript.is_valid()) { type.kind = GDScriptDataType::GDSCRIPT; @@ -125,7 +125,7 @@ class GDScriptCompiler { Error _create_binary_operator(CodeGen &codegen, const GDScriptParser::BinaryOpNode *on, Variant::Operator op, bool p_initializer = false, const GDScriptCodeGenerator::Address &p_index_addr = GDScriptCodeGenerator::Address()); Error _create_binary_operator(CodeGen &codegen, const GDScriptParser::ExpressionNode *p_left_operand, const GDScriptParser::ExpressionNode *p_right_operand, Variant::Operator op, bool p_initializer = false, const GDScriptCodeGenerator::Address &p_index_addr = GDScriptCodeGenerator::Address()); - GDScriptDataType _gdtype_from_datatype(const GDScriptParser::DataType &p_datatype) const; + GDScriptDataType _gdtype_from_datatype(const GDScriptParser::DataType &p_datatype, GDScript *p_owner = nullptr) const; GDScriptCodeGenerator::Address _parse_assign_right_expression(CodeGen &codegen, Error &r_error, const GDScriptParser::AssignmentNode *p_assignmentint, const GDScriptCodeGenerator::Address &p_index_addr = GDScriptCodeGenerator::Address()); GDScriptCodeGenerator::Address _parse_expression(CodeGen &codegen, Error &r_error, const GDScriptParser::ExpressionNode *p_expression, bool p_root = false, bool p_initializer = false, const GDScriptCodeGenerator::Address &p_index_addr = GDScriptCodeGenerator::Address()); diff --git a/modules/gdscript/gdscript_function.h b/modules/gdscript/gdscript_function.h index d1c98a5456..c98ac09310 100644 --- a/modules/gdscript/gdscript_function.h +++ b/modules/gdscript/gdscript_function.h @@ -56,7 +56,8 @@ struct GDScriptDataType { bool has_type = false; Variant::Type builtin_type = Variant::NIL; StringName native_type; - Ref<Script> script_type; + Script *script_type = nullptr; + Ref<Script> script_type_ref; bool is_type(const Variant &p_variant, bool p_allow_implicit_conversion = false) const { if (!has_type) { diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index e8dea8180a..2a69db130b 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -1042,7 +1042,7 @@ GDScriptParser::EnumNode *GDScriptParser::parse_enum() { if (check(GDScriptTokenizer::Token::BRACE_CLOSE)) { break; // Allow trailing comma. } - if (consume(GDScriptTokenizer::Token::IDENTIFIER, R"(Expected identifer for enum key.)")) { + if (consume(GDScriptTokenizer::Token::IDENTIFIER, R"(Expected identifier for enum key.)")) { EnumNode::Value item; item.identifier = parse_identifier(); item.parent_enum = enum_node; @@ -2516,7 +2516,7 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_call(ExpressionNode *p_pre GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p_previous_operand, bool p_can_assign) { if (match(GDScriptTokenizer::Token::LITERAL)) { if (previous.literal.get_type() != Variant::STRING) { - push_error(R"(Expect node path as string or identifer after "$".)"); + push_error(R"(Expect node path as string or identifier after "$".)"); return nullptr; } GetNodeNode *get_node = alloc_node<GetNodeNode>(); @@ -2539,7 +2539,7 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_get_node(ExpressionNode *p } while (match(GDScriptTokenizer::Token::SLASH)); return get_node; } else { - push_error(R"(Expect node path as string or identifer after "$".)"); + push_error(R"(Expect node path as string or identifier after "$".)"); return nullptr; } } diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index ed27604aec..9a40aa50ac 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -578,7 +578,7 @@ GDScriptTokenizer::Token GDScriptTokenizer::potential_identifier() { } void GDScriptTokenizer::newline(bool p_make_token) { - // Don't overwrite previous newline, nor create if we want a line contination. + // Don't overwrite previous newline, nor create if we want a line continuation. if (p_make_token && !pending_newline && !line_continuation) { Token newline(Token::NEWLINE); newline.start_line = line; diff --git a/modules/mono/editor/Godot.NET.Sdk/Godot.NET.Sdk/Sdk/Sdk.props b/modules/mono/editor/Godot.NET.Sdk/Godot.NET.Sdk/Sdk/Sdk.props index dfc59e6ccb..4f8faffde2 100644 --- a/modules/mono/editor/Godot.NET.Sdk/Godot.NET.Sdk/Sdk/Sdk.props +++ b/modules/mono/editor/Godot.NET.Sdk/Godot.NET.Sdk/Sdk/Sdk.props @@ -88,7 +88,7 @@ <PropertyGroup> <!-- ExportDebug also defines DEBUG like Debug does. --> <DefineConstants Condition=" '$(Configuration)' == 'ExportDebug' ">$(DefineConstants);DEBUG</DefineConstants> - <!-- Debug defines TOOLS to differenciate between Debug and ExportDebug configurations. --> + <!-- Debug defines TOOLS to differentiate between Debug and ExportDebug configurations. --> <DefineConstants Condition=" '$(Configuration)' == 'Debug' ">$(DefineConstants);TOOLS</DefineConstants> <DefineConstants>$(GodotDefineConstants);$(DefineConstants)</DefineConstants> diff --git a/modules/mono/editor/GodotTools/GodotTools/Export/AotBuilder.cs b/modules/mono/editor/GodotTools/GodotTools/Export/AotBuilder.cs index f60e469503..42ede3f3f3 100755 --- a/modules/mono/editor/GodotTools/GodotTools/Export/AotBuilder.cs +++ b/modules/mono/editor/GodotTools/GodotTools/Export/AotBuilder.cs @@ -328,7 +328,7 @@ MONO_AOT_MODE_LAST = 1000, if (lipoExitCode != 0) throw new Exception($"Command 'lipo' exited with code: {lipoExitCode}"); - // TODO: Add the AOT lib and interpreter libs as device only to supress warnings when targeting the simulator + // TODO: Add the AOT lib and interpreter libs as device only to suppress warnings when targeting the simulator // Add the fat AOT static library to the Xcode project exporter.AddIosProjectStaticLib(fatOutputFilePath); diff --git a/modules/visual_script/visual_script_editor.cpp b/modules/visual_script/visual_script_editor.cpp index 7c3af016b9..b1d8c05d87 100644 --- a/modules/visual_script/visual_script_editor.cpp +++ b/modules/visual_script/visual_script_editor.cpp @@ -1124,8 +1124,8 @@ void VisualScriptEditor::_update_members() { TreeItem *ti = members->create_item(variables); ti->set_text(0, E->get()); - Variant var = script->get_variable_default_value(E->get()); - ti->set_suffix(0, "= " + String(var)); + + ti->set_suffix(0, "= " + _sanitized_variant_text(E->get())); ti->set_icon(0, type_icons[script->get_variable_info(E->get()).type]); ti->set_selectable(0, true); @@ -1167,6 +1167,18 @@ void VisualScriptEditor::_update_members() { updating_members = false; } +String VisualScriptEditor::_sanitized_variant_text(const StringName &property_name) { + Variant var = script->get_variable_default_value(property_name); + + if (script->get_variable_info(property_name).type != Variant::NIL) { + Callable::CallError ce; + const Variant *converted = &var; + var = Variant::construct(script->get_variable_info(property_name).type, &converted, 1, ce, false); + } + + return String(var); +} + void VisualScriptEditor::_member_selected() { if (updating_members) { return; diff --git a/modules/visual_script/visual_script_editor.h b/modules/visual_script/visual_script_editor.h index b5d5589250..66e435741f 100644 --- a/modules/visual_script/visual_script_editor.h +++ b/modules/visual_script/visual_script_editor.h @@ -146,6 +146,7 @@ class VisualScriptEditor : public ScriptEditorBase { bool updating_members; void _update_members(); + String _sanitized_variant_text(const StringName &property_name); StringName selected; diff --git a/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java b/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java index 138c2de94c..5aa48d87da 100644 --- a/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java +++ b/platform/android/java/lib/src/org/godotengine/godot/FullScreenGodotApp.java @@ -34,6 +34,8 @@ import android.content.Intent; import android.os.Bundle; import android.view.KeyEvent; +import androidx.annotation.NonNull; +import androidx.annotation.Nullable; import androidx.fragment.app.FragmentActivity; /** @@ -43,13 +45,18 @@ import androidx.fragment.app.FragmentActivity; * within an Android app. */ public abstract class FullScreenGodotApp extends FragmentActivity { - protected Godot godotFragment; + @Nullable + private Godot godotFragment; @Override public void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.godot_app_layout); - godotFragment = new Godot(); + godotFragment = initGodotInstance(); + if (godotFragment == null) { + throw new IllegalStateException("Godot instance must be non-null."); + } + getSupportFragmentManager().beginTransaction().replace(R.id.godot_fragment_container, godotFragment).setPrimaryNavigationFragment(godotFragment).commitNowAllowingStateLoss(); } @@ -76,4 +83,17 @@ public abstract class FullScreenGodotApp extends FragmentActivity { } return super.onKeyMultiple(inKeyCode, repeatCount, event); } + + /** + * Used to initialize the Godot fragment instance in {@link FullScreenGodotApp#onCreate(Bundle)}. + */ + @NonNull + protected Godot initGodotInstance() { + return new Godot(); + } + + @Nullable + protected final Godot getGodotFragment() { + return godotFragment; + } } diff --git a/platform/iphone/godot_view_gesture_recognizer.h b/platform/iphone/godot_view_gesture_recognizer.h index ca3bd808d1..8d84914712 100644 --- a/platform/iphone/godot_view_gesture_recognizer.h +++ b/platform/iphone/godot_view_gesture_recognizer.h @@ -32,7 +32,7 @@ // emulating UIScrollView's UIScrollViewDelayedTouchesBeganGestureRecognizer. // It catches all gestures incoming to UIView and delays them for 150ms // (the same value used by UIScrollViewDelayedTouchesBeganGestureRecognizer) -// If touch cancelation or end message is fired it fires delayed +// If touch cancellation or end message is fired it fires delayed // begin touch immediately as well as last touch signal #import <UIKit/UIKit.h> diff --git a/platform/javascript/SCsub b/platform/javascript/SCsub index dcf9a46bf9..21456efde5 100644 --- a/platform/javascript/SCsub +++ b/platform/javascript/SCsub @@ -66,5 +66,5 @@ env.Zip( zip_files, ZIPROOT=zip_dir, ZIPSUFFIX="${PROGSUFFIX}${ZIPSUFFIX}", - ZIPCOMSTR="Archving $SOURCES as $TARGET", + ZIPCOMSTR="Archiving $SOURCES as $TARGET", ) diff --git a/platform/linuxbsd/display_server_x11.cpp b/platform/linuxbsd/display_server_x11.cpp index 6e925d6ac7..e3cf992302 100644 --- a/platform/linuxbsd/display_server_x11.cpp +++ b/platform/linuxbsd/display_server_x11.cpp @@ -3575,7 +3575,12 @@ DisplayServerX11::DisplayServerX11(const String &p_rendering_driver, WindowMode xrandr_handle = dlopen("libXrandr.so.2", RTLD_LAZY); if (!xrandr_handle) { err = dlerror(); - fprintf(stderr, "could not load libXrandr.so.2, Error: %s\n", err); + // For some arcane reason, NetBSD now ships libXrandr.so.3 while the rest of the world has libXrandr.so.2... + // In case this happens for other X11 platforms in the future, let's give it a try too before failing. + xrandr_handle = dlopen("libXrandr.so.3", RTLD_LAZY); + if (!xrandr_handle) { + fprintf(stderr, "could not load libXrandr.so.2, Error: %s\n", err); + } } else { XRRQueryVersion(x11_display, &xrandr_major, &xrandr_minor); if (((xrandr_major << 8) | xrandr_minor) >= 0x0105) { diff --git a/platform/linuxbsd/platform_config.h b/platform/linuxbsd/platform_config.h index 764666681f..571ad03db0 100644 --- a/platform/linuxbsd/platform_config.h +++ b/platform/linuxbsd/platform_config.h @@ -31,7 +31,15 @@ #ifdef __linux__ #include <alloca.h> #endif -#if defined(__FreeBSD__) || defined(__OpenBSD__) -#include <stdlib.h> + +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) +#include <stdlib.h> // alloca +// FreeBSD and OpenBSD use pthread_set_name_np, while other platforms, +// include NetBSD, use pthread_setname_np. NetBSD's version however requires +// a different format, we handle this directly in thread_posix. +#ifdef __NetBSD__ +#define PTHREAD_NETBSD_SET_NAME +#else #define PTHREAD_BSD_SET_NAME #endif +#endif diff --git a/platform/server/platform_config.h b/platform/server/platform_config.h index bdff93f02b..73136ec81b 100644 --- a/platform/server/platform_config.h +++ b/platform/server/platform_config.h @@ -31,10 +31,19 @@ #if defined(__linux__) || defined(__APPLE__) #include <alloca.h> #endif -#if defined(__FreeBSD__) || defined(__OpenBSD__) -#include <stdlib.h> + +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) +#include <stdlib.h> // alloca +// FreeBSD and OpenBSD use pthread_set_name_np, while other platforms, +// include NetBSD, use pthread_setname_np. NetBSD's version however requires +// a different format, we handle this directly in thread_posix. +#ifdef __NetBSD__ +#define PTHREAD_NETBSD_SET_NAME +#else #define PTHREAD_BSD_SET_NAME #endif +#endif + #ifdef __APPLE__ #define PTHREAD_RENAME_SELF #endif diff --git a/platform/windows/detect.py b/platform/windows/detect.py index c380142c72..6b503c1561 100644 --- a/platform/windows/detect.py +++ b/platform/windows/detect.py @@ -439,7 +439,7 @@ def configure_mingw(env): else: env.Append(LIBS=["cfgmgr32"]) - ## TODO !!! Reenable when OpenGLES Rendering Device is implemented !!! + ## TODO !!! Re-enable when OpenGLES Rendering Device is implemented !!! # env.Append(CPPDEFINES=['OPENGL_ENABLED']) env.Append(LIBS=["opengl32"]) diff --git a/scene/2d/cpu_particles_2d.cpp b/scene/2d/cpu_particles_2d.cpp index e3a632c98a..dd21fcfe22 100644 --- a/scene/2d/cpu_particles_2d.cpp +++ b/scene/2d/cpu_particles_2d.cpp @@ -1291,7 +1291,7 @@ void CPUParticles2D::_bind_methods() { ClassDB::bind_method(D_METHOD("convert_from_particles", "particles"), &CPUParticles2D::convert_from_particles); ADD_GROUP("Emission Shape", "emission_"); - ADD_PROPERTY(PropertyInfo(Variant::INT, "emission_shape", PROPERTY_HINT_ENUM, "Point,Sphere,Box,Points,Directed Points"), "set_emission_shape", "get_emission_shape"); + ADD_PROPERTY(PropertyInfo(Variant::INT, "emission_shape", PROPERTY_HINT_ENUM, "Point,Sphere,Box,Points,Directed Points", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), "set_emission_shape", "get_emission_shape"); ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "emission_sphere_radius", PROPERTY_HINT_RANGE, "0.01,128,0.01"), "set_emission_sphere_radius", "get_emission_sphere_radius"); ADD_PROPERTY(PropertyInfo(Variant::VECTOR2, "emission_rect_extents"), "set_emission_rect_extents", "get_emission_rect_extents"); ADD_PROPERTY(PropertyInfo(Variant::PACKED_VECTOR2_ARRAY, "emission_points"), "set_emission_points", "get_emission_points"); diff --git a/scene/3d/cpu_particles_3d.cpp b/scene/3d/cpu_particles_3d.cpp index ad8760251f..ded83b75ac 100644 --- a/scene/3d/cpu_particles_3d.cpp +++ b/scene/3d/cpu_particles_3d.cpp @@ -1345,7 +1345,7 @@ void CPUParticles3D::_bind_methods() { ClassDB::bind_method(D_METHOD("convert_from_particles", "particles"), &CPUParticles3D::convert_from_particles); ADD_GROUP("Emission Shape", "emission_"); - ADD_PROPERTY(PropertyInfo(Variant::INT, "emission_shape", PROPERTY_HINT_ENUM, "Point,Sphere,Box,Points,Directed Points"), "set_emission_shape", "get_emission_shape"); + ADD_PROPERTY(PropertyInfo(Variant::INT, "emission_shape", PROPERTY_HINT_ENUM, "Point,Sphere,Box,Points,Directed Points", PROPERTY_USAGE_DEFAULT | PROPERTY_USAGE_UPDATE_ALL_IF_MODIFIED), "set_emission_shape", "get_emission_shape"); ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "emission_sphere_radius", PROPERTY_HINT_RANGE, "0.01,128,0.01"), "set_emission_sphere_radius", "get_emission_sphere_radius"); ADD_PROPERTY(PropertyInfo(Variant::VECTOR3, "emission_box_extents"), "set_emission_box_extents", "get_emission_box_extents"); ADD_PROPERTY(PropertyInfo(Variant::PACKED_VECTOR3_ARRAY, "emission_points"), "set_emission_points", "get_emission_points"); diff --git a/scene/resources/packed_scene.cpp b/scene/resources/packed_scene.cpp index cb201bc539..5e8bfd9387 100644 --- a/scene/resources/packed_scene.cpp +++ b/scene/resources/packed_scene.cpp @@ -848,7 +848,7 @@ Error SceneState::pack(Node *p_scene) { Map<Node *, int> node_map; Map<Node *, int> nodepath_map; - //if using scene inheritance, pack the scene it inherits from + // If using scene inheritance, pack the scene it inherits from. if (scene->get_scene_inherited_state().is_valid()) { String path = scene->get_scene_inherited_state()->get_path(); Ref<PackedScene> instance = ResourceLoader::load(path); @@ -856,8 +856,8 @@ Error SceneState::pack(Node *p_scene) { base_scene_idx = _vm_get_variant(instance, variant_map); } } - //instanced, only direct sub-scnes are supported of course + // Instanced, only direct sub-scenes are supported of course. Error err = _parse_node(scene, scene, -1, name_map, variant_map, node_map, nodepath_map); if (err) { clear(); diff --git a/scene/resources/physics_material.cpp b/scene/resources/physics_material.cpp index 2a5cd1101a..59bf8c0e13 100644 --- a/scene/resources/physics_material.cpp +++ b/scene/resources/physics_material.cpp @@ -43,9 +43,9 @@ void PhysicsMaterial::_bind_methods() { ClassDB::bind_method(D_METHOD("set_absorbent", "absorbent"), &PhysicsMaterial::set_absorbent); ClassDB::bind_method(D_METHOD("is_absorbent"), &PhysicsMaterial::is_absorbent); - ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "friction"), "set_friction", "get_friction"); + ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "friction", PROPERTY_HINT_RANGE, "0,1,0.01"), "set_friction", "get_friction"); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "rough"), "set_rough", "is_rough"); - ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "bounce"), "set_bounce", "get_bounce"); + ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "bounce", PROPERTY_HINT_RANGE, "0,1,0.01"), "set_bounce", "get_bounce"); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "absorbent"), "set_absorbent", "is_absorbent"); } diff --git a/scene/resources/visual_shader.cpp b/scene/resources/visual_shader.cpp index 803ab265d4..b8d2003e68 100644 --- a/scene/resources/visual_shader.cpp +++ b/scene/resources/visual_shader.cpp @@ -101,6 +101,14 @@ bool VisualShaderNode::is_code_generated() const { return true; } +bool VisualShaderNode::is_show_prop_names() const { + return false; +} + +bool VisualShaderNode::is_use_prop_slots() const { + return false; +} + Vector<VisualShader::DefaultTextureParam> VisualShaderNode::get_default_texture_parameters(VisualShader::Type p_type, int p_id) const { return Vector<VisualShader::DefaultTextureParam>(); } @@ -2123,6 +2131,15 @@ void VisualShaderNodeUniformRef::clear_uniforms() { uniforms.clear(); } +bool VisualShaderNodeUniformRef::has_uniform(const String &p_name) { + for (List<VisualShaderNodeUniformRef::Uniform>::Element *E = uniforms.front(); E; E = E->next()) { + if (E->get().name == p_name) { + return true; + } + } + return false; +} + String VisualShaderNodeUniformRef::get_caption() const { return "UniformRef"; } @@ -2140,10 +2157,6 @@ String VisualShaderNodeUniformRef::get_input_port_name(int p_port) const { } int VisualShaderNodeUniformRef::get_output_port_count() const { - if (uniform_name == "[None]") { - return 0; - } - switch (uniform_type) { case UniformType::UNIFORM_TYPE_FLOAT: return 1; @@ -2162,7 +2175,7 @@ int VisualShaderNodeUniformRef::get_output_port_count() const { default: break; } - return 0; + return 1; } VisualShaderNodeUniformRef::PortType VisualShaderNodeUniformRef::get_output_port_type(int p_port) const { @@ -2222,8 +2235,8 @@ String VisualShaderNodeUniformRef::get_output_port_name(int p_port) const { void VisualShaderNodeUniformRef::set_uniform_name(const String &p_name) { uniform_name = p_name; - if (p_name != "[None]") { - uniform_type = get_uniform_type_by_name(p_name); + if (uniform_name != "[None]") { + uniform_type = get_uniform_type_by_name(uniform_name); } else { uniform_type = UniformType::UNIFORM_TYPE_FLOAT; } @@ -2264,6 +2277,9 @@ VisualShaderNodeUniformRef::UniformType VisualShaderNodeUniformRef::get_uniform_ String VisualShaderNodeUniformRef::generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview) const { switch (uniform_type) { case UniformType::UNIFORM_TYPE_FLOAT: + if (uniform_name == "[None]") { + return "\t" + p_output_vars[0] + " = 0.0;\n"; + } return "\t" + p_output_vars[0] + " = " + get_uniform_name() + ";\n"; case UniformType::UNIFORM_TYPE_INT: return "\t" + p_output_vars[0] + " = " + get_uniform_name() + ";\n"; @@ -2286,16 +2302,29 @@ String VisualShaderNodeUniformRef::generate_code(Shader::Mode p_mode, VisualShad return ""; } +void VisualShaderNodeUniformRef::_set_uniform_type(int p_uniform_type) { + uniform_type = (UniformType)p_uniform_type; +} + +int VisualShaderNodeUniformRef::_get_uniform_type() const { + return (int)uniform_type; +} + void VisualShaderNodeUniformRef::_bind_methods() { ClassDB::bind_method(D_METHOD("set_uniform_name", "name"), &VisualShaderNodeUniformRef::set_uniform_name); ClassDB::bind_method(D_METHOD("get_uniform_name"), &VisualShaderNodeUniformRef::get_uniform_name); + ClassDB::bind_method(D_METHOD("_set_uniform_type", "type"), &VisualShaderNodeUniformRef::_set_uniform_type); + ClassDB::bind_method(D_METHOD("_get_uniform_type"), &VisualShaderNodeUniformRef::_get_uniform_type); + ADD_PROPERTY(PropertyInfo(Variant::STRING_NAME, "uniform_name", PROPERTY_HINT_ENUM, ""), "set_uniform_name", "get_uniform_name"); + ADD_PROPERTY(PropertyInfo(Variant::INT, "uniform_type", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NOEDITOR | PROPERTY_USAGE_INTERNAL), "_set_uniform_type", "_get_uniform_type"); } Vector<StringName> VisualShaderNodeUniformRef::get_editable_properties() const { Vector<StringName> props; props.push_back("uniform_name"); + props.push_back("uniform_type"); return props; } @@ -2700,6 +2729,7 @@ void VisualShaderNodeGroupBase::add_input_port(int p_id, int p_type, const Strin } _apply_port_changes(); + emit_changed(); } void VisualShaderNodeGroupBase::remove_input_port(int p_id) { @@ -2724,6 +2754,7 @@ void VisualShaderNodeGroupBase::remove_input_port(int p_id) { } _apply_port_changes(); + emit_changed(); } int VisualShaderNodeGroupBase::get_input_port_count() const { @@ -2768,6 +2799,7 @@ void VisualShaderNodeGroupBase::add_output_port(int p_id, int p_type, const Stri } _apply_port_changes(); + emit_changed(); } void VisualShaderNodeGroupBase::remove_output_port(int p_id) { @@ -2792,6 +2824,7 @@ void VisualShaderNodeGroupBase::remove_output_port(int p_id) { } _apply_port_changes(); + emit_changed(); } int VisualShaderNodeGroupBase::get_output_port_count() const { @@ -2838,6 +2871,7 @@ void VisualShaderNodeGroupBase::set_input_port_type(int p_id, int p_type) { inputs = inputs.insert(index, itos(p_type)); _apply_port_changes(); + emit_changed(); } VisualShaderNodeGroupBase::PortType VisualShaderNodeGroupBase::get_input_port_type(int p_id) const { @@ -2873,6 +2907,7 @@ void VisualShaderNodeGroupBase::set_input_port_name(int p_id, const String &p_na inputs = inputs.insert(index, p_name); _apply_port_changes(); + emit_changed(); } String VisualShaderNodeGroupBase::get_input_port_name(int p_id) const { @@ -2908,6 +2943,7 @@ void VisualShaderNodeGroupBase::set_output_port_type(int p_id, int p_type) { outputs = outputs.insert(index, itos(p_type)); _apply_port_changes(); + emit_changed(); } VisualShaderNodeGroupBase::PortType VisualShaderNodeGroupBase::get_output_port_type(int p_id) const { @@ -2943,6 +2979,7 @@ void VisualShaderNodeGroupBase::set_output_port_name(int p_id, const String &p_n outputs = outputs.insert(index, p_name); _apply_port_changes(); + emit_changed(); } String VisualShaderNodeGroupBase::get_output_port_name(int p_id) const { @@ -3053,6 +3090,7 @@ String VisualShaderNodeExpression::get_caption() const { void VisualShaderNodeExpression::set_expression(const String &p_expression) { expression = p_expression; + emit_changed(); } String VisualShaderNodeExpression::get_expression() const { diff --git a/scene/resources/visual_shader.h b/scene/resources/visual_shader.h index 513a17b024..e7d74b6c17 100644 --- a/scene/resources/visual_shader.h +++ b/scene/resources/visual_shader.h @@ -242,6 +242,8 @@ public: virtual bool is_generate_input_var(int p_port) const; virtual bool is_code_generated() const; + virtual bool is_show_prop_names() const; + virtual bool is_use_prop_slots() const; virtual Vector<StringName> get_editable_properties() const; @@ -454,6 +456,7 @@ protected: public: static void add_uniform(const String &p_name, UniformType p_type); static void clear_uniforms(); + static bool has_uniform(const String &p_name); public: virtual String get_caption() const override; @@ -469,6 +472,9 @@ public: void set_uniform_name(const String &p_name); String get_uniform_name() const; + void _set_uniform_type(int p_uniform_type); + int _get_uniform_type() const; + int get_uniforms_count() const; String get_uniform_name_by_index(int p_idx) const; UniformType get_uniform_type_by_name(const String &p_name) const; diff --git a/scene/resources/visual_shader_nodes.cpp b/scene/resources/visual_shader_nodes.cpp index 0e10682daf..cffe0bb5cd 100644 --- a/scene/resources/visual_shader_nodes.cpp +++ b/scene/resources/visual_shader_nodes.cpp @@ -3490,6 +3490,14 @@ String VisualShaderNodeFloatUniform::generate_code(Shader::Mode p_mode, VisualSh return "\t" + p_output_vars[0] + " = " + get_uniform_name() + ";\n"; } +bool VisualShaderNodeFloatUniform::is_show_prop_names() const { + return true; +} + +bool VisualShaderNodeFloatUniform::is_use_prop_slots() const { + return true; +} + void VisualShaderNodeFloatUniform::set_hint(Hint p_hint) { hint = p_hint; emit_changed(); @@ -3649,6 +3657,14 @@ String VisualShaderNodeIntUniform::generate_code(Shader::Mode p_mode, VisualShad return "\t" + p_output_vars[0] + " = " + get_uniform_name() + ";\n"; } +bool VisualShaderNodeIntUniform::is_show_prop_names() const { + return true; +} + +bool VisualShaderNodeIntUniform::is_use_prop_slots() const { + return true; +} + void VisualShaderNodeIntUniform::set_hint(Hint p_hint) { hint = p_hint; emit_changed(); @@ -3823,6 +3839,14 @@ String VisualShaderNodeBooleanUniform::generate_code(Shader::Mode p_mode, Visual return "\t" + p_output_vars[0] + " = " + get_uniform_name() + ";\n"; } +bool VisualShaderNodeBooleanUniform::is_show_prop_names() const { + return true; +} + +bool VisualShaderNodeBooleanUniform::is_use_prop_slots() const { + return true; +} + void VisualShaderNodeBooleanUniform::_bind_methods() { ClassDB::bind_method(D_METHOD("set_default_value_enabled", "enabled"), &VisualShaderNodeBooleanUniform::set_default_value_enabled); ClassDB::bind_method(D_METHOD("is_default_value_enabled"), &VisualShaderNodeBooleanUniform::is_default_value_enabled); @@ -3913,6 +3937,10 @@ String VisualShaderNodeColorUniform::generate_code(Shader::Mode p_mode, VisualSh return code; } +bool VisualShaderNodeColorUniform::is_show_prop_names() const { + return true; +} + void VisualShaderNodeColorUniform::_bind_methods() { ClassDB::bind_method(D_METHOD("set_default_value_enabled", "enabled"), &VisualShaderNodeColorUniform::set_default_value_enabled); ClassDB::bind_method(D_METHOD("is_default_value_enabled"), &VisualShaderNodeColorUniform::is_default_value_enabled); @@ -4012,6 +4040,14 @@ void VisualShaderNodeVec3Uniform::_bind_methods() { ADD_PROPERTY(PropertyInfo(Variant::VECTOR3, "default_value"), "set_default_value", "get_default_value"); } +bool VisualShaderNodeVec3Uniform::is_show_prop_names() const { + return true; +} + +bool VisualShaderNodeVec3Uniform::is_use_prop_slots() const { + return true; +} + bool VisualShaderNodeVec3Uniform::is_qualifier_supported(Qualifier p_qual) const { return true; // all qualifiers are supported } @@ -4104,6 +4140,14 @@ void VisualShaderNodeTransformUniform::_bind_methods() { ADD_PROPERTY(PropertyInfo(Variant::TRANSFORM, "default_value"), "set_default_value", "get_default_value"); } +bool VisualShaderNodeTransformUniform::is_show_prop_names() const { + return true; +} + +bool VisualShaderNodeTransformUniform::is_use_prop_slots() const { + return true; +} + bool VisualShaderNodeTransformUniform::is_qualifier_supported(Qualifier p_qual) const { return true; // all qualifiers are supported } diff --git a/scene/resources/visual_shader_nodes.h b/scene/resources/visual_shader_nodes.h index 06ad42adf5..1c986d1ef4 100644 --- a/scene/resources/visual_shader_nodes.h +++ b/scene/resources/visual_shader_nodes.h @@ -1529,6 +1529,9 @@ public: virtual String generate_global(Shader::Mode p_mode, VisualShader::Type p_type, int p_id) const override; virtual String generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview = false) const override; //if no output is connected, the output var passed will be empty. if no input is connected and input is NIL, the input var passed will be empty + virtual bool is_show_prop_names() const override; + virtual bool is_use_prop_slots() const override; + void set_hint(Hint p_hint); Hint get_hint() const; @@ -1591,6 +1594,9 @@ public: virtual String generate_global(Shader::Mode p_mode, VisualShader::Type p_type, int p_id) const override; virtual String generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview = false) const override; //if no output is connected, the output var passed will be empty. if no input is connected and input is NIL, the input var passed will be empty + virtual bool is_show_prop_names() const override; + virtual bool is_use_prop_slots() const override; + void set_hint(Hint p_hint); Hint get_hint() const; @@ -1644,6 +1650,9 @@ public: virtual String generate_global(Shader::Mode p_mode, VisualShader::Type p_type, int p_id) const override; virtual String generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview = false) const override; //if no output is connected, the output var passed will be empty. if no input is connected and input is NIL, the input var passed will be empty + virtual bool is_show_prop_names() const override; + virtual bool is_use_prop_slots() const override; + void set_default_value_enabled(bool p_enabled); bool is_default_value_enabled() const; @@ -1683,6 +1692,8 @@ public: virtual String generate_global(Shader::Mode p_mode, VisualShader::Type p_type, int p_id) const override; virtual String generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview = false) const override; //if no output is connected, the output var passed will be empty. if no input is connected and input is NIL, the input var passed will be empty + virtual bool is_show_prop_names() const override; + void set_default_value_enabled(bool p_enabled); bool is_default_value_enabled() const; @@ -1722,6 +1733,9 @@ public: virtual String generate_global(Shader::Mode p_mode, VisualShader::Type p_type, int p_id) const override; virtual String generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview = false) const override; //if no output is connected, the output var passed will be empty. if no input is connected and input is NIL, the input var passed will be empty + virtual bool is_show_prop_names() const override; + virtual bool is_use_prop_slots() const override; + void set_default_value_enabled(bool p_enabled); bool is_default_value_enabled() const; @@ -1761,6 +1775,9 @@ public: virtual String generate_global(Shader::Mode p_mode, VisualShader::Type p_type, int p_id) const override; virtual String generate_code(Shader::Mode p_mode, VisualShader::Type p_type, int p_id, const String *p_input_vars, const String *p_output_vars, bool p_for_preview = false) const override; //if no output is connected, the output var passed will be empty. if no input is connected and input is NIL, the input var passed will be empty + virtual bool is_show_prop_names() const override; + virtual bool is_use_prop_slots() const override; + void set_default_value_enabled(bool p_enabled); bool is_default_value_enabled() const; diff --git a/servers/physics_2d/body_2d_sw.cpp b/servers/physics_2d/body_2d_sw.cpp index 856bba78f7..75c9a95739 100644 --- a/servers/physics_2d/body_2d_sw.cpp +++ b/servers/physics_2d/body_2d_sw.cpp @@ -468,7 +468,7 @@ void Body2DSW::integrate_forces(real_t p_step) { linear_velocity = motion / p_step; real_t rot = new_transform.get_rotation() - get_transform().get_rotation(); - angular_velocity = rot / p_step; + angular_velocity = remainder(rot, 2.0 * Math_PI) / p_step; do_motion = true; diff --git a/servers/rendering/rasterizer_rd/shaders/canvas.glsl b/servers/rendering/rasterizer_rd/shaders/canvas.glsl index e33b3face9..4a40584e16 100644 --- a/servers/rendering/rasterizer_rd/shaders/canvas.glsl +++ b/servers/rendering/rasterizer_rd/shaders/canvas.glsl @@ -101,7 +101,7 @@ void main() { offset += 1; } else { instance_color = vec4(texelFetch(instancing_buffer, offset + 0), texelFetch(instancing_buffer, offset + 1), texelFetch(instancing_buffer, offset + 2), texelFetch(instancing_buffer, offset + 3)); - offser += 4; + offset += 4; } color *= instance_color; diff --git a/servers/rendering/rasterizer_rd/shaders/particles.glsl b/servers/rendering/rasterizer_rd/shaders/particles.glsl index 3de807b57c..a924509771 100644 --- a/servers/rendering/rasterizer_rd/shaders/particles.glsl +++ b/servers/rendering/rasterizer_rd/shaders/particles.glsl @@ -147,7 +147,7 @@ bool emit_particle(mat4 p_xform, vec3 p_velocity, vec4 p_color, vec4 p_custom, u while(attempts-- > 0) { dst_index = dst_particles.particle_count; if (dst_index == dst_particles.particle_max) { - return false; //cant emit anymore + return false; //can't emit anymore } if (atomicCompSwap(dst_particles.particle_count, dst_index, dst_index +1 ) != dst_index) { diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl index 2a7b73d9aa..e11f3983c5 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end.glsl @@ -681,7 +681,7 @@ LIGHT_SHADER_CODE #ifndef USE_NO_SHADOWS -// Produces cheap white noise, optmized for window-space +// Produces cheap white noise, optimized for window-space // Comes from: https://www.shadertoy.com/view/4djSRW // Copyright: Dave Hoskins, MIT License float quick_hash(vec2 pos) { diff --git a/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl b/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl index 66bfefbe89..0cc2b90c53 100644 --- a/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl +++ b/servers/rendering/rasterizer_rd/shaders/scene_high_end_inc.glsl @@ -252,7 +252,7 @@ layout(set = 1, binding = 0) uniform textureCube radiance_cubemap; #endif -/* Set 2, Reflection and Shadow Atlases (view dependant) */ +/* Set 2, Reflection and Shadow Atlases (view dependent) */ layout(set = 2, binding = 0) uniform textureCubeArray reflection_atlas; diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl index c4b29216d5..61e4bf5e18 100644 --- a/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_direct_light.glsl @@ -22,7 +22,7 @@ dispatch_data; struct ProcessVoxel { uint position; //xyz 7 bit packed, extra 11 bits for neigbours uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours - uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbous + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours //total neighbours: 26 }; diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl index 1ec471d204..d516ab22c3 100644 --- a/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_integrate.glsl @@ -336,7 +336,7 @@ void main() { #ifdef MODE_STORE - // converting to octahedral in this step is requiered because + // converting to octahedral in this step is required because // octahedral is much faster to read from the screen than spherical harmonics, // despite the very slight quality loss @@ -512,7 +512,7 @@ void main() { imageStore(lightprobe_average_scroll_texture, dst_pos, value); } } else if (params.cascade < params.max_cascades - 1) { - //cant scroll, must look for position in parent cascade + //can't scroll, must look for position in parent cascade //to global coords float probe_cell_size = float(params.grid_size.x / float(params.probe_axis_size - 1)) / cascades.data[params.cascade].to_cell; diff --git a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl index dd0ca5c506..916c60ac89 100644 --- a/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl +++ b/servers/rendering/rasterizer_rd/shaders/sdfgi_preprocess.glsl @@ -103,7 +103,7 @@ dispatch_data; struct ProcessVoxel { uint position; //xyz 7 bit packed, extra 11 bits for neigbours uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours - uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbous + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours //total neighbours: 26 }; @@ -136,7 +136,7 @@ dispatch_data; struct ProcessVoxel { uint position; //xyz 7 bit packed, extra 11 bits for neigbours uint albedo; //rgb bits 0-15 albedo, bits 16-21 are normal bits (set if geometry exists toward that side), extra 11 bits for neibhbours - uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbous + uint light; //rgbe8985 encoded total saved light, extra 2 bits for neighbours uint light_aniso; //55555 light anisotropy, extra 2 bits for neighbours //total neighbours: 26 }; @@ -274,7 +274,7 @@ void main() { #ifdef MODE_JUMPFLOOD - //regular jumpflood, efficent for large steps, inefficient for small steps + //regular jumpflood, efficient for large steps, inefficient for small steps ivec3 pos = ivec3(gl_GlobalInvocationID.xyz); vec3 posf = vec3(pos); diff --git a/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl b/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl index cb19fb0b69..13b162f0c9 100644 --- a/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl +++ b/servers/rendering/rasterizer_rd/shaders/volumetric_fog.glsl @@ -485,7 +485,7 @@ void main() { //get depth at cell pos float z = get_depth_at_pos(fog_cell_size.z, i); - //get distance from previos pos + //get distance from previous pos float d = abs(prev_z - z); //compute exinction based on beer's float extinction = t * exp(-d * fog.a); diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp index 28c41fb2dc..6c835fcadf 100644 --- a/servers/rendering/shader_language.cpp +++ b/servers/rendering/shader_language.cpp @@ -6099,6 +6099,14 @@ Error ShaderLanguage::_parse_shader(const Map<StringName, FunctionInfo> &p_funct case TK_UNIFORM: case TK_VARYING: { bool uniform = tk.type == TK_UNIFORM; + + if (!uniform) { + if (shader_type_identifier == "particles" || shader_type_identifier == "sky") { + _set_error(vformat("Varyings cannot be used in '%s' shaders!", shader_type_identifier)); + return ERR_PARSE_ERROR; + } + } + DataPrecision precision = PRECISION_DEFAULT; DataInterpolation interpolation = INTERPOLATION_SMOOTH; DataType type; diff --git a/tests/test_expression.h b/tests/test_expression.h index 8c026a35f5..a3d4877d52 100644 --- a/tests/test_expression.h +++ b/tests/test_expression.h @@ -66,7 +66,7 @@ TEST_CASE("[Expression] Integer arithmetic") { "Integer / integer division should parse successfully."); CHECK_MESSAGE( int(expression.execute()) == 2, - "Integer / integer divsion should return the expected result."); + "Integer / integer division should return the expected result."); CHECK_MESSAGE( expression.parse("2 * (6 + 14) / 2 - 5") == OK, @@ -98,21 +98,21 @@ TEST_CASE("[Expression] Floating-point arithmetic") { "Float / integer division should parse successfully."); CHECK_MESSAGE( Math::is_equal_approx(float(expression.execute()), 0.3), - "Float / integer divsion should return the expected result."); + "Float / integer division should return the expected result."); CHECK_MESSAGE( expression.parse("3 / 10.0") == OK, "Basic integer / float division should parse successfully."); CHECK_MESSAGE( Math::is_equal_approx(float(expression.execute()), 0.3), - "Basic integer / float divsion should return the expected result."); + "Basic integer / float division should return the expected result."); CHECK_MESSAGE( expression.parse("3.0 / 10.0") == OK, "Float / float division should parse successfully."); CHECK_MESSAGE( Math::is_equal_approx(float(expression.execute()), 0.3), - "Float / float divsion should return the expected result."); + "Float / float division should return the expected result."); CHECK_MESSAGE( expression.parse("2.5 * (6.0 + 14.25) / 2.0 - 5.12345") == OK, diff --git a/thirdparty/README.md b/thirdparty/README.md index 58bdafa850..46a75d94b0 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -658,7 +658,7 @@ Files extracted from upstream source: ## zstd - Upstream: https://github.com/facebook/zstd -- Version: 1.4.4 (2019) +- Version: 1.4.5 (2020) - License: BSD-3-Clause Files extracted from upstream source: diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h index 1c294b80d1..37b99c01ee 100644 --- a/thirdparty/zstd/common/bitstream.h +++ b/thirdparty/zstd/common/bitstream.h @@ -1,35 +1,15 @@ /* ****************************************************************** - bitstream - Part of FSE library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * bitstream + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #ifndef BITSTREAM_H_MODULE #define BITSTREAM_H_MODULE @@ -48,6 +28,7 @@ extern "C" { * Dependencies ******************************************/ #include "mem.h" /* unaligned access routines */ +#include "compiler.h" /* UNLIKELY() */ #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ #include "error_private.h" /* error codes and messages */ @@ -161,8 +142,7 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) { # if defined(_MSC_VER) /* Visual */ unsigned long r=0; - _BitScanReverse ( &r, val ); - return (unsigned) r; + return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ @@ -411,6 +391,23 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) return value; } +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + /*! BIT_reloadDStream() : * Refill `bitD` from buffer previously set in BIT_initDStream() . * This function is safe, it guarantees it will not read beyond src buffer. @@ -422,10 +419,7 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) return BIT_DStream_overflow; if (bitD->ptr >= bitD->limitPtr) { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; + return BIT_reloadDStreamFast(bitD); } if (bitD->ptr == bitD->start) { if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; diff --git a/thirdparty/zstd/common/compiler.h b/thirdparty/zstd/common/compiler.h index 1877a0c1d9..95e9483521 100644 --- a/thirdparty/zstd/common/compiler.h +++ b/thirdparty/zstd/common/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -17,7 +17,7 @@ /* force inlining */ #if !defined(ZSTD_NO_INLINE) -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # define INLINE_KEYWORD inline #else # define INLINE_KEYWORD @@ -114,6 +114,9 @@ # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) @@ -136,7 +139,7 @@ /* vectorization * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__clang__) && defined(__GNUC__) +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) # if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) # else @@ -146,6 +149,19 @@ # define DONT_VECTORIZE #endif +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include <intrin.h> /* For Visual 2005 */ diff --git a/thirdparty/zstd/common/cpu.h b/thirdparty/zstd/common/cpu.h index 5f0923fc92..6e8a974f62 100644 --- a/thirdparty/zstd/common/cpu.h +++ b/thirdparty/zstd/common/cpu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-present, Facebook, Inc. + * Copyright (c) 2018-2020, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/debug.c b/thirdparty/zstd/common/debug.c index 3ebdd1cb15..f303f4a2e5 100644 --- a/thirdparty/zstd/common/debug.c +++ b/thirdparty/zstd/common/debug.c @@ -1,35 +1,15 @@ /* ****************************************************************** - debug - Part of FSE library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * debug + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ diff --git a/thirdparty/zstd/common/debug.h b/thirdparty/zstd/common/debug.h index b4fc89d497..ac6224888d 100644 --- a/thirdparty/zstd/common/debug.h +++ b/thirdparty/zstd/common/debug.h @@ -1,35 +1,15 @@ /* ****************************************************************** - debug - Part of FSE library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * debug + * Part of FSE library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ diff --git a/thirdparty/zstd/common/entropy_common.c b/thirdparty/zstd/common/entropy_common.c index b12944e1de..9d3e4e8e36 100644 --- a/thirdparty/zstd/common/entropy_common.c +++ b/thirdparty/zstd/common/entropy_common.c @@ -1,36 +1,16 @@ -/* - Common functions of New Generation Entropy library - Copyright (C) 2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c -*************************************************************************** */ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ /* ************************************* * Dependencies diff --git a/thirdparty/zstd/common/error_private.c b/thirdparty/zstd/common/error_private.c index 7c1bb67a23..cd437529c1 100644 --- a/thirdparty/zstd/common/error_private.c +++ b/thirdparty/zstd/common/error_private.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -47,6 +47,7 @@ const char* ERR_getErrorString(ERR_enum code) /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; case PREFIX(maxCode): default: return notErrorCode; } diff --git a/thirdparty/zstd/common/error_private.h b/thirdparty/zstd/common/error_private.h index 0d2fa7e34b..982cf8e9fe 100644 --- a/thirdparty/zstd/common/error_private.h +++ b/thirdparty/zstd/common/error_private.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -49,7 +49,7 @@ typedef ZSTD_ErrorCode ERR_enum; /*-**************************************** * Error codes handling ******************************************/ -#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#undef ERROR /* already defined on Visual Studio */ #define ERROR(name) ZSTD_ERROR(name) #define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) @@ -57,6 +57,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + /*-**************************************** * Error Strings diff --git a/thirdparty/zstd/common/fse.h b/thirdparty/zstd/common/fse.h index a7553e3721..ff54e70ea7 100644 --- a/thirdparty/zstd/common/fse.h +++ b/thirdparty/zstd/common/fse.h @@ -1,35 +1,15 @@ /* ****************************************************************** - FSE : Finite State Entropy codec - Public Prototypes declaration - Copyright (C) 2013-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #if defined (__cplusplus) diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c index 4f07378982..bcc2223ccc 100644 --- a/thirdparty/zstd/common/fse_decompress.c +++ b/thirdparty/zstd/common/fse_decompress.c @@ -1,35 +1,15 @@ /* ****************************************************************** - FSE : Finite State Entropy decoder - Copyright (C) 2013-2015, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * FSE : Finite State Entropy decoder + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ @@ -51,11 +31,6 @@ #define FSE_isError ERR_isError #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ -/* check and forward error code */ -#ifndef CHECK_F -#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } -#endif - /* ************************************************************** * Templates @@ -287,7 +262,7 @@ size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size /* normal FSE decoding mode */ size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); if (FSE_isError(NCountLength)) return NCountLength; - //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ + /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ if (tableLog > maxLog) return ERROR(tableLog_tooLarge); ip += NCountLength; cSrcSize -= NCountLength; diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h index 6b572c448d..ef432685da 100644 --- a/thirdparty/zstd/common/huf.h +++ b/thirdparty/zstd/common/huf.h @@ -1,35 +1,15 @@ /* ****************************************************************** - huff0 huffman codec, - part of Finite State Entropy library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ #if defined (__cplusplus) @@ -110,7 +90,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, /** HUF_compress4X_wksp() : * Same as HUF_compress2(), but uses externally allocated `workSpace`. * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -208,6 +188,8 @@ typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ @@ -246,7 +228,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize); +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); /** HUF_getNbBits() : * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX diff --git a/thirdparty/zstd/common/mem.h b/thirdparty/zstd/common/mem.h index 530d30c8f7..89c8aea7d2 100644 --- a/thirdparty/zstd/common/mem.h +++ b/thirdparty/zstd/common/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c index f575935076..aa4b4de0d3 100644 --- a/thirdparty/zstd/common/pool.c +++ b/thirdparty/zstd/common/pool.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h index 458d37f13c..259bafc975 100644 --- a/thirdparty/zstd/common/pool.h +++ b/thirdparty/zstd/common/pool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -18,7 +18,7 @@ extern "C" { #include <stddef.h> /* size_t */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ -#include "zstd.h" +#include "../zstd.h" typedef struct POOL_ctx_s POOL_ctx; diff --git a/thirdparty/zstd/common/threading.c b/thirdparty/zstd/common/threading.c index 482664bd9a..e2edb313eb 100644 --- a/thirdparty/zstd/common/threading.c +++ b/thirdparty/zstd/common/threading.c @@ -2,12 +2,13 @@ * Copyright (c) 2016 Tino Reichardt * All rights reserved. * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). - * - * You can contact the author at: - * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * You may select, at your option, one of the above-listed licenses. */ /** diff --git a/thirdparty/zstd/common/threading.h b/thirdparty/zstd/common/threading.h index 3193ca7db8..fd0060d5aa 100644 --- a/thirdparty/zstd/common/threading.h +++ b/thirdparty/zstd/common/threading.h @@ -2,12 +2,13 @@ * Copyright (c) 2016 Tino Reichardt * All rights reserved. * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). - * - * You can contact the author at: - * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * You may select, at your option, one of the above-listed licenses. */ #ifndef THREADING_H_938743 diff --git a/thirdparty/zstd/common/xxhash.c b/thirdparty/zstd/common/xxhash.c index 99d2459621..597de18fc8 100644 --- a/thirdparty/zstd/common/xxhash.c +++ b/thirdparty/zstd/common/xxhash.c @@ -1,35 +1,15 @@ /* -* xxHash - Fast Hash algorithm -* Copyright (C) 2012-2016, Yann Collet -* -* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following conditions are -* met: -* -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above -* copyright notice, this list of conditions and the following disclaimer -* in the documentation and/or other materials provided with the -* distribution. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -* -* You can contact the author at : -* - xxHash homepage: http://www.xxhash.com -* - xxHash source repository : https://github.com/Cyan4973/xxHash + * xxHash - Fast Hash algorithm + * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ @@ -115,7 +95,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp /* ************************************* * Compiler Specific Options ***************************************/ -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # define INLINE_KEYWORD inline #else # define INLINE_KEYWORD @@ -729,7 +709,9 @@ FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, c state->total_len += len; if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } state->memsize += (U32)len; return XXH_OK; } diff --git a/thirdparty/zstd/common/xxhash.h b/thirdparty/zstd/common/xxhash.h index 9bad1f59f6..4207eba832 100644 --- a/thirdparty/zstd/common/xxhash.h +++ b/thirdparty/zstd/common/xxhash.h @@ -1,35 +1,15 @@ /* - xxHash - Extremely Fast Hash algorithm - Header File - Copyright (C) 2012-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : https://github.com/Cyan4973/xxHash + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* Notice extracted from xxHash homepage : diff --git a/thirdparty/zstd/common/zstd_common.c b/thirdparty/zstd/common/zstd_common.c index 667f4a27fc..91fe3323a5 100644 --- a/thirdparty/zstd/common/zstd_common.c +++ b/thirdparty/zstd/common/zstd_common.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/common/zstd_errors.h b/thirdparty/zstd/common/zstd_errors.h index 92a3433896..998398e7e5 100644 --- a/thirdparty/zstd/common/zstd_errors.h +++ b/thirdparty/zstd/common/zstd_errors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -76,6 +76,7 @@ typedef enum { /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h index dcdcbdb81c..3bc7e55a0a 100644 --- a/thirdparty/zstd/common/zstd_internal.h +++ b/thirdparty/zstd/common/zstd_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -19,12 +19,15 @@ /*-************************************* * Dependencies ***************************************/ +#ifdef __aarch64__ +#include <arm_neon.h> +#endif #include "compiler.h" #include "mem.h" #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ #include "error_private.h" #define ZSTD_STATIC_LINKING_ONLY -#include "zstd.h" +#include "../zstd.h" #define FSE_STATIC_LINKING_ONLY #include "fse.h" #define HUF_STATIC_LINKING_ONLY @@ -54,6 +57,31 @@ extern "C" { #define MAX(a,b) ((a)>(b) ? (a) : (b)) /** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** * Return the specified error if the condition evaluates to true. * * In debug modes, prints additional information. @@ -62,7 +90,9 @@ extern "C" { */ #define RETURN_ERROR_IF(cond, err, ...) \ if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, "\n"); \ return ERROR(err); \ @@ -75,7 +105,9 @@ extern "C" { */ #define RETURN_ERROR(err, ...) \ do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, "\n"); \ return ERROR(err); \ @@ -90,7 +122,9 @@ extern "C" { do { \ size_t const err_code = (err); \ if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ RAWLOG(3, ": " __VA_ARGS__); \ RAWLOG(3, "\n"); \ return err_code; \ @@ -128,6 +162,8 @@ static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; +#define ZSTD_FRAMECHECKSUMSIZE 4 + #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ @@ -191,10 +227,22 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; /*-******************************************* * Shared functions to include for inlining *********************************************/ -static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } +static void ZSTD_copy8(void* dst, const void* src) { +#ifdef __aarch64__ + vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else + memcpy(dst, src, 8); +#endif +} #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } -static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } +static void ZSTD_copy16(void* dst, const void* src) { +#ifdef __aarch64__ + vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#else + memcpy(dst, src, 16); +#endif +} #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } #define WILDCOPY_OVERLENGTH 32 @@ -213,7 +261,7 @@ typedef enum { * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. * The src buffer must be before the dst buffer. */ -MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE +MEM_STATIC FORCE_INLINE_ATTR void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) { ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; @@ -230,13 +278,18 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e } while (op < oend); } else { assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first two COPY16() calls because the copy length is + /* Separate out the first COPY16() call because the copy length is * almost certain to be short, so the branches have different - * probabilities. - * On gcc-9 unrolling once is +1.6%, twice is +2%, thrice is +1.8%. - * On clang-8 unrolling once is +1.4%, twice is +3.3%, thrice is +3%. + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. */ - COPY16(op, ip); +#ifndef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else COPY16(op, ip); if (op >= oend) return; do { @@ -244,9 +297,29 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e COPY16(op, ip); } while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + memcpy(dst, src, length); } + return length; } +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + /*-******************************************* * Private declarations @@ -271,6 +344,31 @@ typedef struct { U32 longLengthPos; } seqStore_t; +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + /** * Contains the compressed frame size and an upper-bound for the decompressed frame size. * Note: before using `compressedSize`, check for errors using ZSTD_isError(). @@ -297,8 +395,7 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus { # if defined(_MSC_VER) /* Visual */ unsigned long r=0; - _BitScanReverse(&r, val); - return (unsigned)r; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c index 68b47e1093..a42759814f 100644 --- a/thirdparty/zstd/compress/fse_compress.c +++ b/thirdparty/zstd/compress/fse_compress.c @@ -1,35 +1,15 @@ /* ****************************************************************** - FSE : Finite State Entropy encoder - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * FSE : Finite State Entropy encoder + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** @@ -37,14 +17,14 @@ ****************************************************************/ #include <stdlib.h> /* malloc, free, qsort */ #include <string.h> /* memcpy, memset */ -#include "compiler.h" -#include "mem.h" /* U32, U16, etc. */ -#include "debug.h" /* assert, DEBUGLOG */ +#include "../common/compiler.h" +#include "../common/mem.h" /* U32, U16, etc. */ +#include "../common/debug.h" /* assert, DEBUGLOG */ #include "hist.h" /* HIST_count_wksp */ -#include "bitstream.h" +#include "../common/bitstream.h" #define FSE_STATIC_LINKING_ONLY -#include "fse.h" -#include "error_private.h" +#include "../common/fse.h" +#include "../common/error_private.h" /* ************************************************************** @@ -645,9 +625,6 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize, size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } - /* FSE_compress_wksp() : * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). * `wkspSize` size must be `(1<<tableLog)`. diff --git a/thirdparty/zstd/compress/hist.c b/thirdparty/zstd/compress/hist.c index 45b7babc1e..61e08c7968 100644 --- a/thirdparty/zstd/compress/hist.c +++ b/thirdparty/zstd/compress/hist.c @@ -1,42 +1,22 @@ /* ****************************************************************** - hist : Histogram functions - part of Finite State Entropy project - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* --- dependencies --- */ -#include "mem.h" /* U32, BYTE, etc. */ -#include "debug.h" /* assert, DEBUGLOG */ -#include "error_private.h" /* ERROR */ +#include "../common/mem.h" /* U32, BYTE, etc. */ +#include "../common/debug.h" /* assert, DEBUGLOG */ +#include "../common/error_private.h" /* ERROR */ #include "hist.h" diff --git a/thirdparty/zstd/compress/hist.h b/thirdparty/zstd/compress/hist.h index 8b389358dc..77e3ec4fb1 100644 --- a/thirdparty/zstd/compress/hist.h +++ b/thirdparty/zstd/compress/hist.h @@ -1,36 +1,16 @@ /* ****************************************************************** - hist : Histogram functions - part of Finite State Entropy project - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* --- dependencies --- */ diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c index f074f1e0a9..546879868a 100644 --- a/thirdparty/zstd/compress/huf_compress.c +++ b/thirdparty/zstd/compress/huf_compress.c @@ -1,35 +1,15 @@ /* ****************************************************************** - Huffman encoder, part of New Generation Entropy library - Copyright (C) 2013-2016, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - - Public forum : https://groups.google.com/forum/#!forum/lz4c + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** @@ -45,14 +25,14 @@ ****************************************************************/ #include <string.h> /* memcpy, memset */ #include <stdio.h> /* printf (debug) */ -#include "compiler.h" -#include "bitstream.h" +#include "../common/compiler.h" +#include "../common/bitstream.h" #include "hist.h" #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ -#include "fse.h" /* header compression */ +#include "../common/fse.h" /* header compression */ #define HUF_STATIC_LINKING_ONLY -#include "huf.h" -#include "error_private.h" +#include "../common/huf.h" +#include "../common/error_private.h" /* ************************************************************** @@ -60,8 +40,6 @@ ****************************************************************/ #define HUF_isError ERR_isError #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } /* ************************************************************** @@ -110,18 +88,18 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); /* Write table description header */ - { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); op += hSize; } /* Compress */ CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); if (cSize == 0) return 0; /* not enough space for compressed data */ op += cSize; } - return op-ostart; + return (size_t)(op-ostart); } @@ -169,7 +147,7 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize, } -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) { BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ @@ -192,9 +170,11 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void } } /* fill nbBits */ + *hasZeroWeights = 0; { U32 n; for (n=0; n<nbSymbols; n++) { const U32 w = huffWeight[n]; - CTable[n].nbBits = (BYTE)(tableLog + 1 - w); + *hasZeroWeights |= (w == 0); + CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0); } } /* fill val */ @@ -240,7 +220,7 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) /* there are several too large elements (at least >= 2) */ { int totalCost = 0; const U32 baseCost = 1 << (largestBits - maxNbBits); - U32 n = lastNonNull; + int n = (int)lastNonNull; while (huffNode[n].nbBits > maxNbBits) { totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); @@ -255,22 +235,22 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) /* repay normalized cost */ { U32 const noSymbol = 0xF0F0F0F0; U32 rankLast[HUF_TABLELOG_MAX+2]; - int pos; /* Get pos of last (smallest) symbol per rank */ memset(rankLast, 0xF0, sizeof(rankLast)); { U32 currentNbBits = maxNbBits; + int pos; for (pos=n ; pos >= 0; pos--) { if (huffNode[pos].nbBits >= currentNbBits) continue; currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = pos; + rankLast[maxNbBits-currentNbBits] = (U32)pos; } } while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 highPos = rankLast[nBitsToDecrease]; - U32 lowPos = rankLast[nBitsToDecrease-1]; + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; if (highPos == noSymbol) continue; if (lowPos == noSymbol) break; { U32 const highTotal = huffNode[highPos].count; @@ -297,7 +277,8 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ while (huffNode[n].nbBits == maxNbBits) n--; huffNode[n+1].nbBits--; - rankLast[1] = n+1; + assert(n >= 0); + rankLast[1] = (U32)(n+1); totalCost++; continue; } @@ -309,29 +290,36 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) return maxNbBits; } - typedef struct { U32 base; U32 current; } rankPos; -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue) +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) { - rankPos rank[32]; U32 n; - memset(rank, 0, sizeof(rank)); + memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); for (n=0; n<=maxSymbolValue; n++) { U32 r = BIT_highbit32(count[n] + 1); - rank[r].base ++; + rankPosition[r].base ++; } - for (n=30; n>0; n--) rank[n-1].base += rank[n].base; - for (n=0; n<32; n++) rank[n].current = rank[n].base; + for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base; + for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base; for (n=0; n<=maxSymbolValue; n++) { U32 const c = count[n]; U32 const r = BIT_highbit32(c+1) + 1; - U32 pos = rank[r].current++; - while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { + U32 pos = rankPosition[r].current++; + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { huffNode[pos] = huffNode[pos-1]; pos--; } @@ -343,45 +331,48 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu /** HUF_buildCTable_wksp() : * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). */ #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) -typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) { - nodeElt* const huffNode0 = (nodeElt*)workSpace; + HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace; + nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; nodeElt* const huffNode = huffNode0+1; - U32 n, nonNullRank; + int nonNullRank; int lowS, lowN; - U16 nodeNb = STARTNODE; - U32 nodeRoot; + int nodeNb = STARTNODE; + int n, nodeRoot; /* safety checks */ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); memset(huffNode0, 0, sizeof(huffNodeTable)); /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue); + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); /* init for parents */ - nonNullRank = maxSymbolValue; + nonNullRank = (int)maxSymbolValue; while(huffNode[nonNullRank].count == 0) nonNullRank--; lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; - huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; nodeNb++; lowS-=2; for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ /* create parents */ while (nodeNb <= nodeRoot) { - U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = nodeNb; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; nodeNb++; } @@ -393,24 +384,25 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); /* fill result into tree (val, nbBits) */ { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ for (n=0; n<=nonNullRank; n++) nbPerRank[huffNode[n].nbBits]++; /* determine stating value per rank */ { U16 min = 0; - for (n=maxNbBits; n>0; n--) { + for (n=(int)maxNbBits; n>0; n--) { valPerRank[n] = min; /* get starting value within each rank */ min += nbPerRank[n]; min >>= 1; } } - for (n=0; n<=maxSymbolValue; n++) + for (n=0; n<alphabetSize; n++) tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ - for (n=0; n<=maxSymbolValue; n++) + for (n=0; n<alphabetSize; n++) tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ } @@ -423,11 +415,11 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo */ size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) { - huffNodeTable nodeTable; - return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); + HUF_buildCTable_wksp_tables workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); } -static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { size_t nbBits = 0; int s; @@ -437,7 +429,7 @@ static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count return nbBits >> 3; } -static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { int bad = 0; int s; for (s = 0; s <= (int)maxSymbolValue; ++s) { @@ -476,7 +468,7 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, /* init */ if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); if (HUF_isError(initErr)) return 0; } n = srcSize & ~3; /* join to mod 4 */ @@ -573,7 +565,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, if (srcSize < 12) return 0; /* no saving possible : too small input */ op += 6; /* jumpTable */ - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; assert(cSize <= 65535); MEM_writeLE16(ostart, (U16)cSize); @@ -581,7 +574,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; assert(cSize <= 65535); MEM_writeLE16(ostart+2, (U16)cSize); @@ -589,7 +583,8 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); if (cSize==0) return 0; assert(cSize <= 65535); MEM_writeLE16(ostart+4, (U16)cSize); @@ -597,12 +592,14 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, } ip += segmentSize; - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); if (cSize==0) return 0; op += cSize; } - return op-ostart; + return (size_t)(op-ostart); } size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) @@ -618,20 +615,21 @@ static size_t HUF_compressCTable_internal( HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) { size_t const cSize = (nbStreams==HUF_singleStream) ? - HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); + HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); if (HUF_isError(cSize)) { return cSize; } if (cSize==0) { return 0; } /* uncompressible */ op += cSize; /* check compressibility */ + assert(op >= ostart); if ((size_t)(op-ostart) >= srcSize-1) { return 0; } - return op-ostart; + return (size_t)(op-ostart); } typedef struct { unsigned count[HUF_SYMBOLVALUE_MAX + 1]; HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; - huffNodeTable nodeTable; + HUF_buildCTable_wksp_tables buildCTable_wksp; } HUF_compress_tables_t; /* HUF_compress_internal() : @@ -650,6 +648,8 @@ HUF_compress_internal (void* dst, size_t dstSize, BYTE* const oend = ostart + dstSize; BYTE* op = ostart; + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + /* checks & inits */ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); @@ -691,7 +691,7 @@ HUF_compress_internal (void* dst, size_t dstSize, huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, maxSymbolValue, huffLog, - table->nodeTable, sizeof(table->nodeTable)); + &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); CHECK_F(maxBits); huffLog = (U32)maxBits; /* Zero unused symbols in CTable, so we can check it for validity */ diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c index 35346b92cb..3f963b1cff 100644 --- a/thirdparty/zstd/compress/zstd_compress.c +++ b/thirdparty/zstd/compress/zstd_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -13,13 +13,13 @@ ***************************************/ #include <limits.h> /* INT_MAX */ #include <string.h> /* memset */ -#include "cpu.h" -#include "mem.h" +#include "../common/cpu.h" +#include "../common/mem.h" #include "hist.h" /* HIST_countFast_wksp */ #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ -#include "fse.h" +#include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY -#include "huf.h" +#include "../common/huf.h" #include "zstd_compress_internal.h" #include "zstd_compress_sequences.h" #include "zstd_compress_literals.h" @@ -28,11 +28,19 @@ #include "zstd_lazy.h" #include "zstd_opt.h" #include "zstd_ldm.h" +#include "zstd_compress_superblock.h" /*-************************************* * Helper functions ***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). + */ size_t ZSTD_compressBound(size_t srcSize) { return ZSTD_COMPRESSBOUND(srcSize); } @@ -82,7 +90,7 @@ ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) } } -ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) { ZSTD_cwksp ws; ZSTD_CCtx* cctx; @@ -91,9 +99,8 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) ZSTD_cwksp_init(&ws, workspace, workspaceSize); cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); - if (cctx == NULL) { - return NULL; - } + if (cctx == NULL) return NULL; + memset(cctx, 0, sizeof(ZSTD_CCtx)); ZSTD_cwksp_move(&cctx->workspace, &ws); cctx->staticSize = workspaceSize; @@ -102,8 +109,7 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize) if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object( - &cctx->workspace, HUF_WORKSPACE_SIZE); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE); cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); return cctx; } @@ -227,7 +233,7 @@ size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) } size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { - RETURN_ERROR_IF(!cctxParams, GENERIC); + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); memset(cctxParams, 0, sizeof(*cctxParams)); cctxParams->compressionLevel = compressionLevel; cctxParams->fParams.contentSizeFlag = 1; @@ -236,8 +242,8 @@ size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) { - RETURN_ERROR_IF(!cctxParams, GENERIC); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); memset(cctxParams, 0, sizeof(*cctxParams)); assert(!ZSTD_checkCParams(params.cParams)); cctxParams->cParams = params.cParams; @@ -249,12 +255,12 @@ size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_paramete /* ZSTD_assignParamsToCCtxParams() : * params is presumed valid at this stage */ static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) + const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) { ZSTD_CCtx_params ret = *cctxParams; - assert(!ZSTD_checkCParams(params.cParams)); - ret.cParams = params.cParams; - ret.fParams = params.fParams; + assert(!ZSTD_checkCParams(params->cParams)); + ret.cParams = params->cParams; + ret.fParams = params->fParams; ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ return ret; } @@ -339,8 +345,13 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif return bounds; case ZSTD_c_enableLongDistanceMatching: @@ -408,9 +419,8 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) return bounds; default: - { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; - return boundError; - } + bounds.error = ERROR(parameter_unsupported); + return bounds; } } @@ -428,7 +438,7 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) #define BOUNDCHECK(cParam, val) { \ RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ - parameter_outOfBound); \ + parameter_outOfBound, "Param out of bounds"); \ } @@ -476,7 +486,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) if (ZSTD_isUpdateAuthorized(param)) { cctx->cParamsChanged = 1; } else { - RETURN_ERROR(stage_wrong); + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); } } switch(param) @@ -513,7 +523,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_srcSizeHint: break; - default: RETURN_ERROR(parameter_unsupported); + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); } @@ -530,7 +540,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, return (size_t)CCtxParams->format; case ZSTD_c_compressionLevel : { - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); if (value) { /* 0 : does not change current level */ CCtxParams->compressionLevel = value; } @@ -618,7 +628,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); CCtxParams->nbWorkers = value; return CCtxParams->nbWorkers; #endif @@ -631,7 +641,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, /* Adjust to the minimum non-default value. */ if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) value = ZSTDMT_JOBSIZE_MIN; - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); assert(value >= 0); CCtxParams->jobSize = value; return CCtxParams->jobSize; @@ -642,7 +652,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); CCtxParams->overlapLog = value; return CCtxParams->overlapLog; #endif @@ -652,7 +662,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); return 0; #else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); CCtxParams->rsyncable = value; return CCtxParams->rsyncable; #endif @@ -681,7 +691,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, case ZSTD_c_ldmHashRateLog : RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, - parameter_outOfBound); + parameter_outOfBound, "Param out of bounds!"); CCtxParams->ldmParams.hashRateLog = value; return CCtxParams->ldmParams.hashRateLog; @@ -821,8 +831,11 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) { DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); - RETURN_ERROR_IF(cctx->cdict, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); cctx->requestedParams = *params; return 0; @@ -831,7 +844,8 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams( ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; return 0; } @@ -845,7 +859,7 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) { ZSTD_localDict* const dl = &cctx->localDict; ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, 0, dl->dictSize); + &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); if (dl->dict == NULL) { /* No local dictionary. */ assert(dl->dictBuffer == NULL); @@ -869,7 +883,7 @@ static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) dl->dictContentType, cParams, cctx->customMem); - RETURN_ERROR_IF(!dl->cdict, memory_allocation); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); cctx->cdict = dl->cdict; return 0; } @@ -878,7 +892,8 @@ size_t ZSTD_CCtx_loadDictionary_advanced( ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); RETURN_ERROR_IF(cctx->staticSize, memory_allocation, "no malloc for static CCtx"); DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); @@ -889,7 +904,7 @@ size_t ZSTD_CCtx_loadDictionary_advanced( cctx->localDict.dict = dict; } else { void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); - RETURN_ERROR_IF(!dictBuffer, memory_allocation); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); memcpy(dictBuffer, dict, dictSize); cctx->localDict.dictBuffer = dictBuffer; cctx->localDict.dict = dictBuffer; @@ -915,7 +930,8 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); /* Free the existing local cdict (if any) to save memory. */ ZSTD_clearAllDicts(cctx); cctx->cdict = cdict; @@ -930,11 +946,14 @@ size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSiz size_t ZSTD_CCtx_refPrefix_advanced( ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); ZSTD_clearAllDicts(cctx); - cctx->prefixDict.dict = prefix; - cctx->prefixDict.dictSize = prefixSize; - cctx->prefixDict.dictContentType = dictContentType; + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } return 0; } @@ -949,7 +968,8 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); ZSTD_clearAllDicts(cctx); return ZSTD_CCtxParams_reset(&cctx->requestedParams); } @@ -996,7 +1016,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams) /** ZSTD_cycleLog() : * condition for correct operation : hashLog > 1 */ -static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) { U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); return hashLog - btScale; @@ -1006,7 +1026,7 @@ static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) * optimize `cPar` for a specified input (`srcSize` and `dictSize`). * mostly downsize to reduce memory consumption and initialization latency. * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. - * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention. + * note : `srcSize==0` means 0! * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ static ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, @@ -1017,10 +1037,8 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); assert(ZSTD_checkCParams(cPar)==0); - if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ ) - srcSize = minSrcSize; /* presumed small when there is a dictionary */ - else if (srcSize == 0) - srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; /* resize windowLog if input is small enough, to use less memory */ if ( (srcSize < maxWindowResize) @@ -1049,9 +1067,13 @@ ZSTD_adjustCParams(ZSTD_compressionParameters cPar, size_t dictSize) { cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); } +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); + ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) { @@ -1059,7 +1081,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { srcSizeHint = CCtxParams->srcSizeHint; } - cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize); + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize); if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; @@ -1069,6 +1091,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); } @@ -1104,7 +1127,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) { RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, 0, 0); + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); U32 const divider = (cParams.minMatch==3) ? 3 : 4; size_t const maxNbSeq = blockSize / divider; @@ -1118,13 +1141,26 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); - size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace + - matchStateSize + ldmSpace + ldmSeqSpace; + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. */ + size_t const bufferSpace = ZSTD_cwksp_alloc_size(0) + + ZSTD_cwksp_alloc_size(0); + size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); - DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)cctxSpace); + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return cctxSpace + neededSpace; + return neededSpace; } } @@ -1136,7 +1172,7 @@ size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); return ZSTD_estimateCCtxSize_usingCParams(cParams); } @@ -1155,7 +1191,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) { RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, 0, 0); + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; @@ -1175,7 +1211,7 @@ size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); return ZSTD_estimateCStreamSize_usingCParams(cParams); } @@ -1243,7 +1279,7 @@ static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, assert(cParams1.strategy == cParams2.strategy); } -static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) { int i; for (i = 0; i < ZSTD_REP_NUM; ++i) @@ -1320,10 +1356,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); if (forceResetIndex == ZSTDirp_reset) { - memset(&ms->window, 0, sizeof(ms->window)); - ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */ - ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */ + ZSTD_window_init(&ms->window); ZSTD_cwksp_mark_tables_dirty(ws); } @@ -1416,13 +1449,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - ZSTD_indexResetPolicy_e needsIndexReset = ZSTDirp_continue; + ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset; if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { needsIndexReset = ZSTDirp_reset; } - ZSTD_cwksp_bump_oversized_duration(ws, 0); + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); /* Check if workspace is large enough, alloc a new one if needed */ { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; @@ -1459,7 +1492,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, needsIndexReset = ZSTDirp_reset; ZSTD_cwksp_free(ws, zc->customMem); - FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem)); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); DEBUGLOG(5, "reserving object space"); /* Statically sized space. @@ -1530,7 +1563,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ¶ms.cParams, crp, needsIndexReset, - ZSTD_resetTarget_CCtx)); + ZSTD_resetTarget_CCtx), ""); /* ldm hash table */ if (params.ldmParams.enableLdm) { @@ -1541,11 +1574,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); zc->maxNbLdmSequences = maxNbLdmSeq; - memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); + ZSTD_window_init(&zc->ldmState.window); ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; } DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; return 0; } @@ -1603,10 +1638,11 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, assert(windowLog != 0); /* Resize working context table params for input only, since the dict * has its own tables. */ + /* pledgeSrcSize == 0 means 0! */ params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); params.cParams.windowLog = windowLog; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff)); + ZSTDcrp_makeClean, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); } @@ -1655,7 +1691,7 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, params.cParams = *cdict_cParams; params.cParams.windowLog = windowLog; FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff)); + ZSTDcrp_leaveDirty, zbuff), ""); assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); @@ -1736,7 +1772,8 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_buffered_policy_e zbuff) { DEBUGLOG(5, "ZSTD_copyCCtx_internal"); - RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); { ZSTD_CCtx_params params = dstCCtx->requestedParams; @@ -1889,16 +1926,6 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par /* See doc/zstd_compression_format.md for detailed format description */ -static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -{ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall); - MEM_writeLE24(dst, cBlockHeader24); - memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); - return ZSTD_blockHeaderSize + srcSize; -} - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { const seqDef* const sequences = seqStorePtr->sequencesStart; @@ -1921,19 +1948,14 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } -static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) { - switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: - return 0; - case ZSTD_lcm_uncompressed: - return 1; - default: - assert(0 /* impossible: pre-validated */); - /* fall-through */ - case ZSTD_lcm_auto: - return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); - } + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); } /* ZSTD_compressSequences_internal(): @@ -1979,14 +2001,14 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, literals, litSize, entropyWorkspace, entropyWkspSize, bmi2); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); assert(cSize <= dstCapacity); op += cSize; } /* Sequences Header */ RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall); + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); if (nbSeq < 128) { *op++ = (BYTE)nbSeq; } else if (nbSeq < LONGNBSEQ) { @@ -2031,7 +2053,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); if (LLtype == set_compressed) lastNCount = op; op += countSize; @@ -2059,7 +2081,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); if (Offtype == set_compressed) lastNCount = op; op += countSize; @@ -2085,7 +2107,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); if (MLtype == set_compressed) lastNCount = op; op += countSize; @@ -2101,7 +2123,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, CTable_LitLength, llCodeTable, sequences, nbSeq, longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); op += bitstreamSize; assert(op <= oend); /* zstd versions <= 1.3.4 mistakenly report corruption when @@ -2145,7 +2167,7 @@ ZSTD_compressSequences(seqStore_t* seqStorePtr, */ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed"); /* Check compressibility */ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); @@ -2271,7 +2293,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) /* Updates ldmSeqStore.size */ FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, &zc->appliedParams.ldmParams, - src, srcSize)); + src, srcSize), ""); /* Updates ldmSeqStore.pos */ lastLLSize = ZSTD_ldm_blockCompress(&ldmSeqStore, @@ -2347,7 +2369,7 @@ size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, void* dst = ZSTD_malloc(dstCapacity, ZSTD_defaultCMem); SeqCollector seqCollector; - RETURN_ERROR_IF(dst == NULL, memory_allocation); + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); seqCollector.collectSequences = 1; seqCollector.seqStart = outSeqs; @@ -2370,6 +2392,25 @@ static int ZSTD_isRLE(const BYTE *ip, size_t length) { return 1; } +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +{ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; +} + static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 frame) @@ -2387,7 +2428,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, (unsigned)zc->blockState.matchState.nextToUpdate); { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } } @@ -2420,10 +2461,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, out: if (!ZSTD_isError(cSize) && cSize > 1) { - /* confirm repcodes and entropy tables when emitting a compressed block */ - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; + ZSTD_confirmRepcodesAndEntropyTables(zc); } /* We check that dictionaries have offset codes available for the first * block. After the first block, the offcode table might not have large @@ -2435,6 +2473,80 @@ out: return cSize; } +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_cwksp* ws, @@ -2478,6 +2590,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); @@ -2500,21 +2613,31 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - { size_t cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); - FORWARD_IF_ERROR(cSize); - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize); + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); } else { - const U32 cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTD_blockHeaderSize; + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } } + ip += blockSize; assert(remaining >= blockSize); remaining -= blockSize; @@ -2546,7 +2669,8 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, size_t pos=0; assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); - RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); @@ -2582,7 +2706,8 @@ static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, */ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) { - RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, + "dst buf is too small to write frame trailer empty block."); { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ MEM_writeLE24(dst, cBlockHeader24); return ZSTD_blockHeaderSize; @@ -2591,9 +2716,11 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) { - RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong); + RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, + "wrong cctx stage"); RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, - parameter_unsupported); + parameter_unsupported, + "incompatible with ldm"); cctx->externSeqStore.seq = seq; cctx->externSeqStore.size = nbSeq; cctx->externSeqStore.capacity = nbSeq; @@ -2618,7 +2745,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (frame && (cctx->stage==ZSTDcs_init)) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); - FORWARD_IF_ERROR(fhSize); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); assert(fhSize <= dstCapacity); dstCapacity -= fhSize; dst = (char*)dst + fhSize; @@ -2645,7 +2772,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, { size_t const cSize = frame ? ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); cctx->consumedSrcSize += srcSize; cctx->producedCSize += (cSize + fhSize); assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); @@ -2682,7 +2809,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const { DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); } + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); } @@ -2691,6 +2818,7 @@ size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const * @return : 0, or an error code */ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, ZSTD_cwksp* ws, ZSTD_CCtx_params const* params, const void* src, size_t srcSize, @@ -2702,6 +2830,11 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_window_update(&ms->window, src, srcSize); ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + } + /* Assert that we the ms params match the params we're being given */ ZSTD_assertEqualCParams(params->cParams, ms->cParams); @@ -2714,6 +2847,9 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + if (params->ldmParams.enableLdm && ls != NULL) + ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); + switch(params->cParams.strategy) { case ZSTD_fast: @@ -2756,102 +2892,123 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, NOTE: This behavior is not standard and could be improved in the future. */ static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { U32 s; - RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); + RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols"); for (s = 0; s <= maxSymbolValue; ++s) { - RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); + RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols"); } return 0; } - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize) { - const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ const BYTE* const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff; - size_t dictID; + dictPtr += 8; + bs->entropy.huf.repeatMode = HUF_repeat_check; - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); - assert(dictSize >= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + { unsigned maxSymbolValue = 255; + unsigned hasZeroWeights = 1; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); - dictPtr += 4; /* skip magic number */ - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr); - dictPtr += 4; + /* We only set the loaded table as valid if it contains all non-zero + * weights. Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; - { unsigned maxSymbolValue = 255; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); dictPtr += hufHeaderSize; } { unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ /* fill all offset symbols to avoid garbage at end of table */ RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted); + dictionary_corrupted, ""); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); /* Every match length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); + FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), ""); RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted); + dictionary_corrupted, ""); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); /* Every literal length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); + FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), ""); RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted); + dictionary_corrupted, ""); dictPtr += litlengthHeaderSize; } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); bs->rep[0] = MEM_readLE32(dictPtr+0); bs->rep[1] = MEM_readLE32(dictPtr+4); bs->rep[2] = MEM_readLE32(dictPtr+8); dictPtr += 12; + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); + assert(dictSize >= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); U32 offcodeMax = MaxOff; if (dictContentSize <= ((U32)-1) - 128 KB) { @@ -2859,20 +3016,19 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ } /* All offset values <= dictContentSize + 128 KB must be representable */ - FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); + FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), ""); /* All repCodes must be <= dictContentSize and != 0*/ { U32 u; for (u=0; u<3; u++) { - RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); - RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); } } - bs->entropy.huf.repeatMode = HUF_repeat_valid; bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( - ms, ws, params, dictPtr, dictContentSize, dtlm)); + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); return dictID; } } @@ -2882,6 +3038,7 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, + ldmState_t* ls, ZSTD_cwksp* ws, const ZSTD_CCtx_params* params, const void* dict, size_t dictSize, @@ -2891,7 +3048,7 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, { DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); if ((dict==NULL) || (dictSize<8)) { - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); return 0; } @@ -2899,15 +3056,15 @@ ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, /* dict restricted modes */ if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, ws, params, dict, dictSize, dtlm); + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { if (dictContentType == ZSTD_dct_auto) { DEBUGLOG(4, "raw content dictionary detected"); return ZSTD_loadDictionaryContent( - ms, ws, params, dict, dictSize, dtlm); + ms, ls, ws, params, dict, dictSize, dtlm); } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); assert(0); /* impossible */ } @@ -2944,17 +3101,18 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, } FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) ); + ZSTDcrp_makeClean, zbuff) , ""); { size_t const dictID = cdict ? ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->workspace, params, cdict->dictContent, cdict->dictContentSize, - dictContentType, dtlm, cctx->entropyWorkspace) + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, dictContentType, dtlm, + cctx->entropyWorkspace) : ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->workspace, params, dict, dictSize, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); - FORWARD_IF_ERROR(dictID); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; } @@ -2971,7 +3129,7 @@ size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, { DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) ); + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType, dtlm, cdict, @@ -2986,7 +3144,7 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, ZSTD_parameters params, unsigned long long pledgedSrcSize) { ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); return ZSTD_compressBegin_advanced_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL /*cdict*/, @@ -2995,9 +3153,9 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); @@ -3024,7 +3182,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) /* special case : empty frame */ if (cctx->stage == ZSTDcs_init) { fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); dstCapacity -= fhSize; op += fhSize; cctx->stage = ZSTDcs_ongoing; @@ -3033,7 +3191,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->stage != ZSTDcs_ending) { /* write one last empty block, make it the "last" block */ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); MEM_writeLE32(op, cBlockHeader24); op += ZSTD_blockHeaderSize; dstCapacity -= ZSTD_blockHeaderSize; @@ -3041,7 +3199,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) if (cctx->appliedParams.fParams.checksumFlag) { U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); MEM_writeLE32(op, checksum); op += 4; @@ -3059,9 +3217,9 @@ size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 1 /* last chunk */); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - FORWARD_IF_ERROR(endResult); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); @@ -3081,7 +3239,7 @@ static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict,size_t dictSize, - ZSTD_parameters params) + const ZSTD_parameters* params) { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); @@ -3100,12 +3258,12 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, ZSTD_parameters params) { DEBUGLOG(4, "ZSTD_compress_advanced"); - FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); return ZSTD_compress_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, - params); + ¶ms); } /* Internal */ @@ -3119,7 +3277,7 @@ size_t ZSTD_compress_advanced_internal( DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) ); + params, srcSize, ZSTDb_not_buffered) , ""); return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } @@ -3129,8 +3287,9 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0); + ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); assert(params.fParams.contentSizeFlag == 1); return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); } @@ -3176,7 +3335,7 @@ size_t ZSTD_estimateCDictSize_advanced( size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); } @@ -3203,7 +3362,7 @@ static size_t ZSTD_initCDict_internal( cdict->dictContent = dictBuffer; } else { void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); - RETURN_ERROR_IF(!internalBuffer, memory_allocation); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); cdict->dictContent = internalBuffer; memcpy(internalBuffer, dictBuffer, dictSize); } @@ -3220,7 +3379,7 @@ static size_t ZSTD_initCDict_internal( &cParams, ZSTDcrp_makeClean, ZSTDirp_reset, - ZSTD_resetTarget_CDict)); + ZSTD_resetTarget_CDict), ""); /* (Maybe) load the dictionary * Skips loading the dictionary if it is < 8 bytes. */ @@ -3230,10 +3389,10 @@ static size_t ZSTD_initCDict_internal( params.fParams.contentSizeFlag = 1; params.cParams = cParams; { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, &cdict->workspace, + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, ¶ms, cdict->dictContent, cdict->dictContentSize, dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); assert(dictID <= (size_t)(U32)-1); cdict->dictID = (U32)dictID; } @@ -3287,7 +3446,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); @@ -3298,7 +3457,7 @@ ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionL ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) { - ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); return ZSTD_createCDict_advanced(dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, cParams, ZSTD_defaultCMem); @@ -3381,7 +3540,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced( ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) { DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); - RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); { ZSTD_CCtx_params params = cctx->requestedParams; params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER @@ -3425,7 +3584,7 @@ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, const void* src, size_t srcSize, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) { - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); } @@ -3497,7 +3656,7 @@ static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, dict, dictSize, dictContentType, ZSTD_dtlm_fast, cdict, ¶ms, pledgedSrcSize, - ZSTDb_buffered) ); + ZSTDb_buffered) , ""); cctx->inToCompress = 0; cctx->inBuffPos = 0; @@ -3519,8 +3678,8 @@ size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) */ U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); return 0; } @@ -3534,16 +3693,16 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_internal"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); zcs->requestedParams = *params; assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if (dict) { - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); } else { /* Dictionary is cleared if !cdict */ - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); } return 0; } @@ -3556,10 +3715,10 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize) { DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); zcs->requestedParams.fParams = fParams; - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); return 0; } @@ -3567,8 +3726,8 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) { DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); return 0; } @@ -3587,20 +3746,20 @@ size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, */ U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_initCStream_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, params); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; } size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) { DEBUGLOG(4, "ZSTD_initCStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); return 0; } @@ -3612,19 +3771,19 @@ size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigne */ U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; DEBUGLOG(4, "ZSTD_initCStream_srcSize"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); return 0; } size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) { DEBUGLOG(4, "ZSTD_initCStream"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); return 0; } @@ -3637,14 +3796,6 @@ static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) return hintInSize; } -static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - if (length) memcpy(dst, src, length); - return length; -} - /** ZSTD_compressStream_generic(): * internal function for all *compressStream*() variants * non-static, because can be called from zstdmt_compress.c @@ -3655,11 +3806,11 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, ZSTD_EndDirective const flushMode) { const char* const istart = (const char*)input->src; - const char* const iend = istart + input->size; - const char* ip = istart + input->pos; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; char* const ostart = (char*)output->dst; - char* const oend = ostart + output->size; - char* op = ostart + output->pos; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? ostart + output->pos : ostart; U32 someMoreWork = 1; /* check expectations */ @@ -3685,7 +3836,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, size_t const cSize = ZSTD_compressEnd(zcs, op, oend-op, ip, iend-ip); DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); ip = iend; op += cSize; zcs->frameEnded = 1; @@ -3698,7 +3849,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip); zcs->inBuffPos += loaded; - ip += loaded; + if (loaded != 0) + ip += loaded; if ( (flushMode == ZSTD_e_continue) && (zcs->inBuffPos < zcs->inBuffTarget) ) { /* not enough input to fill full block : stop here */ @@ -3726,7 +3878,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, zcs->inBuff + zcs->inToCompress, iSize) : ZSTD_compressContinue(zcs, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); zcs->frameEnded = lastBlock; /* prepare next block */ zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; @@ -3758,7 +3910,8 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); - op += flushed; + if (flushed) + op += flushed; zcs->outBuffFlushedSize += flushed; if (toFlush!=flushed) { /* flush not fully completed, presumably because dst is too small */ @@ -3802,7 +3955,7 @@ static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); return ZSTD_nextInputSizeHint_MTorST(zcs); } @@ -3814,15 +3967,15 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, { DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); /* check conditions */ - RETURN_ERROR_IF(output->pos > output->size, GENERIC); - RETURN_ERROR_IF(input->pos > input->size, GENERIC); + RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer"); + RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer"); assert(cctx!=NULL); /* transparent initialization stage */ if (cctx->streamStage == zcss_init) { ZSTD_CCtx_params params = cctx->requestedParams; ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */ + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); @@ -3841,14 +3994,14 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", params.nbWorkers); cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); - RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); } /* mt compression */ DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( cctx->mtctx, - prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, - cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); cctx->streamStage = zcss_load; cctx->appliedParams.nbWorkers = params.nbWorkers; } else @@ -3856,7 +4009,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, cctx->cdict, - params, cctx->pledgedSrcSizePlusOne-1) ); + params, cctx->pledgedSrcSizePlusOne-1) , ""); assert(cctx->streamStage == zcss_load); assert(cctx->appliedParams.nbWorkers == 0); } } @@ -3878,7 +4031,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); } - FORWARD_IF_ERROR(flushMin); + FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); /* Either we don't require maximum forward progress, we've finished the @@ -3888,7 +4041,7 @@ size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, return flushMin; } #endif - FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); DEBUGLOG(5, "completed ZSTD_compressStream2"); return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ } @@ -3912,6 +4065,7 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); { size_t oPos = 0; size_t iPos = 0; @@ -3919,10 +4073,10 @@ size_t ZSTD_compress2(ZSTD_CCtx* cctx, dst, dstCapacity, &oPos, src, srcSize, &iPos, ZSTD_e_end); - FORWARD_IF_ERROR(result); + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); if (result != 0) { /* compression not completed, due to lack of output space */ assert(oPos == dstCapacity); - RETURN_ERROR(dstSize_tooSmall); + RETURN_ERROR(dstSize_tooSmall, ""); } assert(iPos == srcSize); /* all input is expected consumed */ return oPos; @@ -3944,7 +4098,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) { ZSTD_inBuffer input = { NULL, 0, 0 }; size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); - FORWARD_IF_ERROR( remainingToFlush ); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ /* single thread mode : attempt to calculate remaining to flush more precisely */ { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; @@ -4069,35 +4223,56 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV }, }; -/*! ZSTD_getCParams() : +/*! ZSTD_getCParams_internal() : * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - size_t const addedSize = srcSizeHint ? 0 : 500; - U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ + int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); int row = compressionLevel; - DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); } } +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); +} + /*! ZSTD_getParams() : * same idea as ZSTD_getCParams() * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). * Fields of `ZSTD_frameParameters` are set to default values */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); memset(¶ms, 0, sizeof(params)); params.cParams = cParams; params.fParams.contentSizeFlag = 1; return params; } + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize); +} diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h index 14036f873f..db73f6ce21 100644 --- a/thirdparty/zstd/compress/zstd_compress_internal.h +++ b/thirdparty/zstd/compress/zstd_compress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -18,7 +18,7 @@ /*-************************************* * Dependencies ***************************************/ -#include "zstd_internal.h" +#include "../common/zstd_internal.h" #include "zstd_cwksp.h" #ifdef ZSTD_MULTITHREAD # include "zstdmt_compress.h" @@ -166,6 +166,7 @@ typedef struct { typedef struct { ZSTD_window_t window; /* State for the window round buffer management */ ldmEntry_t* hashTable; + U32 loadedDictEnd; BYTE* bucketOffsets; /* Next position in bucket to insert entry */ U64 hashPower; /* Used to compute the rolling hash. * Depends on ldmParams.minMatchLength */ @@ -249,6 +250,7 @@ struct ZSTD_CCtx_s { size_t staticSize; SeqCollector seqCollector; int isFirstBlock; + int initialized; seqStore_t seqStore; /* sequences storage ptrs */ ldmState_t ldmState; /* long distance matching state */ @@ -324,6 +326,31 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; } +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + /* ZSTD_cParam_withinBounds: * @return 1 if value is within cParam bounds, * 0 otherwise */ @@ -336,6 +363,30 @@ MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) return 1; } +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + /* ZSTD_minGain() : * minimum compression required * to generate a compress block or a compressed literals section. @@ -348,6 +399,21 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) return (srcSize >> minlog) + 2; } +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + /*! ZSTD_safecopyLiterals() : * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single @@ -433,8 +499,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val) if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) unsigned long r = 0; - _BitScanForward64( &r, (U64)val ); - return (unsigned)(r>>3); + return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; # elif defined(__GNUC__) && (__GNUC__ >= 4) return (__builtin_ctzll((U64)val) >> 3); # else @@ -451,8 +516,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val) } else { /* 32 bits */ # if defined(_MSC_VER) unsigned long r=0; - _BitScanForward( &r, (U32)val ); - return (unsigned)(r>>3); + return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; # elif defined(__GNUC__) && (__GNUC__ >= 3) return (__builtin_ctz((U32)val) >> 3); # else @@ -467,8 +531,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val) if (MEM_64bits()) { # if defined(_MSC_VER) && defined(_WIN64) unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); + return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0; # elif defined(__GNUC__) && (__GNUC__ >= 4) return (__builtin_clzll(val) >> 3); # else @@ -482,8 +545,7 @@ static unsigned ZSTD_NbCommonBytes (size_t val) } else { /* 32 bits */ # if defined(_MSC_VER) unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); + return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; # elif defined(__GNUC__) && (__GNUC__ >= 3) return (__builtin_clz((U32)val) >> 3); # else @@ -730,7 +792,10 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, */ U32 const cycleMask = (1U << cycleLog) - 1; U32 const current = (U32)((BYTE const*)src - window->base); - U32 const newCurrent = (current & cycleMask) + maxDist; + U32 const currentCycle0 = current & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; + U32 const newCurrent = currentCycle1 + maxDist; U32 const correction = current - newCurrent; assert((maxDist & cycleMask) == 0); assert(current > newCurrent); @@ -739,8 +804,17 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, window->base += correction; window->dictBase += correction; - window->lowLimit -= correction; - window->dictLimit -= correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. */ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, window->lowLimit); @@ -844,6 +918,15 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window, } } } } +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ +} + /** * ZSTD_window_update(): * Updates the window by appending [src, src + srcSize) to the window. @@ -857,6 +940,10 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, BYTE const* const ip = (BYTE const*)src; U32 contiguous = 1; DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); /* Check if blocks follow each other */ if (src != window->nextSrc) { /* not contiguous */ @@ -867,7 +954,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, window->dictLimit = (U32)distanceFromBase; window->dictBase = window->base; window->base = ip - distanceFromBase; - // ms->nextToUpdate = window->dictLimit; + /* ms->nextToUpdate = window->dictLimit; */ if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ contiguous = 0; } @@ -883,6 +970,9 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, return contiguous; } +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) { U32 const maxDistance = 1U << windowLog; @@ -893,6 +983,19 @@ MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current return matchLowest; } +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + /* debug functions */ @@ -931,6 +1034,21 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) } #endif +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + short* offcodeNCount, unsigned* offcodeMaxValue, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); /* ============================================================== * Private declarations @@ -940,6 +1058,7 @@ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) /* ZSTD_getCParamsFromCCtxParams() : * cParams are built depending on compressionLevel, src size hints, * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! */ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); @@ -999,5 +1118,8 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); */ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); #endif /* ZSTD_COMPRESS_H */ diff --git a/thirdparty/zstd/compress/zstd_compress_literals.c b/thirdparty/zstd/compress/zstd_compress_literals.c index 6c13331182..17e7168d89 100644 --- a/thirdparty/zstd/compress/zstd_compress_literals.c +++ b/thirdparty/zstd/compress/zstd_compress_literals.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -18,7 +18,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, BYTE* const ostart = (BYTE* const)dst; U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); switch(flSize) { @@ -36,6 +36,7 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, } memcpy(ostart + flSize, src, srcSize); + DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); return srcSize + flSize; } @@ -62,6 +63,7 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* } ostart[flSize] = *(const BYTE*)src; + DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); return flSize+1; } @@ -80,8 +82,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, symbolEncodingType_e hType = set_compressed; size_t cLitSize; - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", - disableLiteralCompression); + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); /* Prepare nextEntropy assuming reusing the existing table */ memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); @@ -102,14 +104,15 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, cLitSize = singleStream ? HUF_compress1X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, - 255, 11, entropyWorkspace, entropyWorkspaceSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : HUF_compress4X_repeat( ostart+lhSize, dstCapacity-lhSize, src, srcSize, - 255, 11, entropyWorkspace, entropyWorkspaceSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); if (repeat != HUF_repeat_none) { /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); hType = set_repeat; } } @@ -150,5 +153,6 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, default: /* not possible : lhSize is {3,4,5} */ assert(0); } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); return lhSize+cLitSize; } diff --git a/thirdparty/zstd/compress/zstd_compress_literals.h b/thirdparty/zstd/compress/zstd_compress_literals.h index 97273d7cfd..8b08705743 100644 --- a/thirdparty/zstd/compress/zstd_compress_literals.h +++ b/thirdparty/zstd/compress/zstd_compress_literals.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_compress_sequences.c b/thirdparty/zstd/compress/zstd_compress_sequences.c index 0ff7a26823..f9f8097c83 100644 --- a/thirdparty/zstd/compress/zstd_compress_sequences.c +++ b/thirdparty/zstd/compress/zstd_compress_sequences.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -60,7 +60,7 @@ static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, BYTE wksp[FSE_NCOUNTBOUND]; S16 norm[MaxSeq + 1]; const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); } @@ -86,7 +86,7 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t * Returns the cost in bits of encoding the distribution in count using ctable. * Returns an error if ctable cannot represent all the symbols in count. */ -static size_t ZSTD_fseBitCost( +size_t ZSTD_fseBitCost( FSE_CTable const* ctable, unsigned const* count, unsigned const max) @@ -96,18 +96,22 @@ static size_t ZSTD_fseBitCost( unsigned s; FSE_CState_t cstate; FSE_initCState(&cstate, ctable); - RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, - "Repeat FSE_CTable has maxSymbolValue %u < %u", + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } for (s = 0; s <= max; ++s) { unsigned const tableLog = cstate.stateLog; unsigned const badCost = (tableLog + 1) << kAccuracyLog; unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); if (count[s] == 0) continue; - RETURN_ERROR_IF(bitCost >= badCost, GENERIC, - "Repeat FSE_CTable has Prob[%u] == 0", s); - cost += count[s] * bitCost; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; } return cost >> kAccuracyLog; } @@ -117,15 +121,15 @@ static size_t ZSTD_fseBitCost( * table described by norm. The max symbol support by norm is assumed >= max. * norm must be valid for every symbol with non-zero probability in count. */ -static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) { unsigned const shift = 8 - accuracyLog; size_t cost = 0; unsigned s; assert(accuracyLog <= 8); for (s = 0; s <= max; ++s) { - unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; unsigned const norm256 = normAcc << shift; assert(norm256 > 0); assert(norm256 < 256); @@ -230,15 +234,15 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, switch (type) { case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); *op = codeTable[0]; return 1; case set_repeat: memcpy(nextCTable, prevCTable, prevCTableSize); return 0; case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize)); /* note : could be pre-calculated */ + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ return 0; case set_compressed: { S16 norm[MaxSeq + 1]; @@ -249,14 +253,14 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity, nbSeq_1--; } assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize)); + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); return NCountSize; } } - default: assert(0); RETURN_ERROR(GENERIC); + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); } } @@ -290,7 +294,7 @@ ZSTD_encodeSequences_body( if (MEM_32bits()) BIT_flushBits(&blockStream); if (longOffsets) { U32 const ofBits = ofCodeTable[nbSeq-1]; - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); BIT_flushBits(&blockStream); @@ -327,7 +331,7 @@ ZSTD_encodeSequences_body( BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); if (extraBits) { BIT_addBits(&blockStream, sequences[n].offset, extraBits); BIT_flushBits(&blockStream); /* (7)*/ diff --git a/thirdparty/zstd/compress/zstd_compress_sequences.h b/thirdparty/zstd/compress/zstd_compress_sequences.h index 57e8e367b0..68c6f9a5ac 100644 --- a/thirdparty/zstd/compress/zstd_compress_sequences.h +++ b/thirdparty/zstd/compress/zstd_compress_sequences.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -11,8 +11,8 @@ #ifndef ZSTD_COMPRESS_SEQUENCES_H #define ZSTD_COMPRESS_SEQUENCES_H -#include "fse.h" /* FSE_repeat, FSE_CTable */ -#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ +#include "../common/fse.h" /* FSE_repeat, FSE_CTable */ +#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ typedef enum { ZSTD_defaultDisallowed = 0, @@ -44,4 +44,11 @@ size_t ZSTD_encodeSequences( FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); #endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/thirdparty/zstd/compress/zstd_compress_superblock.c b/thirdparty/zstd/compress/zstd_compress_superblock.c new file mode 100644 index 0000000000..b693866c0a --- /dev/null +++ b/thirdparty/zstd/compress/zstd_compress_superblock.c @@ -0,0 +1,845 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_superblock.h" + +#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ +#include "hist.h" /* HIST_countFast_wksp */ +#include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" + +/*-************************************* +* Superblock entropy buffer structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[500]; /* TODO give name to this value */ + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[500]; /* TODO give name to this value */ + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; +} + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... */ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. */ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} diff --git a/thirdparty/zstd/compress/zstd_compress_superblock.h b/thirdparty/zstd/compress/zstd_compress_superblock.h new file mode 100644 index 0000000000..07f4cb1dc6 --- /dev/null +++ b/thirdparty/zstd/compress/zstd_compress_superblock.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +#include "../zstd.h" /* ZSTD_CCtx */ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ diff --git a/thirdparty/zstd/compress/zstd_cwksp.h b/thirdparty/zstd/compress/zstd_cwksp.h index fc9765bd3f..a25c9263b7 100644 --- a/thirdparty/zstd/compress/zstd_cwksp.h +++ b/thirdparty/zstd/compress/zstd_cwksp.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -14,7 +14,7 @@ /*-************************************* * Dependencies ***************************************/ -#include "zstd_internal.h" +#include "../common/zstd_internal.h" #if defined (__cplusplus) extern "C" { @@ -24,16 +24,6 @@ extern "C" { * Constants ***************************************/ -/* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 - -/* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. - * In which case, resize it down to free some memory */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 - /* Since the workspace is effectively its own little malloc implementation / * arena, when we run under ASAN, we should similarly insert redzones between * each internal element of the workspace, so ASAN will catch overruns that @@ -468,7 +458,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { void* workspace = ZSTD_malloc(size, customMem); DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); - RETURN_ERROR_IF(workspace == NULL, memory_allocation); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); ZSTD_cwksp_init(ws, workspace, size); return 0; } diff --git a/thirdparty/zstd/compress/zstd_double_fast.c b/thirdparty/zstd/compress/zstd_double_fast.c index a661a48534..27eed66cfe 100644 --- a/thirdparty/zstd/compress/zstd_double_fast.c +++ b/thirdparty/zstd/compress/zstd_double_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* ip = istart; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowestValid = ms->window.dictLimit; - const U32 maxDistance = 1U << cParams->windowLog; /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -96,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( dictCParams->hashLog : hBitsL; const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? dictCParams->chainLog : hBitsS; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); @@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic( /* if a dictionary is attached, it must be within window range */ if (dictMode == ZSTD_dictMatchState) { - assert(lowestValid + maxDistance >= endIndex); + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); } /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { - U32 const maxRep = (U32)(ip - prefixLowest); + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } @@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic( } } ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif continue; _search_next_long: @@ -271,7 +274,7 @@ _match_stored: U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState && repIndex2 < prefixLowestIndex ? - dictBase - dictIndexDelta + repIndex2 : + dictBase + repIndex2 - dictIndexDelta : base + repIndex2; if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { diff --git a/thirdparty/zstd/compress/zstd_double_fast.h b/thirdparty/zstd/compress/zstd_double_fast.h index 4fa31acfc0..14d944d69b 100644 --- a/thirdparty/zstd/compress/zstd_double_fast.h +++ b/thirdparty/zstd/compress/zstd_double_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,7 +15,7 @@ extern "C" { #endif -#include "mem.h" /* U32 */ +#include "../common/mem.h" /* U32 */ #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, diff --git a/thirdparty/zstd/compress/zstd_fast.c b/thirdparty/zstd/compress/zstd_fast.c index 6dbefee6b7..85a3a7a91e 100644 --- a/thirdparty/zstd/compress/zstd_fast.c +++ b/thirdparty/zstd/compress/zstd_fast.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic( const BYTE* ip1; const BYTE* anchor = istart; const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 maxDistance = 1U << cParams->windowLog; - const U32 validStartIndex = ms->window.dictLimit; - const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex; + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -74,12 +72,21 @@ ZSTD_compressBlock_fast_generic( DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); ip1 = ip0 + 1; - { U32 const maxRep = (U32)(ip0 - prefixStart); + { U32 const current = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; } /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ size_t mLength; BYTE const* ip2 = ip0 + 2; @@ -91,19 +98,25 @@ ZSTD_compressBlock_fast_generic( U32 const current1 = (U32)(ip1-base); U32 const matchIndex0 = hashTable[h0]; U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2-offset_1; + BYTE const* repMatch = ip2 - offset_1; const BYTE* match0 = base + matchIndex0; const BYTE* match1 = base + matchIndex1; U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + hashTable[h0] = current0; /* update hash table */ hashTable[h1] = current1; /* update hash table */ assert(ip0 + 1 == ip1); if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = ip2[-1] == repMatch[-1] ? 1 : 0; + mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; ip0 = ip2 - mLength; match0 = repMatch - mLength; + mLength += 4; offcode = 0; goto _match; } @@ -128,19 +141,18 @@ _offset: /* Requires: ip0, match0 */ offset_2 = offset_1; offset_1 = (U32)(ip0-match0); offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 0; + mLength = 4; /* Count the backwards match length */ while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ _match: /* Requires: ip0, match0, offcode */ /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); /* match found */ ip0 += mLength; anchor = ip0; - ip1 = ip0 + 1; if (ip0 <= ilimit) { /* Fill Table */ @@ -148,19 +160,18 @@ _match: /* Requires: ip0, match0, offcode */ hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */ - && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ip1 = ip0 + 1; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } - } + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; } /* save reps for next block */ @@ -387,7 +398,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic"); + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); /* switch to "regular" variant if extDict is invalidated due to maxDistance */ if (prefixStartIndex == dictStartIndex) @@ -404,6 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); assert(offset_1 <= current +1); /* check repIndex */ if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) diff --git a/thirdparty/zstd/compress/zstd_fast.h b/thirdparty/zstd/compress/zstd_fast.h index b74a88c57c..cf6aaa8e67 100644 --- a/thirdparty/zstd/compress/zstd_fast.h +++ b/thirdparty/zstd/compress/zstd_fast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,7 +15,7 @@ extern "C" { #endif -#include "mem.h" /* U32 */ +#include "../common/mem.h" /* U32 */ #include "zstd_compress_internal.h" void ZSTD_fillHashTable(ZSTD_matchState_t* ms, diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c index 9ad7e03b54..4cf5c88b53 100644 --- a/thirdparty/zstd/compress/zstd_lazy.c +++ b/thirdparty/zstd/compress/zstd_lazy.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -660,12 +660,16 @@ ZSTD_compressBlock_lazy_generic( const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; - const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest); + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { - U32 const maxRep = (U32)(ip - prefixLowest); + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); + U32 const maxRep = current - windowLow; if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; } @@ -677,6 +681,12 @@ ZSTD_compressBlock_lazy_generic( } /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif while (ip < ilimit) { size_t matchLength=0; size_t offset=0; @@ -929,11 +939,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const BYTE* const ilimit = iend - 8; const BYTE* const base = ms->window.base; const U32 dictLimit = ms->window.dictLimit; - const U32 lowestIndex = ms->window.lowLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + lowestIndex; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, @@ -942,10 +952,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( U32 offset_1 = rep[0], offset_2 = rep[1]; + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + /* init */ ip += (ip == prefixStart); /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif while (ip < ilimit) { size_t matchLength=0; size_t offset=0; @@ -953,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( U32 current = (U32)(ip-base); /* check repCode */ - { const U32 repIndex = (U32)(current+1 - offset_1); + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); + const U32 repIndex = (U32)(current+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -983,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( current++; /* check repCode */ if (offset) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1013,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( current++; /* check repCode */ if (offset) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog); const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1057,10 +1078,12 @@ _storeSequence: /* check immediate repcode */ while (ip <= ilimit) { - const U32 repIndex = (U32)((ip-base) - offset_2); + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h index bb1763069f..581936f03b 100644 --- a/thirdparty/zstd/compress/zstd_lazy.h +++ b/thirdparty/zstd/compress/zstd_lazy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c index c3312ad3e3..8c47948358 100644 --- a/thirdparty/zstd/compress/zstd_ldm.c +++ b/thirdparty/zstd/compress/zstd_ldm.c @@ -1,15 +1,16 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #include "zstd_ldm.h" -#include "debug.h" +#include "../common/debug.h" #include "zstd_fast.h" /* ZSTD_fillHashTable() */ #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ @@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, return rollingHash; } +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + if ((size_t)(iend - ip) >= params->minMatchLength) { + U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); + ZSTD_ldm_fillLdmHashTable( + state, startingHash, ip, iend - params->minMatchLength, state->window.base, + params->hashLog - params->bucketSizeLog, + *params); + } +} + /** ZSTD_ldm_limitTableUpdate() : * @@ -449,6 +464,8 @@ size_t ZSTD_ldm_generateSequences( U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; } /* 2. We enforce the maximum offset allowed. * @@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences( * TODO: * Test the chunk size. * * Try invalidation after the sequence generation and test the * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL); + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ newLeftoverSize = ZSTD_ldm_generateSequences_internal( ldmState, sequences, params, chunkStart, chunkSize); @@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, if (sequence.offset == 0) break; - assert(sequence.offset <= (1U << cParams->windowLog)); assert(ip + sequence.litLength + sequence.matchLength <= iend); /* Fill tables for block compressor */ ZSTD_ldm_limitTableUpdate(ms, ip); ZSTD_ldm_fillFastTables(ms, ip); /* Run the block compressor */ - DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength); + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); { size_t const newLitLength = blockCompressor(ms, seqStore, rep, ip, sequence.litLength); diff --git a/thirdparty/zstd/compress/zstd_ldm.h b/thirdparty/zstd/compress/zstd_ldm.h index a47846128b..229ea05a9e 100644 --- a/thirdparty/zstd/compress/zstd_ldm.h +++ b/thirdparty/zstd/compress/zstd_ldm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ #ifndef ZSTD_LDM_H @@ -15,7 +16,7 @@ extern "C" { #endif #include "zstd_compress_internal.h" /* ldmParams_t, U32 */ -#include "zstd.h" /* ZSTD_CCtx, size_t */ +#include "../zstd.h" /* ZSTD_CCtx, size_t */ /*-************************************* * Long distance matching @@ -23,6 +24,10 @@ extern "C" { #define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + /** * ZSTD_ldm_generateSequences(): * diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c index 2e50fca6ff..36fff050cf 100644 --- a/thirdparty/zstd/compress/zstd_opt.c +++ b/thirdparty/zstd/compress/zstd_opt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -249,40 +249,6 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP } } -/* ZSTD_litLengthContribution() : - * @return ( cost(litlength) - cost(0) ) - * this value can then be added to rawLiteralsCost() - * to provide a cost which is directly comparable to a match ending at same position */ -static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) -{ - if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel); - - /* dynamic statistics */ - { U32 const llCode = ZSTD_LLcode(litLength); - int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) - + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ - - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel); -#if 1 - return contribution; -#else - return MAX(0, contribution); /* sometimes better, sometimes not ... */ -#endif - } -} - -/* ZSTD_literalsContribution() : - * creates a fake cost for the literals part of a sequence - * which can be compared to the ending cost of a match - * should a new match start at this position */ -static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength, - const optState_t* const optPtr, - int optLevel) -{ - int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) - + ZSTD_litLengthContribution(litLength, optPtr, optLevel); - return contribution; -} - /* ZSTD_getMatchPrice() : * Provides the cost of the match part (offset + matchLength) of a sequence * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. @@ -603,7 +569,10 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 repLen = 0; assert(current >= dictLimit); if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ - if (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch)) { + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; } } else { /* repIndex < dictLimit || repIndex >= current */ @@ -799,30 +768,6 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( /*-******************************* * Optimal parser *********************************/ -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; - -static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - memcpy(&newReps, rep, sizeof(newReps)); - } - } - return newReps; -} static U32 ZSTD_totalLen(ZSTD_optimal_t sol) @@ -839,7 +784,7 @@ listStats(const U32* table, int lastEltID) int enb; for (enb=0; enb < nbElts; enb++) { (void)table; - //RAWLOG(2, "%3i:%3i, ", enb, table[enb]); + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ RAWLOG(2, "%4i,", table[enb]); } RAWLOG(2, " \n"); @@ -894,7 +839,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; } opt[0].mlen = 0; /* means is_a_literal */ opt[0].litlen = litlen; - opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel); + /* We don't need to include the actual price of the literals because + * it is static for the duration of the forward pass, and is included + * in every price. We include the literal length to avoid negative + * prices when we subtract the previous literal length. + */ + opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel); /* large match -> immediate encoding */ { U32 const maxML = matches[nbMatches-1].len; @@ -923,7 +873,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offset = matches[matchNb].off; U32 const end = matches[matchNb].len; - repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0); for ( ; pos <= end ; pos++ ) { U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); U32 const sequencePrice = literalsPrice + matchPrice; @@ -933,8 +882,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[pos].off = offset; opt[pos].litlen = litlen; opt[pos].price = sequencePrice; - ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); - memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); } } last_pos = pos-1; } @@ -961,7 +908,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[cur].off = 0; opt[cur].litlen = litlen; opt[cur].price = price; - memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep)); } else { DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), @@ -969,6 +915,21 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } } + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + /* last match must start at a minimum distance of 8 from oend */ if (inr > ilimit) continue; @@ -1009,7 +970,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* set prices using matches found at position == cur */ for (matchNb = 0; matchNb < nbMatches; matchNb++) { U32 const offset = matches[matchNb].off; - repcodes_t const repHistory = ZSTD_updateRep(opt[cur].rep, offset, ll0); U32 const lastML = matches[matchNb].len; U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; U32 mlen; @@ -1029,8 +989,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, opt[pos].off = offset; opt[pos].litlen = litlen; opt[pos].price = price; - ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory)); - memcpy(opt[pos].rep, &repHistory, sizeof(repHistory)); } else { DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); @@ -1046,6 +1004,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ assert(opt[0].mlen == 0); + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. + */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + memcpy(rep, &reps, sizeof(reps)); + } else { + memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + { U32 const storeEnd = cur + 1; U32 storeStart = storeEnd; U32 seqPos = cur; @@ -1082,20 +1051,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ continue; /* will finish */ } - /* repcodes update : like ZSTD_updateRep(), but update in place */ - if (offCode >= ZSTD_REP_NUM) { /* full offset */ - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offCode - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offCode + (llen==0); - if (repCode) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - if (repCode >= 2) rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = currentOffset; - } } - assert(anchor + llen <= iend); ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); @@ -1104,7 +1059,6 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ } } ZSTD_setBasePrices(optStatePtr, optLevel); } - } /* while (ip < ilimit) */ /* Return the last literals size */ diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h index 094f747665..9aba8a9018 100644 --- a/thirdparty/zstd/compress/zstd_opt.h +++ b/thirdparty/zstd/compress/zstd_opt.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c index bc3062b530..1e3c8fdbee 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.c +++ b/thirdparty/zstd/compress/zstdmt_compress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -22,9 +22,9 @@ /* ====== Dependencies ====== */ #include <string.h> /* memcpy, memset */ #include <limits.h> /* INT_MAX, UINT_MAX */ -#include "mem.h" /* MEM_STATIC */ -#include "pool.h" /* threadpool */ -#include "threading.h" /* mutex */ +#include "../common/mem.h" /* MEM_STATIC */ +#include "../common/pool.h" /* threadpool */ +#include "../common/threading.h" /* mutex */ #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ #include "zstd_ldm.h" #include "zstdmt_compress.h" @@ -461,7 +461,13 @@ typedef struct { ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ } serialState_t; -static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize) +static int +ZSTDMT_serialState_reset(serialState_t* serialState, + ZSTDMT_seqPool* seqPool, + ZSTD_CCtx_params params, + size_t jobSize, + const void* dict, size_t const dictSize, + ZSTD_dictContentType_e dictContentType) { /* Adjust parameters */ if (params.ldmParams.enableLdm) { @@ -490,8 +496,7 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* /* Size the seq pool tables */ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize)); /* Reset the window */ - ZSTD_window_clear(&serialState->ldmState.window); - serialState->ldmWindow = serialState->ldmState.window; + ZSTD_window_init(&serialState->ldmState.window); /* Resize tables and output space if necessary. */ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) { ZSTD_free(serialState->ldmState.hashTable, cMem); @@ -506,7 +511,24 @@ static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* /* Zero the tables */ memset(serialState->ldmState.hashTable, 0, hashSize); memset(serialState->ldmState.bucketOffsets, 0, bucketSize); + + /* Update window state and fill hash table with dict */ + serialState->ldmState.loadedDictEnd = 0; + if (dictSize > 0) { + if (dictContentType == ZSTD_dct_rawContent) { + BYTE const* const dictEnd = (const BYTE*)dict + dictSize; + ZSTD_window_update(&serialState->ldmState.window, dict, dictSize); + ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, ¶ms.ldmParams); + serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base); + } else { + /* don't even load anything */ + } + } + + /* Initialize serialState's copy of ldmWindow. */ + serialState->ldmWindow = serialState->ldmState.window; } + serialState->params = params; serialState->params.jobSize = (U32)jobSize; return 0; @@ -1054,7 +1076,7 @@ static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(const ZSTD_CCtx_params* params) static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) { if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); - FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); if (mtctx->bufPool == NULL) return ERROR(memory_allocation); mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); @@ -1076,7 +1098,7 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", compressionLevel); mtctx->params.compressionLevel = compressionLevel; - { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, 0, 0); + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0); cParams.windowLog = saved_wlog; mtctx->params.cParams = cParams; } @@ -1235,7 +1257,8 @@ ZSTDMT_computeNbJobs(const ZSTD_CCtx_params* params, size_t srcSize, unsigned nb /* ZSTDMT_compress_advanced_internal() : * This is a blocking function : it will only give back control to caller after finishing its compression job. */ -static size_t ZSTDMT_compress_advanced_internal( +static size_t +ZSTDMT_compress_advanced_internal( ZSTDMT_CCtx* mtctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -1267,10 +1290,11 @@ static size_t ZSTDMT_compress_advanced_internal( assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) ); - if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) + /* LDM doesn't even try to load the dictionary in single-ingestion mode */ + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize, NULL, 0, ZSTD_dct_auto)) return ERROR(memory_allocation); - FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) , ""); /* only expands if necessary */ { unsigned u; for (u=0; u<nbJobs; u++) { @@ -1403,7 +1427,7 @@ size_t ZSTDMT_initCStream_internal( /* init */ if (params.nbWorkers != mtctx->params.nbWorkers) - FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); + FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , ""); if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; @@ -1500,7 +1524,8 @@ size_t ZSTDMT_initCStream_internal( mtctx->allJobsCompleted = 0; mtctx->consumed = 0; mtctx->produced = 0; - if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize)) + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) return ERROR(memory_allocation); return 0; } @@ -1714,9 +1739,11 @@ static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, u assert(mtctx->doneJobID < mtctx->nextJobID); assert(cSize >= mtctx->jobs[wJobID].dstFlushed); assert(mtctx->jobs[wJobID].dstBuff.start != NULL); - memcpy((char*)output->dst + output->pos, - (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, - toFlush); + if (toFlush > 0) { + memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } output->pos += toFlush; mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ @@ -1786,7 +1813,7 @@ static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) BYTE const* const bufferStart = (BYTE const*)buffer.start; BYTE const* const bufferEnd = bufferStart + buffer.capacity; BYTE const* const rangeStart = (BYTE const*)range.start; - BYTE const* const rangeEnd = rangeStart + range.size; + BYTE const* const rangeEnd = range.size != 0 ? rangeStart + range.size : rangeStart; if (rangeStart == NULL || bufferStart == NULL) return 0; @@ -2060,7 +2087,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ size_t const jobSize = mtctx->inBuff.filled; assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); - FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); } /* check for potential compressed data ready to be flushed */ @@ -2074,7 +2101,7 @@ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); + FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) , ""); /* recommended next input size : fill current input buffer */ return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ @@ -2091,7 +2118,7 @@ static size_t ZSTDMT_flushStream_internal(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* ou || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", (U32)srcSize, (U32)endFrame); - FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) , ""); } /* check if there is any data available to flush */ diff --git a/thirdparty/zstd/compress/zstdmt_compress.h b/thirdparty/zstd/compress/zstdmt_compress.h index 12a526087d..89914eb7f8 100644 --- a/thirdparty/zstd/compress/zstdmt_compress.h +++ b/thirdparty/zstd/compress/zstdmt_compress.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -40,7 +40,7 @@ /* === Dependencies === */ #include <stddef.h> /* size_t */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ -#include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ +#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ /* === Constants === */ diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c index bb2d0a96bc..68293a1309 100644 --- a/thirdparty/zstd/decompress/huf_decompress.c +++ b/thirdparty/zstd/decompress/huf_decompress.c @@ -1,47 +1,27 @@ /* ****************************************************************** - huff0 huffman decoder, - part of Finite State Entropy library - Copyright (C) 2013-present, Yann Collet. - - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. ****************************************************************** */ /* ************************************************************** * Dependencies ****************************************************************/ #include <string.h> /* memcpy, memset */ -#include "compiler.h" -#include "bitstream.h" /* BIT_* */ -#include "fse.h" /* to compress headers */ +#include "../common/compiler.h" +#include "../common/bitstream.h" /* BIT_* */ +#include "../common/fse.h" /* to compress headers */ #define HUF_STATIC_LINKING_ONLY -#include "huf.h" -#include "error_private.h" +#include "../common/huf.h" +#include "../common/error_private.h" /* ************************************************************** * Macros @@ -61,9 +41,6 @@ * Error Management ****************************************************************/ #define HUF_isError ERR_isError -#ifndef CHECK_F -#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } -#endif /* ************************************************************** @@ -181,17 +158,29 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize /* fill DTable */ { U32 n; - for (n=0; n<nbSymbols; n++) { - U32 const w = huffWeight[n]; - U32 const length = (1 << w) >> 1; - U32 u; + size_t const nEnd = nbSymbols; + for (n=0; n<nEnd; n++) { + size_t const w = huffWeight[n]; + size_t const length = (1 << w) >> 1; + size_t const uStart = rankVal[w]; + size_t const uEnd = uStart + length; + size_t u; HUF_DEltX1 D; - D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w); - for (u = rankVal[w]; u < rankVal[w] + length; u++) - dt[u] = D; - rankVal[w] += length; - } } - + D.byte = (BYTE)n; + D.nbBits = (BYTE)(tableLog + 1 - w); + rankVal[w] = (U32)uEnd; + if (length < 4) { + /* Use length in the loop bound so the compiler knows it is short. */ + for (u = 0; u < length; ++u) + dt[uStart + u] = D; + } else { + /* Unroll the loop 4 times, we know it is a power of 2. */ + for (u = uStart; u < uEnd; u += 4) { + dt[u + 0] = D; + dt[u + 1] = D; + dt[u + 2] = D; + dt[u + 3] = D; + } } } } return iSize; } @@ -282,6 +271,7 @@ HUF_decompress4X1_usingDTable_internal_body( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; const void* const dtPtr = DTable + 1; const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; @@ -306,9 +296,9 @@ HUF_decompress4X1_usingDTable_internal_body( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal = BIT_DStream_unfinished; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); @@ -317,8 +307,7 @@ HUF_decompress4X1_usingDTable_internal_body( CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) { + for ( ; (endSignal) & (op4 < olimit) ; ) { HUF_DECODE_SYMBOLX1_2(op1, &bitD1); HUF_DECODE_SYMBOLX1_2(op2, &bitD2); HUF_DECODE_SYMBOLX1_2(op3, &bitD3); @@ -335,10 +324,10 @@ HUF_decompress4X1_usingDTable_internal_body( HUF_DECODE_SYMBOLX1_0(op2, &bitD2); HUF_DECODE_SYMBOLX1_0(op3, &bitD3); HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - BIT_reloadDStream(&bitD1); - BIT_reloadDStream(&bitD2); - BIT_reloadDStream(&bitD3); - BIT_reloadDStream(&bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; } /* check corruption */ @@ -757,7 +746,6 @@ HUF_decompress1X2_usingDTable_internal_body( return dstSize; } - FORCE_INLINE_TEMPLATE size_t HUF_decompress4X2_usingDTable_internal_body( void* dst, size_t dstSize, @@ -769,6 +757,7 @@ HUF_decompress4X2_usingDTable_internal_body( { const BYTE* const istart = (const BYTE*) cSrc; BYTE* const ostart = (BYTE*) dst; BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); const void* const dtPtr = DTable+1; const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; @@ -793,7 +782,7 @@ HUF_decompress4X2_usingDTable_internal_body( BYTE* op2 = opStart2; BYTE* op3 = opStart3; BYTE* op4 = opStart4; - U32 endSignal; + U32 endSignal = 1; DTableDesc const dtd = HUF_getDTableDesc(DTable); U32 const dtLog = dtd.tableLog; @@ -804,8 +793,29 @@ HUF_decompress4X2_usingDTable_internal_body( CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) { + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else HUF_DECODE_SYMBOLX2_2(op1, &bitD1); HUF_DECODE_SYMBOLX2_2(op2, &bitD2); HUF_DECODE_SYMBOLX2_2(op3, &bitD3); @@ -822,8 +832,12 @@ HUF_decompress4X2_usingDTable_internal_body( HUF_DECODE_SYMBOLX2_0(op2, &bitD2); HUF_DECODE_SYMBOLX2_0(op3, &bitD3); HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif } /* check corruption */ diff --git a/thirdparty/zstd/decompress/zstd_ddict.c b/thirdparty/zstd/decompress/zstd_ddict.c index 0af3d23bfe..c8cb8ecc95 100644 --- a/thirdparty/zstd/decompress/zstd_ddict.c +++ b/thirdparty/zstd/decompress/zstd_ddict.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,17 +15,17 @@ * Dependencies *********************************************************/ #include <string.h> /* memcpy, memmove, memset */ -#include "cpu.h" /* bmi2 */ -#include "mem.h" /* low level memory routines */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY -#include "fse.h" +#include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY -#include "huf.h" +#include "../common/huf.h" #include "zstd_decompress_internal.h" #include "zstd_ddict.h" #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -# include "zstd_legacy.h" +# include "../legacy/zstd_legacy.h" #endif @@ -65,6 +65,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) dctx->virtualStart = ddict->dictContent; dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif if (ddict->entropyPresent) { dctx->litEntropy = 1; dctx->fseEntropy = 1; @@ -107,7 +111,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, /* load entropy tables */ RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); ddict->entropyPresent = 1; return 0; } @@ -133,7 +137,7 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); return 0; } diff --git a/thirdparty/zstd/decompress/zstd_ddict.h b/thirdparty/zstd/decompress/zstd_ddict.h index 0479d11bb0..af307efd3d 100644 --- a/thirdparty/zstd/decompress/zstd_ddict.h +++ b/thirdparty/zstd/decompress/zstd_ddict.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -16,7 +16,7 @@ * Dependencies *********************************************************/ #include <stddef.h> /* size_t */ -#include "zstd.h" /* ZSTD_DDict, and several public functions */ +#include "../zstd.h" /* ZSTD_DDict, and several public functions */ /*-******************************************************* diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c index dd4591b7be..be5c7cfc33 100644 --- a/thirdparty/zstd/decompress/zstd_decompress.c +++ b/thirdparty/zstd/decompress/zstd_decompress.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -56,19 +56,19 @@ * Dependencies *********************************************************/ #include <string.h> /* memcpy, memmove, memset */ -#include "cpu.h" /* bmi2 */ -#include "mem.h" /* low level memory routines */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY -#include "fse.h" +#include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY -#include "huf.h" -#include "zstd_internal.h" /* blockProperties_t */ +#include "../common/huf.h" +#include "../common/zstd_internal.h" /* blockProperties_t */ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -# include "zstd_legacy.h" +# include "../legacy/zstd_legacy.h" #endif @@ -111,7 +111,12 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->legacyContext = NULL; dctx->previousLegacyVersion = 0; dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->outBufferMode = ZSTD_obm_buffered; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif } ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) @@ -208,7 +213,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; @@ -256,7 +261,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s zfhPtr->frameType = ZSTD_skippableFrame; return 0; } - RETURN_ERROR(prefix_unknown); + RETURN_ERROR(prefix_unknown, ""); } /* ensure there is enough `srcSize` to fully read/decode frame header */ @@ -280,7 +285,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s if (!singleSegment) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); windowSize = (1ULL << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } @@ -352,14 +357,14 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; U32 sizeU32; - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported); + frameParameter_unsupported, ""); { size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); return skippableSize; } } @@ -439,7 +444,7 @@ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t he * harder. */ RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong); + dictionary_wrong, ""); #endif if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); return 0; @@ -559,17 +564,6 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) * Frame decoding ***************************************************************/ - -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dst; - dctx->previousDstEnd = dst; - } -} - /** ZSTD_insertBlock() : * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) @@ -587,9 +581,9 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, DEBUGLOG(5, "ZSTD_copyRawBlock"); if (dst == NULL) { if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); memcpy(dst, src, srcSize); return srcSize; } @@ -600,9 +594,9 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, { if (dst == NULL) { if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); memset(dst, b, regenSize); return regenSize; } @@ -618,7 +612,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, { const BYTE* ip = (const BYTE*)(*srcPtr); BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + dstCapacity; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -627,15 +621,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* check */ RETURN_ERROR_IF( remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, - srcSize_wrong); + srcSize_wrong, ""); /* Frame Header */ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, - srcSize_wrong); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } @@ -648,7 +642,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += ZSTD_blockHeaderSize; remainingSrcSize -= ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); switch(blockProperties.blockType) { @@ -663,13 +657,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, break; case bt_reserved : default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } if (ZSTD_isError(decodedSize)) return decodedSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize); - op += decodedSize; + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); ip += cBlockSize; remainingSrcSize -= cBlockSize; if (blockProperties.lastBlock) break; @@ -677,14 +673,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); ip += 4; remainingSrcSize -= 4; } @@ -741,7 +737,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (unsigned)magicNumber, ZSTD_MAGICNUMBER); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; @@ -751,11 +747,11 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); } else { /* this will initialize correctly with no dict if dict == NULL, so * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); } ZSTD_checkContinuity(dctx, dst); @@ -776,7 +772,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, "error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); - dst = (BYTE*)dst + res; + if (res != 0) + dst = (BYTE*)dst + res; dstCapacity -= res; } moreThan1Frame = 1; @@ -824,7 +821,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; @@ -842,6 +839,24 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr ****************************************/ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { switch(dctx->stage) { @@ -874,7 +889,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ - RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed"); + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); if (dstCapacity) ZSTD_checkContinuity(dctx, dst); switch (dctx->stage) @@ -899,7 +914,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeFrameHeader: assert(src != NULL); memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -941,29 +956,41 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case bt_compressed: DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ break; case bt_raw : + assert(srcSize <= dctx->expected); rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; break; case bt_rle : rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ break; case bt_reserved : /* should never happen */ default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } - if (ZSTD_isError(rSize)) return rSize; + FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. */ + if (dctx->expected > 0) { + return rSize; + } if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); RETURN_ERROR_IF( dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; @@ -974,7 +1001,6 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c } else { dctx->stage = ZSTDds_decodeBlockHeader; dctx->expected = ZSTD_blockHeaderSize; - dctx->previousDstEnd = (char*)dst + rSize; } return rSize; } @@ -984,7 +1010,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); U32 const check32 = MEM_readLE32(src); DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -1005,7 +1031,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } @@ -1016,6 +1042,10 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif return 0; } @@ -1029,7 +1059,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ dictPtr += 8; /* skip header = magic + dictID */ @@ -1048,16 +1078,16 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, dictPtr, dictEnd - dictPtr, workspace, workspaceSize); #endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff, offcodeLog; size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, @@ -1068,9 +1098,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, @@ -1081,9 +1111,9 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, @@ -1091,13 +1121,13 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, dictPtr += litlengthHeaderSize; } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); { int i; size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted); + dictionary_corrupted, ""); entropy->rep[i] = rep; } } @@ -1115,7 +1145,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict /* load entropy tables */ { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); dict = (const char*)dict + eSize; dictSize -= eSize; } @@ -1138,6 +1168,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ dctx->litEntropy = dctx->fseEntropy = 0; dctx->dictID = 0; + dctx->bType = bt_reserved; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; @@ -1149,11 +1180,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (dict && dictSize) RETURN_ERROR_IF( ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); return 0; } @@ -1172,7 +1203,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) DEBUGLOG(4, "DDict is %s", dctx->ddictIsCold ? "~cold~" : "hot!"); } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (ddict) { /* NULL ddict is equivalent to no dictionary */ ZSTD_copyDDictParameters(dctx, ddict); } @@ -1263,11 +1294,11 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (dict && dictSize != 0) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); dctx->ddict = dctx->ddictLocal; dctx->dictUses = ZSTD_use_indefinitely; } @@ -1286,7 +1317,7 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); dctx->dictUses = ZSTD_use_once; return 0; } @@ -1303,8 +1334,8 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); return ZSTD_startingInputLength(zds->format); } @@ -1320,8 +1351,8 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) * this function cannot fail */ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) { - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); return ZSTD_startingInputLength(dctx->format); } @@ -1330,14 +1361,14 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) * this function cannot fail */ size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); return ZSTD_startingInputLength(dctx->format); } size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (ddict) { dctx->ddict = ddict; @@ -1354,9 +1385,9 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); size_t const min = (size_t)1 << bounds.lowerBound; size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); dctx->maxWindowSize = maxWindowSize; return 0; } @@ -1379,6 +1410,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.upperBound = (int)ZSTD_f_zstd1_magicless; ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_obm_buffered; + bounds.upperBound = (int)ZSTD_obm_stable; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ -1398,12 +1433,12 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) } #define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ } size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); switch(dParam) { case ZSTD_d_windowLogMax: if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; @@ -1414,9 +1449,13 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_format, value); dctx->format = (ZSTD_format_e)value; return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_outBufferMode_e)value; + return 0; default:; } - RETURN_ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) @@ -1428,7 +1467,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); dctx->format = ZSTD_f_zstd1; dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; @@ -1449,7 +1488,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return minRBSize; } @@ -1467,30 +1506,94 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) ZSTD_frameHeader zfh; size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong); + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); } /* ***** Decompression ***** */ -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) { - size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); - return length; + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_obm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!"); } +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_obm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : oend - *op; + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_obm_stable); + } + return 0; +} size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { - const char* const istart = (const char*)(input->src) + input->pos; - const char* const iend = (const char*)(input->src) + input->size; + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; const char* ip = istart; - char* const ostart = (char*)(output->dst) + output->pos; - char* const oend = (char*)(output->dst) + output->size; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; char* op = ostart; U32 someMoreWork = 1; @@ -1506,6 +1609,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "forbidden. out: pos: %u vs size: %u", (U32)output->pos, (U32)output->size); DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); while (someMoreWork) { switch(zds->streamStage) @@ -1516,6 +1620,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; zds->legacyVersion = 0; zds->hostageByte = 0; + zds->expectedOutBuffer = *output; /* fall-through */ case zdss_loadHeader : @@ -1543,7 +1648,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "legacy support is incompatible with static dctx"); FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, - dict, dictSize)); + dict, dictSize), ""); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ @@ -1570,7 +1675,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); if (cSize <= (size_t)(iend-istart)) { @@ -1586,15 +1692,23 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } } + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_obm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + /* Consume header (see ZSTDds_decodeFrameHeader) */ DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); zds->expected = ZSTD_blockHeaderSize; zds->stage = ZSTDds_decodeBlockHeader; } @@ -1605,40 +1719,48 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); - if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { - size_t const bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_free(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } zds->streamStage = zdss_read; /* fall-through */ case zdss_read: DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); if (neededInSize==0) { /* end of frame */ zds->streamStage = zdss_init; @@ -1646,15 +1768,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), - ip, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); ip += neededInSize; - if (!decodedSize && !isSkipFrame) break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; + /* Function modifies the stage so we must break */ break; } } if (ip==iend) { someMoreWork = 0; break; } /* no more input */ @@ -1666,6 +1782,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB size_t const toLoad = neededInSize - zds->inPos; int const isSkipFrame = ZSTD_isSkipFrame(zds); size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); if (isSkipFrame) { loadedSize = MIN(toLoad, (size_t)(iend-ip)); } else { @@ -1679,17 +1797,11 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ - { size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, - zds->inBuff, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; - zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - } } - zds->streamStage = zdss_flush; - /* fall-through */ - + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); @@ -1712,17 +1824,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } /* result */ input->pos = (size_t)(ip - (const char*)(input->src)); output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + if ((ip==istart) && (op==ostart)) { /* no forward progress */ zds->noForwardProgress ++; if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall); - RETURN_ERROR_IF(ip==iend, srcSize_wrong); + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); assert(0); } } else { diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.c b/thirdparty/zstd/decompress/zstd_decompress_block.c index 767e5f9a0b..ad3b3d8dbd 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.c +++ b/thirdparty/zstd/decompress/zstd_decompress_block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -15,14 +15,14 @@ * Dependencies *********************************************************/ #include <string.h> /* memcpy, memmove, memset */ -#include "compiler.h" /* prefetch */ -#include "cpu.h" /* bmi2 */ -#include "mem.h" /* low level memory routines */ +#include "../common/compiler.h" /* prefetch */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ #define FSE_STATIC_LINKING_ONLY -#include "fse.h" +#include "../common/fse.h" #define HUF_STATIC_LINKING_ONLY -#include "huf.h" -#include "zstd_internal.h" +#include "../common/huf.h" +#include "../common/zstd_internal.h" #include "zstd_decompress_internal.h" /* ZSTD_DCtx */ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */ #include "zstd_decompress_block.h" @@ -56,7 +56,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); { U32 const cBlockHeader = MEM_readLE24(src); U32 const cSize = cBlockHeader >> 3; @@ -64,7 +64,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); bpPtr->origSize = cSize; /* only useful for RLE */ if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); return cSize; } } @@ -80,7 +80,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); @@ -89,7 +89,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { case set_repeat: DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); /* fall-through */ case set_compressed: @@ -121,8 +121,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); /* prefetch huffman table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -160,7 +160,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; @@ -190,7 +190,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; @@ -222,7 +222,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; @@ -440,8 +440,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb switch(type) { case set_rle : - RETURN_ERROR_IF(!srcSize, srcSize_wrong); - RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected); + RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; U32 const nbBits = nbAdditionalBits[symbol]; @@ -453,7 +453,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb *DTablePtr = defaultTable; return 0; case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); /* prefetch FSE table if used */ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { const void* const pStart = *DTablePtr; @@ -465,8 +465,8 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb { unsigned tableLog; S16 norm[MaxSeq+1]; size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); *DTablePtr = DTableSpace; return headerSize; @@ -487,28 +487,28 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); /* SeqHead */ nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); return 1; } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong); + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); nbSeq = ((nbSeq-0x80)<<8) + *ip++; } } *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -521,7 +521,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -531,7 +531,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -541,7 +541,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } } @@ -580,7 +580,7 @@ typedef struct { * Precondition: *ip <= *op * Postcondition: *op - *op >= 8 */ -static void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { assert(*ip <= *op); if (offset < 8) { /* close range match, overlap */ @@ -665,15 +665,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - /* bounds checks */ - assert(oLitEnd < oMatchEnd); - RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); /* copy literals */ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); @@ -683,7 +683,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -709,16 +709,27 @@ size_t ZSTD_execSequence(BYTE* op, BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; - /* Errors and uncommon cases handled here. */ - assert(oLitEnd < oMatchEnd); - if (iLitEnd > litLimit || oMatchEnd > oend_w) + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); assert(iLitEnd <= litLimit /* Literal length is in bounds */); assert(oLitEnd <= oend_w /* Can wildcopy literals */); assert(oMatchEnd <= oend_w /* Can wildcopy matches */); @@ -729,7 +740,7 @@ size_t ZSTD_execSequence(BYTE* op, */ assert(WILDCOPY_OVERLENGTH >= 16); ZSTD_copy16(op, (*litPtr)); - if (sequence.litLength > 16) { + if (UNLIKELY(sequence.litLength > 16)) { ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); } op = oLitEnd; @@ -738,7 +749,7 @@ size_t ZSTD_execSequence(BYTE* op, /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { memmove(oLitEnd, match, sequence.matchLength); @@ -760,7 +771,7 @@ size_t ZSTD_execSequence(BYTE* op, /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy * without overlap checking. */ - if (sequence.offset >= WILDCOPY_VECLEN) { + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { /* We bet on a full wildcopy for matches, since we expect matches to be * longer than literals (in general). In silesia, ~10% of matches are longer * than 16 bytes. @@ -802,6 +813,14 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) DStatePtr->state = DInfo.nextState + lowBits; } +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) * bits before reloading. This value is the maximum number of bytes we read @@ -813,25 +832,26 @@ ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) { seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; /* sequence */ { size_t offset; - if (!ofBits) - offset = 0; - else { + if (ofBits > 1) { ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); assert(ofBits <= MaxOff); @@ -845,59 +865,138 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { /* offset == 0 */ - offset = seqState->prevOffset[0]; - } - } else { seqState->prevOffset[2] = seqState->prevOffset[1]; seqState->prevOffset[1] = seqState->prevOffset[0]; seqState->prevOffset[0] = offset; - } + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } seq.offset = offset; } - seq.matchLength = mlBase - + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/) : 0); /* <= 16 bits */ + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) BIT_reloadDStream(&seqState->DStream); /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - seq.litLength = llBase - + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits/*>0*/) : 0); /* <= 16 bits */ + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - /* ANS state update */ - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } return seq; } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -910,46 +1009,104 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; /* Regen sequences */ if (nbSeq) { seqState_t seqState; + size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); ZSTD_STATIC_ASSERT( BIT_DStream_unfinished < BIT_DStream_completed && BIT_DStream_endOfBuffer < BIT_DStream_completed && BIT_DStream_completed < BIT_DStream_overflow); - for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { - nbSeq--; - { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - if (ZSTD_isError(oneSeqSize)) return oneSeqSize; - op += oneSeqSize; - } } +#if defined(__GNUC__) && defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + /* gcc and clang both don't like early returns in this loop. + * gcc doesn't like early breaks either. + * Instead save an error and report it at the end. + * When there is an error, don't increment op, so we don't + * overwrite. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; + else op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) break; + } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - RETURN_ERROR_IF(nbSeq, corruption_detected); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -959,99 +1116,21 @@ static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequenceLong(seqState_t* seqState, ZSTD_longOffset_e const longOffsets) -{ - seq_t seq; - U32 const llBits = seqState->stateLL.table[seqState->stateLL.state].nbAdditionalBits; - U32 const mlBits = seqState->stateML.table[seqState->stateML.state].nbAdditionalBits; - U32 const ofBits = seqState->stateOffb.table[seqState->stateOffb.state].nbAdditionalBits; - U32 const totalBits = llBits+mlBits+ofBits; - U32 const llBase = seqState->stateLL.table[seqState->stateLL.state].baseValue; - U32 const mlBase = seqState->stateML.table[seqState->stateML.state].baseValue; - U32 const ofBase = seqState->stateOffb.table[seqState->stateOffb.state].baseValue; - - /* sequence */ - { size_t offset; - if (!ofBits) - offset = 0; - else { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets) { - U32 const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN_32-1); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (MEM_32bits() || extraBits) BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofBits <= 1) { - offset += (llBase==0); - if (offset) { - size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = mlBase + ((mlBits>0) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && (totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Verify that there is enough bits to read the rest of the data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase + ((llBits>0) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - { size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, overly large offset will be detected in ZSTD_execSequenceLong() */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update */ - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; @@ -1063,6 +1142,7 @@ ZSTD_decompressSequencesLong_body( const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; /* Regen sequences */ if (nbSeq) { @@ -1078,36 +1158,45 @@ ZSTD_decompressSequencesLong_body( seqState.prefixStart = prefixStart; seqState.pos = (size_t)(op-prefixStart); seqState.dictEnd = dictEnd; + assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) { - sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset); + sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch); PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ } - RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected); + RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, ""); /* decode and decompress */ for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { - seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); +#endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ sequences[seqNb & STORED_SEQS_MASK] = sequence; op += oneSeqSize; } - RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected); + RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, ""); /* finish queue */ seqNb -= seqAdvance; for ( ; seqNb<nbSeq ; seqNb++) { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +#endif if (ZSTD_isError(oneSeqSize)) return oneSeqSize; op += oneSeqSize; } @@ -1118,9 +1207,11 @@ ZSTD_decompressSequencesLong_body( /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -1130,9 +1221,10 @@ static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1146,9 +1238,10 @@ DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1157,9 +1250,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1169,21 +1263,23 @@ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset); + const ZSTD_longOffset_e isLongOffset, + const int frame); #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -1198,15 +1294,16 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -1240,7 +1337,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) } #endif - size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -1256,7 +1352,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); @@ -1282,6 +1378,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) if ( !usePrefetchDecoder @@ -1300,17 +1398,28 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (usePrefetchDecoder) #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +{ + if (dst != dctx->previousDstEnd) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.h b/thirdparty/zstd/decompress/zstd_decompress_block.h index 7e92960410..bf39b7350c 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_block.h +++ b/thirdparty/zstd/decompress/zstd_decompress_block.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -16,8 +16,8 @@ * Dependencies *********************************************************/ #include <stddef.h> /* size_t */ -#include "zstd.h" /* DCtx, and some public functions */ -#include "zstd_internal.h" /* blockProperties_t, and some public functions */ +#include "../zstd.h" /* DCtx, and some public functions */ +#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */ #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ diff --git a/thirdparty/zstd/decompress/zstd_decompress_internal.h b/thirdparty/zstd/decompress/zstd_decompress_internal.h index ccbdfa090f..9ad96c5548 100644 --- a/thirdparty/zstd/decompress/zstd_decompress_internal.h +++ b/thirdparty/zstd/decompress/zstd_decompress_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -19,8 +19,8 @@ /*-******************************************************* * Dependencies *********************************************************/ -#include "mem.h" /* BYTE, U16, U32 */ -#include "zstd_internal.h" /* ZSTD_seqSymbol */ +#include "../common/mem.h" /* BYTE, U16, U32 */ +#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ @@ -95,6 +95,11 @@ typedef enum { ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ } ZSTD_dictUses_e; +typedef enum { + ZSTD_obm_buffered = 0, /* Buffer the output */ + ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ +} ZSTD_outBufferMode_e; + struct ZSTD_DCtx_s { const ZSTD_seqSymbol* LLTptr; @@ -147,10 +152,19 @@ struct ZSTD_DCtx_s U32 legacyVersion; U32 hostageByte; int noForwardProgress; + ZSTD_outBufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; /* workspace */ BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif }; /* typedef'd to ZSTD_DCtx within "zstd.h" */ @@ -160,7 +174,7 @@ struct ZSTD_DCtx_s /*! ZSTD_loadDEntropy() : * dict : must point at beginning of a valid zstd dictionary. - * @return : size of entropy tables read */ + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize); diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h index 72080ea87e..8c6fc6ae90 100644 --- a/thirdparty/zstd/zstd.h +++ b/thirdparty/zstd/zstd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -72,7 +72,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 4 +#define ZSTD_VERSION_RELEASE 5 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ @@ -274,7 +274,10 @@ typedef enum { * Default level is ZSTD_CLEVEL_DEFAULT==3. * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level resets all other compression parameters to default */ + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ /* Advanced compression parameters : * It's possible to pin down compression parameters to some specific values. * In which case, these values are no longer dynamically selected by the compressor */ @@ -519,11 +522,13 @@ typedef enum { /* note : additional experimental parameters are also available * within the experimental section of the API. * At the time of this writing, they include : - * ZSTD_c_format + * ZSTD_d_format + * ZSTD_d_stableOutBuffer * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly */ - ZSTD_d_experimentalParam1=1000 + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001 } ZSTD_dParameter; @@ -763,7 +768,7 @@ ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* This function is redundant with the advanced API and equivalent to: * - * ZSTD_DCtx_reset(zds); + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); * ZSTD_DCtx_refDDict(zds, NULL); */ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); @@ -1263,23 +1268,28 @@ ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, ***************************************/ /*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage of a future - * {D,C}Ctx, before its creation. + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. * - * ZSTD_estimateCCtxSize() will provide a budget large enough for any - * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), - * this estimate does not include space for a window buffer, so this estimate - * is guaranteed to be enough for single-shot compressions, but not streaming - * compressions. It will however assume the input may be arbitrarily large, - * which is the worst case. If srcSize is known to always be small, - * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with - * ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with - * ZSTD_CCtxParams_setParameter(). + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. * - * Note: only single-threaded compression is supported. This function will - * return an error code if ZSTD_c_nbWorkers is >= 1. */ + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); @@ -1642,6 +1652,37 @@ ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowS * allowing selection between ZSTD_format_e input compression formats */ #define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 /*! ZSTD_DCtx_setFormat() : * Instruct the decoder context about what kind of data to decode next. |